def parse_additional_resources(basepath, recid, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is turned into
    a ``DataResource`` whose location is normalised as follows:

    * locations containing ``http`` (and not ``hepdata``), or matching an
      accepted URL pattern, are kept as links (``http://`` is prepended
      when missing);
    * locations with neither ``http`` nor ``www`` are treated as files:
      ``/resource...`` paths are looked up in the ``resources`` directory
      next to ``basepath``, anything else is joined onto ``basepath``;
    * every other location is stored unchanged.

    :param basepath: the path the submission has been loaded to
    :param recid: not used in this function
    :param yaml_document: mapping with an ``additional_resources`` list;
        each item is read for ``location``, ``description`` and, when
        present, ``license``
    :return: list of ``DataResource`` objects
    :raises ValueError: if a ``/resource...`` location does not exist in
        the ``resources`` directory next to ``basepath``
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # Computed once; the scheme / 'www' checks are case-insensitive.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in location_lower:
            if resource_location.startswith('/resource'):
                # This is an old file migrated from hepdata.cedar.ac.uk. We
                # should only get here if using mock_import_old_record, in
                # which case the resources should already be in the
                # 'resources' directory
                parent_dir = os.path.dirname(basepath)
                resource_location = os.path.join(
                    parent_dir,
                    'resources',
                    os.path.basename(resource_location)
                )
                if not os.path.exists(resource_location):
                    raise ValueError("No such path %s" % resource_location)
            else:
                # this is a file local to the submission.
                resource_location = os.path.join(basepath, resource_location)

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location,
                file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                resource_license = get_license(reference["license"])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is turned into
    a ``DataResource`` whose location is resolved as follows:

    * locations containing ``http`` (and not ``hepdata``), or matching an
      accepted URL pattern, are kept as links (``http://`` is prepended
      when missing);
    * locations containing none of ``http``, ``www`` or ``resource`` are
      treated as files local to the submission and joined onto
      ``basepath``;
    * everything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the resource is logged and skipped.

    :param basepath: directory that submission-local locations are joined
        onto
    :param recid: passed to ``download_resource_file`` for resources that
        have to be fetched
    :param version: not used in this function
    :param yaml_document: mapping with an ``additional_resources`` list;
        each item is read for ``location``, ``description`` and, when
        present, ``license``
    :return: list of ``DataResource`` objects (resources that could not be
        downloaded are omitted)
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # Case-insensitive so that e.g. 'HTTP://...' is not mistaken for a
        # file local to the submission.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http
              and 'www' not in location_lower
              and 'resource' not in resource_location):
            # this is a file local to the submission.
            resource_location = os.path.join(basepath, resource_location)

        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                # A falsy location makes the block below skip this entry.
                resource_location = None
            else:
                print('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location,
                file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                # Local name chosen so the builtin ``dict`` is not shadowed.
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License,
                    name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is turned into
    a ``DataResource`` whose location is resolved as follows:

    * locations containing ``http`` (and not ``hepdata``), or matching an
      accepted URL pattern, are kept as links (``http://`` is prepended
      when missing);
    * locations containing none of ``http``, ``www`` or ``resource`` are
      treated as files local to the submission and joined onto
      ``basepath``;
    * everything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the resource is logged and skipped.

    :param basepath: directory that submission-local locations are joined
        onto
    :param recid: passed to ``download_resource_file`` for resources that
        have to be fetched
    :param version: not used in this function
    :param yaml_document: mapping with an ``additional_resources`` list;
        each item is read for ``location``, ``description`` and, when
        present, ``license``
    :return: list of ``DataResource`` objects (resources that could not be
        downloaded are omitted)
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # Case-insensitive so that e.g. 'HTTP://...' is not mistaken for a
        # file local to the submission.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http
              and 'www' not in location_lower
              and 'resource' not in resource_location):
            # this is a file local to the submission.
            resource_location = os.path.join(basepath, resource_location)

        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                # A falsy location makes the block below skip this entry.
                resource_location = None
            else:
                print('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location,
                file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                # Local name chosen so the builtin ``dict`` is not shadowed.
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License,
                    name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def test_url_pattern():
    """
    Checks that the second value returned by ``contains_accepted_url``
    equals the expected pattern name for each sample URL.
    """
    samples = [
        {
            "url": "http://amcfast.hepforge.org/",
            "exp_result": "hepforge",
        },
        {
            "url": "https://bitbucket.org/eamonnmag/automacron-evaluation",
            "exp_result": "bitbucket",
        },
        {
            "url": "http://sourceforge.net/projects/isacommons/",
            "exp_result": "sourceforge",
        },
        {
            "url": "http://zenodo.net/record/11085",
            "exp_result": "zenodo",
        },
        {
            "url": "https://github.com/HEPData/hepdata",
            "exp_result": "github",
        },
    ]

    for sample in samples:
        # Only the detected pattern name is checked; the flag is ignored.
        _, detected_type = contains_accepted_url(sample["url"])
        assert sample["exp_result"] == detected_type
def test_url_pattern():
    """
    Checks that the second value returned by ``contains_accepted_url``
    equals the expected pattern name for each sample URL.
    """
    samples = [
        {"url": "http://rivet.hepforge.org/analyses/ATLAS_2012_I1203852",
         "exp_result": "rivet"},
        {"url": "https://bitbucket.org/eamonnmag/automacron-evaluation",
         "exp_result": "bitbucket"},
        {"url": "http://sourceforge.net/projects/isacommons/",
         "exp_result": "sourceforge"},
        {"url": "http://zenodo.net/record/11085",
         "exp_result": "zenodo"},
        {"url": "https://github.com/HEPData/hepdata",
         "exp_result": "github"},
    ]

    for sample in samples:
        # Only the detected pattern name is checked; the flag is ignored.
        _, detected_type = contains_accepted_url(sample["url"])
        assert sample["exp_result"] == detected_type