def parse_additional_resources(basepath, recid, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id of the submission
    :param yaml_document: parsed submission YAML containing an
        'additional_resources' list of {'location', 'description', ...} dicts
    :return: list of DataResource objects (not yet added to the DB session)
    :raises ValueError: if a migrated '/resource' path does not exist on disk
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(reference["location"])
        contains_pattern, pattern = contains_accepted_url(reference['location'])

        if ('http' in resource_location.lower()
                and 'hepdata' not in resource_location) or contains_pattern:
            # External URL: use the matched pattern as the type, falling
            # back to a generic 'html' link.
            if pattern:
                file_type = pattern
            else:
                file_type = 'html'

            # In case URLs do not have http added.
            if 'http' not in resource_location.lower():
                resource_location = "http://" + resource_location
        elif 'http' not in resource_location.lower() \
                and 'www' not in resource_location.lower():
            if resource_location.startswith('/resource'):
                # This is an old file migrated from hepdata.cedar.ac.uk. We
                # should only get here if using mock_import_old_record, in
                # which case the resources should already be in the
                # 'resources' directory.
                parent_dir = os.path.dirname(basepath)
                resource_location = os.path.join(
                    parent_dir,
                    'resources',
                    os.path.basename(resource_location)
                )
                if not os.path.exists(resource_location):
                    raise ValueError("No such path %s" % resource_location)
            else:
                # This is a file local to the submission.
                # (The previous `try/except ... raise e` wrapper here was a
                # no-op and has been removed.)
                resource_location = os.path.join(basepath, resource_location)

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                resource_license = get_license(reference["license"])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id of the submission
    :param version: version of the submission being processed
    :param yaml_document: parsed submission YAML containing an
        'additional_resources' list of {'location', 'description', ...} dicts
    :return: list of DataResource objects (not yet added to the DB session)
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(reference["location"])
        contains_pattern, pattern = contains_accepted_url(reference['location'])

        if ('http' in resource_location
                and 'hepdata' not in resource_location) or contains_pattern:
            # External URL: use the matched pattern as the type, falling
            # back to a generic 'html' link.
            if pattern:
                file_type = pattern
            else:
                file_type = 'html'

            # In case URLs do not have http added.
            if 'http' not in resource_location:
                resource_location = "http://" + resource_location
        elif 'http' not in resource_location \
                and 'www' not in resource_location \
                and 'resource' not in resource_location:
            # This is a file local to the submission.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                resource_location = download_resource_file(
                    recid, resource_location)
                # Was a bare print(); route diagnostics through the logger
                # like the rest of this function.
                log.info('Downloaded resource location is %s',
                         resource_location)
            except URLError:
                log.error(
                    "Unable to download {0}. The resource is unavailable."
                    .format(resource_location))
                resource_location = None

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                # Renamed from `dict`, which shadowed the builtin.
                license_data = get_prefilled_dictionary(
                    ["name", "url", "description"], reference["license"])

                resource_license = get_or_create(
                    db.session, License,
                    name=license_data['name'],
                    url=license_data['url'],
                    description=license_data['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def process_data_file(recid, version, basepath, data_obj, datasubmission,
                      main_file_path):
    """
    Takes a data file and any supplementary files and persists their
    metadata to the database whilst recording their upload path.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file
        in the DB
    :param main_file_path: the data file path
    :return: None
    """
    main_data_file = DataResource(
        file_location=main_file_path, file_type="data")

    if "data_license" in data_obj:
        # Renamed from `dict` / `license`, which shadowed the builtins.
        license_data = get_prefilled_dictionary(
            ["name", "url", "description"], data_obj["data_license"])

        data_license = get_or_create(
            db.session, License,
            name=license_data['name'],
            url=license_data['url'],
            description=license_data['description'])

        main_data_file.file_license = data_license.id

    db.session.add(main_data_file)

    # Commit here, otherwise there is no ID to reference in the data
    # submission table.
    db.session.commit()

    datasubmission.data_file = main_data_file.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    cleanup_data_keywords(datasubmission)

    if "keywords" in data_obj:
        for keyword in data_obj["keywords"]:
            keyword_name = keyword['name']
            for value in keyword['values']:
                # Distinct name: the original rebound the outer loop
                # variable `keyword` here, clobbering it mid-iteration.
                keyword_object = Keyword(name=keyword_name, value=value)
                datasubmission.keywords.append(keyword_object)

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        resources = parse_additional_resources(
            basepath, recid, version, data_obj)
        for resource in resources:
            datasubmission.resources.append(resource)

    db.session.commit()
def process_data_file(recid, version, basepath, data_obj, datasubmission,
                      main_file_path):
    """
    Takes a data file and any supplementary files and persists their
    metadata to the database whilst recording their upload path.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file
        in the DB
    :param main_file_path: the data file path
    :return: None
    """
    main_data_file = DataResource(
        file_location=main_file_path, file_type="data")

    if "data_license" in data_obj:
        # Renamed from `dict` / `license`, which shadowed the builtins.
        license_data = get_prefilled_dictionary(
            ["name", "url", "description"], data_obj["data_license"])

        data_license = get_or_create(
            db.session, License,
            name=license_data['name'],
            url=license_data['url'],
            description=license_data['description'])

        main_data_file.file_license = data_license.id

    db.session.add(main_data_file)

    # Commit here, otherwise there is no ID to reference in the data
    # submission table.
    db.session.commit()

    datasubmission.data_file = main_data_file.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    if "keywords" in data_obj:
        for keyword in data_obj["keywords"]:
            keyword_name = keyword['name']
            for value in keyword['values']:
                # Distinct name: the original rebound the outer loop
                # variable `keyword` here, clobbering it mid-iteration.
                keyword_object = Keyword(name=keyword_name, value=value)
                datasubmission.keywords.append(keyword_object)

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        resources = parse_additional_resources(
            basepath, recid, version, data_obj)
        for resource in resources:
            datasubmission.resources.append(resource)

    db.session.commit()