def test_file_extension_pattern():
    """
    Checks that infer_file_type returns the expected label for each of a
    fixed set of sample file names.
    """
    # Each case pairs an input file name with the label the test expects back.
    cases = [
        {"file": "test.py", "exp_result": "Python"},
        {"file": "test.cpp", "exp_result": "C++"},
        {"file": "test.c", "exp_result": "C"},
        {"file": "test.sh", "exp_result": "Bash Shell"},
        {"file": "test.root", "exp_result": "ROOT"},
        {"file": "test.docx", "exp_result": "docx"},
        {"file": "test", "exp_result": "resource"},
    ]

    for case in cases:
        actual = infer_file_type(case["file"])
        expected = case["exp_result"]
        assert expected == actual
def parse_additional_resources(basepath, recid, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is turned into
    a ``DataResource``. The stored location depends on what the entry's
    'location' looks like:

    * contains 'http' (any case) but not 'hepdata', or matches an accepted
      URL pattern: kept as a URL, prefixed with 'http://' when 'http' is
      missing; the file type becomes the matched pattern, or 'html';
    * contains neither 'http' nor 'www' (any case): treated as a file,
      either a migrated '/resource...' file looked up in the 'resources'
      directory next to ``basepath`` or a file relative to ``basepath``;
    * anything else: kept unchanged.

    :param basepath: the path the submission has been loaded to
    :param recid: not used in this function body
    :param yaml_document: mapping with an 'additional_resources' iterable;
        each item is read for 'location', 'description' and, optionally,
        'license'
    :return: list of the DataResource objects created, in document order
    :raises ValueError: if a '/resource...' location does not exist in the
        'resources' directory next to ``basepath``
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # Lower-case once; the 'hepdata' test below stays case-sensitive,
        # exactly as before.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in location_lower:
            if resource_location.startswith('/resource'):
                # This is an old file migrated from hepdata.cedar.ac.uk. We
                # should only get here if using mock_import_old_record, in
                # which case the resources should already be in the
                # 'resources' directory
                parent_dir = os.path.dirname(basepath)
                resource_location = os.path.join(
                    parent_dir,
                    'resources',
                    os.path.basename(resource_location)
                )
                if not os.path.exists(resource_location):
                    raise ValueError("No such path %s" % resource_location)
            else:
                # this is a file local to the submission. Any error from the
                # join simply propagates (the old try/except only re-raised).
                resource_location = os.path.join(basepath, resource_location)

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location,
                file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                resource_license = get_license(reference["license"])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is turned into
    a ``DataResource``. The stored location depends on what the entry's
    'location' looks like:

    * contains 'http' but not 'hepdata', or matches an accepted URL
      pattern: kept as a URL, prefixed with 'http://' when 'http' is
      missing; the file type becomes the matched pattern, or 'html';
    * contains none of 'http', 'www' and 'resource': joined onto
      ``basepath`` as a file local to the submission;
    * anything else: fetched with ``download_resource_file``; if that
      raises URLError the failure is logged and the entry is skipped.

    :param basepath: directory that submission-local locations are joined to
    :param recid: passed to download_resource_file for downloaded resources
    :param version: not used in this function body
    :param yaml_document: mapping with an 'additional_resources' iterable;
        each item is read for 'location', 'description' and, optionally,
        'license'
    :return: list of the DataResource objects created, in document order
    """
    resources = []

    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        has_http = 'http' in resource_location

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http
              and 'www' not in resource_location
              and 'resource' not in resource_location):
            # this is a file local to the submission. Any error from the
            # join simply propagates (the old try/except only re-raised).
            resource_location = os.path.join(basepath, resource_location)

        else:
            try:
                downloaded_location = download_resource_file(recid, resource_location)
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                # A falsy location makes the block below skip this entry.
                resource_location = None
            else:
                resource_location = downloaded_location
                print('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location,
                file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                # Named license_fields so the builtin ``dict`` is not shadowed.
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License,
                    name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])

                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
def test_file_extension_pattern():
    """
    Runs infer_file_type over a fixed set of sample file names and asserts
    that each result equals the label recorded alongside the name.
    """
    samples = (
        {"file": "test.py", "exp_result": "Python"},
        {"file": "test.cpp", "exp_result": "C++"},
        {"file": "test.c", "exp_result": "C"},
        {"file": "test.sh", "exp_result": "Bash Shell"},
        {"file": "test.root", "exp_result": "ROOT"},
        {"file": "test.docx", "exp_result": "docx"},
        # A name without any extension is expected to come back as "resource".
        {"file": "test", "exp_result": "resource"},
    )

    for sample in samples:
        inferred_type = infer_file_type(sample["file"])
        assert sample["exp_result"] == inferred_type