コード例 #1
0
def test_file_extension_pattern():
    """Check that infer_file_type yields the expected label for each sample file name."""
    cases = [
        {"file": "test.py", "exp_result": "Python"},
        {"file": "test.cpp", "exp_result": "C++"},
        {"file": "test.c", "exp_result": "C"},
        {"file": "test.sh", "exp_result": "Bash Shell"},
        {"file": "test.root", "exp_result": "ROOT"},
        # An extension without a dedicated label is expected back as-is.
        {"file": "test.docx", "exp_result": "docx"},
        # A name without any extension is expected to map to "resource".
        {"file": "test", "exp_result": "resource"},
    ]

    for case in cases:
        inferred = infer_file_type(case["file"])
        assert (case["exp_result"] == inferred)
コード例 #2
0
def parse_additional_resources(basepath, recid, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` becomes one
    ``DataResource``. The entry's location is resolved as follows:

    * a location containing 'http' but not 'hepdata', or one matched by
      ``contains_accepted_url``, is kept as a link ('http://' is prepended
      when it is missing);
    * a location containing neither 'http' nor 'www' is treated as a file:
      one starting with '/resource' is remapped into the 'resources'
      directory next to ``basepath``, anything else is joined to
      ``basepath``;
    * any other location is stored unchanged.

    :param basepath: the path the submission has been loaded to
    :param recid: presumably the record id; not used by this function
    :param yaml_document: mapping with an 'additional_resources' list whose
        entries provide 'location' and 'description' and, optionally,
        'license'
    :return: list of the DataResource objects that were created
    :raises ValueError: if a remapped '/resource...' file does not exist
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # Lower-cased once; every 'http'/'www' check below is case-insensitive.
        # The 'hepdata' check is intentionally left case-sensitive, as before.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            # An accepted URL pattern supplies the type, otherwise plain html.
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in location_lower:
            if resource_location.startswith('/resource'):
                # This is an old file migrated from hepdata.cedar.ac.uk. We
                # should only get here if using mock_import_old_record, in
                # which case the resources should already be in the 'resources'
                # directory
                resource_location = os.path.join(
                    os.path.dirname(basepath),
                    'resources',
                    os.path.basename(resource_location)
                )
                if not os.path.exists(resource_location):
                    raise ValueError("No such path %s" % resource_location)
            else:
                # this is a file local to the submission.
                resource_location = os.path.join(basepath, resource_location)

        if not resource_location:
            # Nothing usable to store for this entry.
            continue

        new_reference = DataResource(
            file_location=resource_location, file_type=file_type,
            file_description=reference['description'])

        if "license" in reference:
            resource_license = get_license(reference["license"])
            new_reference.file_license = resource_license.id

        resources.append(new_reference)

    return resources
コード例 #3
0
ファイル: submission.py プロジェクト: HEPData/hepdata3
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is resolved to
    a location and, when that location is usable, turned into a
    ``DataResource``:

    * a location containing 'http' but not 'hepdata', or one matched by
      ``contains_accepted_url``, is kept as a link ('http://' is prepended
      when it is missing);
    * a location containing none of 'http', 'www' and 'resource' is treated
      as a file local to the submission and joined to ``basepath``;
    * anything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: directory that submission-local files are relative to
    :param recid: passed to ``download_resource_file`` for downloaded entries
    :param version: not used by this function
    :param yaml_document: mapping with an 'additional_resources' list whose
        entries provide 'location' and 'description' and, optionally,
        'license'
    :return: list of the DataResource objects that were created
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)
        has_http = 'http' in resource_location

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            # An accepted URL pattern supplies the type, otherwise plain html.
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in resource_location and 'resource' not in resource_location:
            # this is a file local to the submission.
            # NOTE(review): any local name containing 'resource' fails this
            # substring check and is downloaded instead - confirm intended.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
                print('Downloaded resource location is {0}'.format(resource_location))
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                # Falsy location makes the entry get skipped below.
                resource_location = None

        if not resource_location:
            continue

        new_reference = DataResource(
            file_location=resource_location, file_type=file_type,
            file_description=reference['description'])

        if "license" in reference:
            license_fields = get_prefilled_dictionary(
                ["name", "url", "description"],
                reference["license"])

            resource_license = get_or_create(
                db.session, License, name=license_fields['name'],
                url=license_fields['url'],
                description=license_fields['description'])
            new_reference.file_license = resource_license.id

        resources.append(new_reference)

    return resources
コード例 #4
0
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is resolved to
    a location and, when that location is usable, turned into a
    ``DataResource``:

    * a location containing 'http' but not 'hepdata', or one matched by
      ``contains_accepted_url``, is kept as a link ('http://' is prepended
      when it is missing);
    * a location containing none of 'http', 'www' and 'resource' is treated
      as a file local to the submission and joined to ``basepath``;
    * anything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: directory that submission-local files are relative to
    :param recid: passed to ``download_resource_file`` for downloaded entries
    :param version: not used by this function
    :param yaml_document: mapping with an 'additional_resources' list whose
        entries provide 'location' and 'description' and, optionally,
        'license'
    :return: list of the DataResource objects that were created
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)
        has_http = 'http' in resource_location

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            # An accepted URL pattern supplies the type, otherwise plain html.
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in resource_location and 'resource' not in resource_location:
            # this is a file local to the submission.
            # NOTE(review): any local name containing 'resource' fails this
            # substring check and is downloaded instead - confirm intended.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
                print('Downloaded resource location is {0}'.format(resource_location))
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                # Falsy location makes the entry get skipped below.
                resource_location = None

        if not resource_location:
            continue

        new_reference = DataResource(
            file_location=resource_location, file_type=file_type,
            file_description=reference['description'])

        if "license" in reference:
            license_fields = get_prefilled_dictionary(
                ["name", "url", "description"],
                reference["license"])

            resource_license = get_or_create(
                db.session, License, name=license_fields['name'],
                url=license_fields['url'],
                description=license_fields['description'])
            new_reference.file_license = resource_license.id

        resources.append(new_reference)

    return resources
コード例 #5
0
ファイル: submission_test.py プロジェクト: HEPData/hepdata3
def test_file_extension_pattern():
    """Verify infer_file_type against a table of file names and expected type labels."""
    # (file name, expected label) pairs, checked in this order.
    expectations = (
        ("test.py", "Python"),
        ("test.cpp", "C++"),
        ("test.c", "C"),
        ("test.sh", "Bash Shell"),
        ("test.root", "ROOT"),
        ("test.docx", "docx"),
        ("test", "resource"),
    )
    test_files = [
        {"file": file_name, "exp_result": label}
        for file_name, label in expectations
    ]

    for entry in test_files:
        inferred = infer_file_type(entry["file"])
        assert (entry["exp_result"] == inferred)