コード例 #1
0
def parse_additional_resources(basepath, recid, yaml_document):
    """
    Build DataResource objects from the ``additional_resources`` section
    of a submission document.

    Each entry is classified by its ``location``:

    * an external link (contains ``http`` but not ``hepdata``, or matches
      an accepted URL pattern) is kept as a link, with ``http://``
      prepended when it is missing;
    * a location starting with ``/resource`` is remapped to the
      ``resources`` directory next to ``basepath`` and must exist;
    * any other location without ``http``/``www`` is treated as a file
      local to the submission and joined onto ``basepath``;
    * everything else is stored unchanged.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id (not used by this function)
    :param yaml_document: mapping holding an ``additional_resources``
        list; each entry provides ``location`` and ``description`` and
        may provide ``license``
    :return: list of DataResource objects, one per entry with a non-empty
        resolved location
    :raises ValueError: if a ``/resource`` location does not exist in the
        ``resources`` directory
    """
    parsed = []
    for entry in yaml_document['additional_resources']:
        location = entry['location']

        file_type = infer_file_type(location)
        contains_pattern, pattern = contains_accepted_url(location)

        lowered = location.lower()
        has_http = 'http' in lowered
        is_external_link = has_http and 'hepdata' not in location

        if is_external_link or contains_pattern:
            file_type = pattern if pattern else 'html'

            # Accepted URLs may have been given without a scheme.
            if not has_http:
                location = "http://" + location

        elif not has_http and 'www' not in lowered:
            if location.startswith('/resource'):
                # Old file migrated from hepdata.cedar.ac.uk. This should
                # only be reached via mock_import_old_record, in which
                # case the file is expected to already sit in the
                # 'resources' directory beside the submission.
                location = os.path.join(
                    os.path.dirname(basepath),
                    'resources',
                    os.path.basename(location)
                )
                if not os.path.exists(location):
                    raise ValueError("No such path %s" % location)
            else:
                # A file local to the submission.
                location = os.path.join(basepath, location)

        if not location:
            continue

        data_resource = DataResource(
            file_location=location,
            file_type=file_type,
            file_description=entry['description'])

        if "license" in entry:
            data_resource.file_license = get_license(entry["license"]).id

        parsed.append(data_resource)

    return parsed
コード例 #2
0
ファイル: submission.py プロジェクト: HEPData/hepdata3
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Each entry of ``yaml_document['additional_resources']`` is classified
    by its ``location``:

    * an external link (contains ``http`` but not ``hepdata``, or matches
      an accepted URL pattern) is kept as a link, with ``http://``
      prepended when it is missing;
    * a location containing none of ``http``, ``www`` or ``resource`` is
      treated as a file local to the submission and joined onto
      ``basepath``;
    * everything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: the path local resource locations are joined onto
    :param recid: the record id, passed to ``download_resource_file``
    :param version: not used by this function; kept for callers
    :param yaml_document: mapping holding an ``additional_resources``
        list; each entry provides ``location`` and ``description`` and
        may provide ``license``
    :return: list of DataResource objects, one per usable entry
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # URL schemes and host names are case-insensitive, so detect them
        # on a lower-cased copy; otherwise 'HTTP://...' or 'WWW....' would
        # be mistaken for a file local to the submission.
        lowered = resource_location.lower()
        has_http = 'http' in lowered

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http and 'www' not in lowered
              and 'resource' not in resource_location):
            # this is a file local to the submission.
            # NOTE(review): 'resource' is a substring match, so a local
            # file whose name merely contains 'resource' falls through to
            # the download branch below -- confirm this is intended.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                downloaded_location = download_resource_file(
                    recid, resource_location)
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                resource_location = None
            else:
                resource_location = downloaded_location
                log.info('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License, name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
コード例 #3
0
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Each entry of ``yaml_document['additional_resources']`` is classified
    by its ``location``:

    * an external link (contains ``http`` but not ``hepdata``, or matches
      an accepted URL pattern) is kept as a link, with ``http://``
      prepended when it is missing;
    * a location containing none of ``http``, ``www`` or ``resource`` is
      treated as a file local to the submission and joined onto
      ``basepath``;
    * everything else is fetched with ``download_resource_file``; if that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: the path local resource locations are joined onto
    :param recid: the record id, passed to ``download_resource_file``
    :param version: not used by this function; kept for callers
    :param yaml_document: mapping holding an ``additional_resources``
        list; each entry provides ``location`` and ``description`` and
        may provide ``license``
    :return: list of DataResource objects, one per usable entry
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        # URL schemes and host names are case-insensitive, so detect them
        # on a lower-cased copy; otherwise 'HTTP://...' or 'WWW....' would
        # be mistaken for a file local to the submission.
        lowered = resource_location.lower()
        has_http = 'http' in lowered

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http and 'www' not in lowered
              and 'resource' not in resource_location):
            # this is a file local to the submission.
            # NOTE(review): 'resource' is a substring match, so a local
            # file whose name merely contains 'resource' falls through to
            # the download branch below -- confirm this is intended.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                downloaded_location = download_resource_file(
                    recid, resource_location)
            except URLError:
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                resource_location = None
            else:
                resource_location = downloaded_location
                log.info('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License, name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
コード例 #4
0
def process_data_file(recid, version, basepath, data_obj, datasubmission,
                      main_file_path):
    """
    Store the metadata of a data file and its supplementary resources.

    A DataResource is created for the main data file and committed, then
    the data submission is updated with its id, its location in the
    publication, its keywords and any additional resources before a
    final commit.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file in the DB
    :param main_file_path: the data file path
    :return: None
    """
    data_resource = DataResource(file_location=main_file_path,
                                 file_type="data")

    if "data_license" in data_obj:
        license_fields = get_prefilled_dictionary(
            ["name", "url", "description"],
            data_obj["data_license"])

        data_license = get_or_create(
            db.session,
            License,
            name=license_fields['name'],
            url=license_fields['url'],
            description=license_fields['description'])

        data_resource.file_license = data_license.id

    # The resource must be committed first: its id is only available
    # afterwards, and the data submission row references it.
    db.session.add(data_resource)
    db.session.commit()
    datasubmission.data_file = data_resource.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    cleanup_data_keywords(datasubmission)

    if "keywords" in data_obj:
        for keyword_group in data_obj["keywords"]:
            keyword_name = keyword_group['name']
            for keyword_value in keyword_group['values']:
                datasubmission.keywords.append(
                    Keyword(name=keyword_name, value=keyword_value))

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        for extra_resource in parse_additional_resources(
                basepath, recid, version, data_obj):
            datasubmission.resources.append(extra_resource)

    db.session.commit()
コード例 #5
0
ファイル: submission.py プロジェクト: HEPData/hepdata3
def process_data_file(recid, version, basepath, data_obj, datasubmission, main_file_path):
    """
    Record a data file and its supplementary files in the database.

    Creates and commits a DataResource for the main data file, links it
    to the data submission, then attaches the publication location,
    keywords and additional resources found in ``data_obj`` and commits
    again.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file in the DB
    :param main_file_path: the data file path
    :return: None
    """
    primary_resource = DataResource(
        file_location=main_file_path,
        file_type="data")

    if "data_license" in data_obj:
        license_info = get_prefilled_dictionary(
            ["name", "url", "description"], data_obj["data_license"])

        license_record = get_or_create(
            db.session, License,
            name=license_info['name'],
            url=license_info['url'],
            description=license_info['description'])

        primary_resource.file_license = license_record.id

    db.session.add(primary_resource)
    # Commit now so the resource has an id that the data submission
    # table can reference.
    db.session.commit()

    datasubmission.data_file = primary_resource.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    if "keywords" in data_obj:
        for entry in data_obj["keywords"]:
            name = entry['name']
            for value in entry['values']:
                new_keyword = Keyword(name=name, value=value)
                datasubmission.keywords.append(new_keyword)

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        additional = parse_additional_resources(
            basepath, recid, version, data_obj)
        for item in additional:
            datasubmission.resources.append(item)

    db.session.commit()