예제 #1
0
def parse_additional_resources(basepath, recid, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is converted into
    a ``DataResource``. The entry's ``location`` is classified as follows:

    * an external link (contains ``http`` but not ``hepdata``, or is matched
      by ``contains_accepted_url``): the file type becomes the matched
      pattern, or ``'html'`` when no pattern is returned, and ``http://`` is
      prepended when the location has no ``http`` in it;
    * a location without ``http``/``www`` that starts with ``/resource``: it
      is looked up by base name in the ``resources`` directory next to
      ``basepath``;
    * any other location without ``http``/``www``: it is joined to
      ``basepath``;
    * anything else is kept unchanged with the inferred file type.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id (not used by this function)
    :param yaml_document: mapping with an ``additional_resources`` list whose
        entries provide ``location`` and ``description`` and, optionally,
        ``license``
    :return: list of newly created ``DataResource`` objects
    :raises ValueError: if a ``/resource...`` location does not exist in the
        ``resources`` directory
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        # Lower-case once; the http/www checks are case-insensitive.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif not has_http and 'www' not in location_lower:
            if resource_location.startswith('/resource'):
                # This is an old file migrated from hepdata.cedar.ac.uk. We
                # should only get here if using mock_import_old_record, in
                # which case the resources should already be in the 'resources'
                # directory
                resource_location = os.path.join(
                    os.path.dirname(basepath),
                    'resources',
                    os.path.basename(resource_location)
                )
                if not os.path.exists(resource_location):
                    raise ValueError("No such path %s" % resource_location)
            else:
                # this is a file local to the submission.
                resource_location = os.path.join(basepath, resource_location)

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                resource_license = get_license(reference["license"])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
예제 #2
0
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is converted into
    a ``DataResource``. The entry's ``location`` is classified as follows:

    * an external link (contains ``http`` but not ``hepdata``, or is matched
      by ``contains_accepted_url``): the file type becomes the matched
      pattern, or ``'html'`` when no pattern is returned, and ``http://`` is
      prepended when the location has no ``http`` in it;
    * a location without ``http``, ``www`` or ``resource``: it is treated as a
      file local to the submission and joined to ``basepath``;
    * anything else is fetched with ``download_resource_file``; when that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id, passed on to ``download_resource_file``
    :param version: not used by this function
    :param yaml_document: mapping with an ``additional_resources`` list whose
        entries provide ``location`` and ``description`` and, optionally,
        ``license``
    :return: list of newly created ``DataResource`` objects
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        # URL schemes and host names are case-insensitive, so test a
        # lower-cased copy. Otherwise 'HTTPS://...' would get a second
        # 'http://' prefix or be mistaken for a local file.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http and 'www' not in location_lower
                and 'resource' not in resource_location):
            # this is a file local to the submission.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
            except URLError:
                # resource_location still holds the original location here.
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                resource_location = None
            else:
                print('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License, name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
예제 #3
0
def parse_additional_resources(basepath, recid, version, yaml_document):
    """
    Parses out the additional resource section for a full submission.

    Every entry of ``yaml_document['additional_resources']`` is converted into
    a ``DataResource``. The entry's ``location`` is classified as follows:

    * an external link (contains ``http`` but not ``hepdata``, or is matched
      by ``contains_accepted_url``): the file type becomes the matched
      pattern, or ``'html'`` when no pattern is returned, and ``http://`` is
      prepended when the location has no ``http`` in it;
    * a location without ``http``, ``www`` or ``resource``: it is treated as a
      file local to the submission and joined to ``basepath``;
    * anything else is fetched with ``download_resource_file``; when that
      raises ``URLError`` the entry is logged and skipped.

    :param basepath: the path the submission has been loaded to
    :param recid: the record id, passed on to ``download_resource_file``
    :param version: not used by this function
    :param yaml_document: mapping with an ``additional_resources`` list whose
        entries provide ``location`` and ``description`` and, optionally,
        ``license``
    :return: list of newly created ``DataResource`` objects
    """
    resources = []
    for reference in yaml_document['additional_resources']:
        resource_location = reference['location']
        # URL schemes and host names are case-insensitive, so test a
        # lower-cased copy. Otherwise 'HTTPS://...' would get a second
        # 'http://' prefix or be mistaken for a local file.
        location_lower = resource_location.lower()
        has_http = 'http' in location_lower

        file_type = infer_file_type(resource_location)
        contains_pattern, pattern = contains_accepted_url(resource_location)

        if (has_http and 'hepdata' not in resource_location) or contains_pattern:
            file_type = pattern if pattern else 'html'

            # in case URLs do not have http added.
            if not has_http:
                resource_location = "http://" + resource_location

        elif (not has_http and 'www' not in location_lower
                and 'resource' not in resource_location):
            # this is a file local to the submission.
            resource_location = os.path.join(basepath, resource_location)
        else:
            try:
                resource_location = download_resource_file(recid, resource_location)
            except URLError:
                # resource_location still holds the original location here.
                log.error("Unable to download {0}. The resource is unavailable.".format(resource_location))
                resource_location = None
            else:
                print('Downloaded resource location is {0}'.format(resource_location))

        if resource_location:
            new_reference = DataResource(
                file_location=resource_location, file_type=file_type,
                file_description=reference['description'])

            if "license" in reference:
                license_fields = get_prefilled_dictionary(
                    ["name", "url", "description"],
                    reference["license"])

                resource_license = get_or_create(
                    db.session, License, name=license_fields['name'],
                    url=license_fields['url'],
                    description=license_fields['description'])
                new_reference.file_license = resource_license.id

            resources.append(new_reference)

    return resources
예제 #4
0
def process_data_file(recid, version, basepath, data_obj, datasubmission,
                      main_file_path):
    """
    Takes a data file and any supplementary files and persists their
    metadata to the database whilst recording their upload path.

    The main data file is committed first so that its id can be stored on
    ``datasubmission``. Existing keywords and resources of ``datasubmission``
    are cleaned up before the new ones from ``data_obj`` are attached, and a
    final commit persists the changes.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file in the DB
    :param main_file_path: the data file path
    :return: None
    """
    main_data_file = DataResource(file_location=main_file_path,
                                  file_type="data")

    if "data_license" in data_obj:
        license_fields = get_prefilled_dictionary(
            ["name", "url", "description"], data_obj["data_license"])

        data_license = get_or_create(db.session,
                                     License,
                                     name=license_fields['name'],
                                     url=license_fields['url'],
                                     description=license_fields['description'])

        main_data_file.file_license = data_license.id

    db.session.add(main_data_file)
    # I have to do the commit here, otherwise I have no ID to reference in the data submission table.
    db.session.commit()

    datasubmission.data_file = main_data_file.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    cleanup_data_keywords(datasubmission)

    if "keywords" in data_obj:
        # Each entry carries one name and a list of values; one Keyword row
        # is created per (name, value) pair.
        for keyword_entry in data_obj["keywords"]:
            keyword_name = keyword_entry['name']
            for value in keyword_entry['values']:
                datasubmission.keywords.append(
                    Keyword(name=keyword_name, value=value))

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        resources = parse_additional_resources(basepath, recid, version,
                                               data_obj)
        for resource in resources:
            datasubmission.resources.append(resource)

    db.session.commit()
예제 #5
0
def process_data_file(recid, version, basepath, data_obj, datasubmission, main_file_path):
    """
    Takes a data file and any supplementary files and persists their
    metadata to the database whilst recording their upload path.

    The main data file is committed first so that its id can be stored on
    ``datasubmission``. Keywords from ``data_obj`` are appended, existing
    resources of ``datasubmission`` are cleaned up before the new additional
    resources are attached, and a final commit persists the changes.

    :param recid: the record id
    :param version: version of the resource to be stored
    :param basepath: the path the submission has been loaded to
    :param data_obj: Object representation of loaded YAML file
    :param datasubmission: the DataSubmission object representing this file in the DB
    :param main_file_path: the data file path
    :return: None
    """
    main_data_file = DataResource(
        file_location=main_file_path, file_type="data")

    if "data_license" in data_obj:
        license_fields = get_prefilled_dictionary(
            ["name", "url", "description"], data_obj["data_license"])

        data_license = get_or_create(
            db.session, License, name=license_fields['name'],
            url=license_fields['url'],
            description=license_fields['description'])

        main_data_file.file_license = data_license.id

    db.session.add(main_data_file)
    # I have to do the commit here, otherwise I have no ID to reference in the data submission table.
    db.session.commit()

    datasubmission.data_file = main_data_file.id

    if "location" in data_obj:
        datasubmission.location_in_publication = data_obj["location"]

    if "keywords" in data_obj:
        # Each entry carries one name and a list of values; one Keyword row
        # is created per (name, value) pair.
        for keyword_entry in data_obj["keywords"]:
            keyword_name = keyword_entry['name']
            for value in keyword_entry['values']:
                datasubmission.keywords.append(
                    Keyword(name=keyword_name, value=value))

    cleanup_data_resources(datasubmission)

    if "additional_resources" in data_obj:
        resources = parse_additional_resources(basepath, recid, version, data_obj)
        for resource in resources:
            datasubmission.resources.append(resource)

    db.session.commit()
예제 #6
0
def update_analyses():
    """
    Attach externally hosted analyses to finished submissions as resources.

    For every entry of the ``ANALYSES_ENDPOINTS`` config that defines an
    ``endpoint_url``, the URL is fetched and its JSON body is read as a
    mapping from record identifier to a collection of analysis identifiers.
    For each record with a finished submission, a ``DataResource`` is appended
    for every analysis URL (built from ``url_template``) that is not already
    attached. Submissions that gained resources are committed and reindexed;
    a failure there is rolled back and logged. Falsy responses are skipped
    without logging.
    """
    endpoints = current_app.config["ANALYSES_ENDPOINTS"]

    for analysis_endpoint in endpoints:
        endpoint_config = endpoints[analysis_endpoint]

        if "endpoint_url" not in endpoint_config:
            log.debug(
                "No endpoint url configured for {0}".format(analysis_endpoint))
            continue

        log.info("Updating analyses from {0}...".format(analysis_endpoint))

        response = requests.get(endpoint_config["endpoint_url"])
        if not response:
            continue

        analyses = response.json()

        for record in analyses:
            submission = get_latest_hepsubmission(
                inspire_id=record, overall_status='finished')

            if not submission:
                log.debug(
                    "An analysis is available in {0} but with no equivalent in HEPData (ins{1})."
                    .format(analysis_endpoint, record))
                continue

            num_new_resources = 0

            for analysis in analyses[record]:
                _resource_url = endpoint_config["url_template"].format(analysis)

                already_added = is_resource_added_to_submission(
                    submission.publication_recid,
                    submission.version,
                    _resource_url)
                if already_added:
                    continue

                print('Adding {} analysis to ins{} with URL {}'.format(
                    analysis_endpoint, record, _resource_url))
                submission.resources.append(
                    DataResource(file_location=_resource_url,
                                 file_type=analysis_endpoint))
                num_new_resources += 1

            # Nothing new for this record: skip the commit and reindex.
            if not num_new_resources:
                continue

            try:
                db.session.add(submission)
                db.session.commit()
                index_record_ids([submission.publication_recid])
            except Exception as e:
                db.session.rollback()
                log.error(e)
예제 #7
0
def test_receive_before_flush_errors(app, mocker, caplog):
    """
    Check that errors raised while deleting a data submission are logged.

    The DataResource and DataReview lookups in the submission models module
    are replaced by mocks that hand back unpersisted objects, so deleting
    them from the db cannot succeed.
    """
    caplog.set_level(logging.ERROR)

    recid = "12345"
    datasubmission = DataSubmission(publication_recid=recid)
    db.session.add(datasubmission)
    db.session.commit()

    # DataResource.query.filter_by(id=...).first() -> unpersisted DataResource
    resource_filter_result = mocker.Mock(first=lambda: DataResource())
    resource_query = mocker.Mock(filter_by=lambda id: resource_filter_result)
    mocker.patch(
        'hepdata.modules.submission.models.DataResource',
        mocker.Mock(query=resource_query),
    )

    # DataReview.query.filter_by(data_recid=...).all() -> [unpersisted DataReview]
    review_filter_result = mocker.Mock(all=lambda: [DataReview()])
    review_query = mocker.Mock(
        filter_by=lambda data_recid: review_filter_result)
    mocker.patch(
        'hepdata.modules.submission.models.DataReview',
        mocker.Mock(query=review_query),
    )

    db.session.delete(datasubmission)
    db.session.commit()

    # Last error logs are what we're looking for
    logged = caplog.records
    assert len(logged) == 2

    expected_prefixes = (
        "Unable to delete data resource with id None whilst deleting data submission id 1. Error was: Instance '<DataResource at ",
        "Unable to delete review with id None whilst deleting data submission id 1. Error was: Instance '<DataReview at ",
    )
    for position, prefix in enumerate(expected_prefixes):
        entry = logged[position]
        assert entry.levelname == "ERROR"
        assert entry.msg.startswith(prefix)
        assert entry.msg.endswith(" is not persisted")
예제 #8
0
def add_resource(type, identifier, version):
    """
    Adds a data resource to either the submission or individual data files.

    The resource details are read from the submitted form fields
    ``analysisType``, ``analysisOther``, ``analysisURL`` and
    ``analysisDescription``. When the type is ``'other'`` the value of
    ``analysisOther`` is used as the type instead.

    :param type: ``'submission'`` to attach the resource to the HEPSubmission
        with ``publication_recid == identifier`` and the given version, or
        ``'data'`` to attach it to the DataSubmission with ``id == identifier``.
        Any other value selects nothing and ends in the error page (or a 403).
    :param identifier: publication record id or data submission id, depending
        on ``type``
    :param version: submission version; only used when ``type`` is
        ``'submission'``
    :return: a redirect to the record page when the resource was stored,
        otherwise the rendered error page
    """

    submission = None
    inspire_id = None
    recid = None

    # NOTE(review): with SQLAlchemy, ``.one()`` raises when no row (or more
    # than one row) matches, so the ``if submission`` checks are presumably
    # always true here - confirm before relying on them.
    if type == 'submission':
        submission = HEPSubmission.query.filter_by(publication_recid=identifier, version=version).one()
        if submission:
            inspire_id = submission.inspire_id
            recid = submission.publication_recid

    elif type == 'data':
        submission = DataSubmission.query.filter_by(id=identifier).one()
        if submission:
            inspire_id = submission.publication_inspire_id
            recid = submission.publication_recid

    if not user_allowed_to_perform_action(recid):
        abort(403)

    analysis_type = request.form.get('analysisType', None)
    analysis_other = request.form.get('analysisOther', None)
    analysis_url = request.form.get('analysisURL', None)
    analysis_description = request.form.get('analysisDescription', None)

    if analysis_type == 'other':
        analysis_type = analysis_other

    if analysis_type and analysis_url and submission:
        # A missing description is stored as the literal string 'None'
        # because of the str() conversion.
        new_resource = DataResource(file_location=analysis_url, file_type=analysis_type,
                                    file_description=str(analysis_description))

        submission.resources.append(new_resource)

        try:
            db.session.add(submission)
            db.session.commit()
        except Exception:
            db.session.rollback()
            raise

        # Reindexing is best effort: the resource is already stored, so a
        # failure is only logged. Catch Exception rather than everything so
        # that SystemExit/KeyboardInterrupt still propagate.
        try:
            index_record_ids([recid])
        except Exception:
            log.error('Failed to reindex {0}'.format(recid))

        if inspire_id:
            return redirect('/record/ins{0}'.format(inspire_id))
        return redirect('/record/{0}'.format(recid))

    return render_template('hepdata_records/error_page.html', recid=None,
                           header_message='Error adding resource.',
                           message='Unable to add resource. Please try again.',
                           errors={})
예제 #9
0
def test_data_submission_cascades(app):
    """
    Check that deleting a DataSubmission also removes its reviews, review
    messages, data resources and the resources' files, except for a file that
    is still referenced by another DataResource.
    """
    # Create a data submission
    recid = "12345"
    datasubmission = DataSubmission(publication_recid=recid)
    db.session.add(datasubmission)
    db.session.commit()

    # Add a data review with a message
    message = Message(user=1, message="Test review message")
    datareview = DataReview(publication_recid=recid,
                            data_recid=datasubmission.id,
                            messages=[message])
    db.session.add(datareview)
    db.session.commit()

    reviews = DataReview.query.filter_by(publication_recid=recid).all()
    assert len(reviews) == 1
    assert reviews[0] == datareview
    messages = Message.query.all()
    assert len(messages) == 1

    # Add some data resources with files
    files_dir = os.path.join(app.config['CFG_DATADIR'], 'models_test')
    os.makedirs(files_dir, exist_ok=True)

    try:
        resources = []

        for i in range(3):
            file_path = os.path.join(files_dir, f'file{i}.txt')
            # Create an empty file; the context manager closes it for us.
            with open(file_path, 'w'):
                pass
            dataresource = DataResource(file_location=file_path,
                                        file_type="data")
            db.session.add(dataresource)
            db.session.commit()
            resources.append(dataresource)

        # Create an extra DataResource linking to file0.txt but
        # not linked to the submission
        # (because this situation has arisen in prod)
        dataresource = DataResource(
            file_location=os.path.join(files_dir, 'file0.txt'),
            file_type="data")
        db.session.add(dataresource)
        db.session.commit()

        assert len(os.listdir(files_dir)) == 3

        datasubmission.data_file = resources[0].id
        datasubmission.resources = resources[1:]
        db.session.add(datasubmission)
        db.session.commit()

        # Check we can find resources in db
        resource_ids = [x.id for x in resources]
        dataresources = DataResource.query.filter(
            DataResource.id.in_(resource_ids)).all()
        assert len(dataresources) == 3

        # Delete datasubmission
        db.session.delete(datasubmission)
        db.session.commit()

        # Check that datareview is deleted
        reviews = DataReview.query.filter_by(publication_recid=recid).all()
        assert len(reviews) == 0
        # Check that message is deleted
        messages = Message.query.all()
        assert len(messages) == 0

        # Check all resources have been deleted
        dataresources = DataResource.query.filter(
            DataResource.id.in_([x.id for x in resources])).all()

        assert len(dataresources) == 0

        # Check files are also deleted, apart from file0
        # as that's referenced by another DataResource
        assert os.listdir(files_dir) == ['file0.txt']
    finally:
        # Tidy up even when an assertion above fails, so leftover files
        # cannot break the file-count check on a later run.
        shutil.rmtree(files_dir)