Exemple #1
0
def attach_information_to_record(recid):
    """
    Fetch INSPIRE metadata for the ``inspire_id`` posted in the request form
    and merge it into the record identified by ``recid``.

    When both the lookup and the record retrieval succeed, the record is
    updated and committed, and the HEPSubmission for ``recid`` whose overall
    status is "todo" is given the same INSPIRE ID.

    :param recid: ID of the record to update.
    :return: JSON response holding a ``status`` key and, on failure, a
        ``message`` key describing what went wrong.
    """
    inspire_id = request.form['inspire_id']

    inspire_data, lookup_status = get_inspire_record_information(inspire_id)
    inspire_data["inspire_id"] = inspire_id

    record = get_record_by_id(recid)

    # A failed INSPIRE lookup is reported in preference to a missing record.
    if lookup_status != 'success':
        failure_text = 'Request for INSPIRE record {} failed.'.format(inspire_id)
        return jsonify({'status': lookup_status, 'message': failure_text})

    if record is None:
        failure_text = 'No record with recid {} was found.'.format(str(recid))
        return jsonify({'status': 'failed', 'message': failure_text})

    inspire_data['recid'] = recid
    record.update(inspire_data)
    record.commit()

    pending_query = HEPSubmission.query.filter_by(
        publication_recid=recid, overall_status="todo")
    pending_submission = pending_query.first()
    pending_submission.inspire_id = inspire_id
    db.session.add(pending_submission)
    db.session.commit()

    return jsonify({'status': 'success'})
Exemple #2
0
def attach_information_to_record(recid):
    """
    Given an INSPIRE data representation, this will process the data, and update information
    for a given record id with the contents.

    The INSPIRE ID is read from the ``inspire_id`` field of the request form.
    The record is only touched when the INSPIRE lookup reports success.

    :param recid: ID of the record to update.
    :return: JSON response holding a ``status`` key and, on failure, a
        ``message`` key describing what went wrong.
    """

    inspire_id = request.form['inspire_id']

    content, status = get_inspire_record_information(inspire_id)

    # Never patch with the result of a failed lookup: the patch below is a
    # full diff from the record to ``content``, so it would also remove every
    # key of the record that ``content`` does not carry.
    if status != 'success':
        return jsonify({'status': status,
                        'message': 'Request for INSPIRE record {} failed.'.format(inspire_id)})

    content["inspire_id"] = inspire_id

    record = get_record_by_id(recid)
    if record is None:
        return jsonify({'status': 'failed',
                        'message': 'No record with that recid was found.'})

    content['recid'] = recid

    patch = jsonpatch.JsonPatch.from_diff(record, content)
    record = record.patch(patch=patch)
    record.commit()
    db.session.commit()

    return jsonify({'status': 'success'})
def test_parser(inspire_id, title, creation_date, year, subject_area):
    """Compare the information parsed for ``inspire_id`` with the expected fields.

    ``subject_area`` is only compared when it is not None.
    """
    parsed, _status = get_inspire_record_information(inspire_id)

    actual_title = decode_string(parsed["title"])
    expected_title = decode_string(title)
    assert actual_title == expected_title

    assert parsed["creation_date"] == creation_date
    assert int(parsed["year"]) == year

    if subject_area is None:
        return
    assert parsed["subject_area"] == subject_area
Exemple #4
0
def test_update_record_info(app):
    """Exercise ``update_record_info`` for a missing, an unknown, an invalid and a valid Inspire ID."""
    # A missing Inspire ID is rejected before anything else happens.
    assert update_record_info(None) == 'Inspire ID is None'

    valid_inspire_id = '1311487'
    invalid_inspire_id = '19999999'

    for inspire_id in (valid_inspire_id, invalid_inspire_id):
        # No HEPSubmission object has been created for this ID yet.
        assert update_record_info(inspire_id) == 'No HEPData submission'

        reviewer = {'name': 'Reviewer', 'email': '*****@*****.**'}
        uploader = {'name': 'Uploader', 'email': '*****@*****.**'}
        submission = process_submission_payload(
            inspire_id=inspire_id,
            submitter_id=1,
            reviewer=reviewer,
            uploader=uploader,
            send_upload_email=False,
        )

        # Process the files to create DataSubmission tables in the DB.
        here = os.path.dirname(os.path.realpath(__file__))
        source_dir = os.path.join(here, 'test_data/test_submission')
        scratch_root = tempfile.mkdtemp(dir=CFG_TMPDIR)
        working_copy = os.path.join(scratch_root, 'test_submission')
        shutil.copytree(source_dir, working_copy)
        submission_yaml = os.path.join(working_copy, 'submission.yaml')
        process_submission_directory(working_copy, submission_yaml,
                                     submission.publication_recid)
        do_finalise(submission.publication_recid, force_finalise=True, convert=False)

        if inspire_id == invalid_inspire_id:
            assert update_record_info(inspire_id) == 'Invalid Inspire ID'
        else:
            # Overwrite the publication information with that of another
            # record so that there is something to update.
            other_information, status = get_inspire_record_information('1650066')
            assert status == 'success'
            hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
            assert hep_submission is not None
            update_record(hep_submission.publication_recid, other_information)

            # The first call performs the update; the second finds the
            # information already current.
            assert update_record_info(inspire_id, send_email=True) == 'Success'
            assert update_record_info(inspire_id) == 'No update needed'

        unload_submission(submission.publication_recid)
Exemple #5
0
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload.

    Creates a record from either the INSPIRE information or the given title,
    gets or creates the matching HEPSubmission, attaches a primary reviewer
    and a primary uploader, optionally emails the uploader and finally indexes
    the submission with ``AdminIndexer``.

    :param inspire_id: INSPIRE ID used to look up the publication information.
    :param title: title used for the record when no ``inspire_id`` is given.
    :param submitter_id: ID of the submitter; when absent, ``user_id`` is used
        if supplied, otherwise the ID of the current user.
    :param reviewer: mapping providing ``name`` and ``email`` of the reviewer.
    :param uploader: mapping providing ``name`` and ``email`` of the uploader.
    :param send_upload_email: whether to email the uploader (defaults to True).
    :param message: optional message passed on to the upload email.
    :return: the object returned by ``get_or_create_hepsubmission``.
    :raises ValueError: if neither ``inspire_id`` nor ``title`` is provided.
    """
    inspire_id = kwargs.get('inspire_id')
    title = kwargs.get('title')

    if inspire_id:
        # NOTE(review): the lookup status is not inspected, so a record is
        # created even when the INSPIRE request fails - confirm callers rely on this.
        content, _status = get_inspire_record_information(inspire_id)
        content["inspire_id"] = inspire_id
    elif title:
        content = {'title': title}
    else:
        # Built-in exceptions accept no keyword arguments; passing
        # ``message=`` would raise TypeError instead of the intended error.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)
    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        submitter_id = kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(record_information["recid"], submitter_id)

    if inspire_id:
        hepsubmission.inspire_id = inspire_id
        db.session.add(hepsubmission)

    reviewer_details = kwargs.get('reviewer')

    reviewer = create_participant_record(
        reviewer_details.get('name'),
        reviewer_details.get('email'), 'reviewer', 'primary',
        record_information['recid'])
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(uploader_details.get('name'), uploader_details.get('email'),
                                         'uploader', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be asked to
        # review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    admin_idx = AdminIndexer()
    admin_idx.index_submission(hepsubmission)

    return hepsubmission
Exemple #6
0
def finalise(recid, publication_record=None, force_finalise=False):
    """
    Finalise the submission for ``recid``.

    When no ``publication_record`` is passed in, the stored record's INSPIRE
    ID is used to fetch fresh publication information, and the record is
    updated with it if that lookup succeeds. The commit message is taken from
    the ``message`` field of the request form.

    :param recid: ID of the record to finalise.
    :param publication_record: publication information to finalise with, if already available.
    :param force_finalise: forwarded unchanged to ``do_finalise``.
    :return: whatever ``do_finalise`` returns.
    """
    message = request.form.get('message')

    if not publication_record:
        # Refresh the publication information from INSPIRE before finalising.
        stored_record = get_record_by_id(recid)
        inspire_content, lookup_status = get_inspire_record_information(
            stored_record['inspire_id'])
        if lookup_status == 'success':
            publication_record = update_record(recid, inspire_content)

    finalise_options = dict(
        publication_record=publication_record,
        force_finalise=force_finalise,
        commit_message=message,
        send_tweet=True,
    )
    return do_finalise(recid, **finalise_options)
Exemple #7
0
def test_parser():
    """Check title, creation date and year parsed for a set of INSPIRE records."""
    test_data = [
        {
            "inspire_id": "1245023",
            "title": "High-statistics study of $K^0_S$ pair production in two-photon collisions",
            "creation_date": "2013-07-29",
            "year": 2013,
        },
        {
            "inspire_id": "1183818",
            # The last fragment is a raw string: "\s" is not a valid escape
            # sequence in a normal string literal.
            "title": "Measurements of the pseudorapidity dependence "
            "of the total transverse energy "
            "in proton-proton "
            r"collisions at $\sqrt{s}=7$ TeV with ATLAS",
            "creation_date": "2012-08-01",
            "year": 2012,
        },
        {
            "inspire_id": "1407276",
            "title": "Elastic scattering of negative pions by protons at 2 BeV/c",
            "creation_date": "1963-01-01",
            "year": 1963,
        },
        {
            "inspire_id": "44234",
            "title": "DIFFERENTIAL ELASTIC PION-PROTON SCATTERING AT 600-MEV, 650-MEV and 750-MEV",
            "creation_date": "2006-04-11",
            "year": 2006,
        },
        {
            "inspire_id": "1187688",
            "title": "Mesure de la polarisation du proton de recul dans la diffusion élastique "
            "pi+- p entre 550 et 1025 MeV",
            "creation_date": "1970-01-01",
            "year": 1970,
        },
        {
            "inspire_id": "67677",
            "title": "INELASTIC ELECTRON - DEUTERON SCATTERING AT HIGH-ENERGIES",
            "creation_date": "1971-01-01",
            "year": 1971,
        },
    ]

    for test in test_data:
        content, status = get_inspire_record_information(test["inspire_id"])

        assert decode_string(content["title"]) == decode_string(test["title"])
        assert content["creation_date"] == test["creation_date"]
        assert int(content["year"]) == test["year"]
Exemple #8
0
def process_submission_payload(*args, **kwargs):
    """
    Processes the submission payload

    Creates a record from either the INSPIRE information or the given title,
    gets or creates the matching HEPSubmission, attaches a primary reviewer
    and a primary uploader and optionally emails the uploader.

    :param inspire_id: INSPIRE ID used to look up the publication information.
    :param title: title used for the record when no ``inspire_id`` is given.
    :param submitter_id: ID of the submitter; when absent, ``user_id`` is used
        if supplied, otherwise the ID of the current user.
    :param reviewer: mapping providing ``name`` and ``email`` of the reviewer.
    :param uploader: mapping providing ``name`` and ``email`` of the uploader.
    :param send_upload_email: whether to email the uploader (defaults to True).
    :param message: optional message passed on to the upload email.
    :return: the object returned by ``get_or_create_hepsubmission``.
    :raises ValueError: if neither ``inspire_id`` nor ``title`` is provided.
    """
    inspire_id = kwargs.get('inspire_id')
    title = kwargs.get('title')

    if inspire_id:
        # NOTE(review): the lookup status is not inspected, so a record is
        # created even when the INSPIRE request fails - confirm callers rely on this.
        content, _status = get_inspire_record_information(inspire_id)
        content["inspire_id"] = inspire_id
    elif title:
        content = {'title': title}
    else:
        # Built-in exceptions accept no keyword arguments; passing
        # ``message=`` would raise TypeError instead of the intended error.
        raise ValueError("A title or inspire_id must be provided.")

    record_information = create_record(content)
    submitter_id = kwargs.get('submitter_id')
    if submitter_id is None:
        submitter_id = kwargs.get('user_id') if 'user_id' in kwargs else int(current_user.get_id())

    hepsubmission = get_or_create_hepsubmission(record_information["recid"], submitter_id)

    reviewer_details = kwargs.get('reviewer')

    reviewer = create_participant_record(
        reviewer_details.get('name'),
        reviewer_details.get('email'), 'reviewer', 'primary',
        record_information['recid'])
    hepsubmission.participants.append(reviewer)

    uploader_details = kwargs.get('uploader')
    uploader = create_participant_record(uploader_details.get('name'), uploader_details.get('email'),
                                         'uploader', 'primary',
                                         record_information['recid'])
    hepsubmission.participants.append(uploader)

    db.session.commit()

    if kwargs.get('send_upload_email', True):
        # Now Send Email only to the uploader first. The reviewer will be asked to
        # review only when an upload has been performed.
        message = kwargs.get('message', None)
        send_cookie_email(uploader, record_information, message)

    return hepsubmission
Exemple #9
0
    def retrieve_publication_information(self, inspire_id):
        """
        Look up the INSPIRE information for ``inspire_id``.

        :param inspire_id: id for record to get. If this contains "ins", the
            "ins" is removed and the remainder is converted to an int;
            otherwise the value is used as given.
        :return: tuple ``(content, status)`` as returned by
            ``get_inspire_record_information``, with ``content["inspire_id"]``
            set to the id used for the lookup. The content is expected to
            carry keys such as title, doi, authors, abstract, arxiv_id and
            collaboration (not verifiable from this method).
        """
        has_prefix = "ins" in inspire_id
        lookup_id = int(inspire_id.replace("ins", "")) if has_prefix else inspire_id

        publication, outcome = get_inspire_record_information(lookup_id)
        publication["inspire_id"] = lookup_id

        return publication, outcome
Exemple #10
0
def test_parser():
    """Check title, creation date, year and (where given) subject area parsed for a set of INSPIRE records."""
    test_data = [{"inspire_id": "1245023",
                  "title": "High-statistics study of $K^0_S$ pair "
                           "production in two-photon collisions",
                  "creation_date": "2013-07-29", "year": 2013,
                  "subject_area": ['HEP Experiment']},

                 {"inspire_id": "1183818",
                  # The last fragment is a raw string: "\s" is not a valid
                  # escape sequence in a normal string literal.
                  "title": "Measurements of the pseudorapidity dependence "
                           "of the total transverse energy "
                           "in proton-proton "
                           r"collisions at $\sqrt{s}=7$ TeV with ATLAS",
                  "creation_date": "2012-08-01",
                  "year": 2012,
                  "subject_area": ["HEP Experiment"]},
                 {"inspire_id": "1407276",
                  "title": "Elastic scattering of negative pions by protons at 2 BeV/c",
                  "creation_date": "1963-01-01",
                  "year": 1963},
                 {"inspire_id": "44234",
                  "title": "DIFFERENTIAL ELASTIC PION-PROTON SCATTERING AT 600-MEV, 650-MEV and 750-MEV",
                  "creation_date": "2006-04-11",
                  "year": 2006},
                 {"inspire_id": "1187688",
                  "title": "Mesure de la polarisation du proton de recul dans la diffusion élastique "
                           "pi+- p entre 550 et 1025 MeV",
                  "creation_date": "1970-01-01",
                  "year": 1970},
                 {"inspire_id": "67677",
                  "title": "INELASTIC ELECTRON - DEUTERON SCATTERING AT HIGH-ENERGIES",
                  "creation_date": "1971-01-01",
                  "year": 1971
                  }
                 ]

    for test in test_data:
        content, status = get_inspire_record_information(
            test["inspire_id"])

        assert (decode_string(content["title"]) == decode_string(test["title"]))
        assert (content["creation_date"] == test["creation_date"])
        assert (int(content["year"]) == test["year"])
        # Only some entries state the expected subject area.
        if 'subject_area' in test:
            assert (content["subject_area"] == test["subject_area"])
Exemple #11
0
    def retrieve_publication_information(self, inspire_id):
        """
        Look up the INSPIRE information for ``inspire_id`` and create a
        record from it.

        :param inspire_id: id for record to get. If this contains 'ins', the
            'ins' is removed and the remainder is converted to an int;
            otherwise the value is used as given.
        :return: the result of ``create_record`` for the retrieved content,
            whose ``inspire_id`` key is set to the id used for the lookup.
            The content is expected to carry keys such as title, doi,
            authors, abstract, arxiv_id and collaboration (not verifiable
            from this method).
        """
        has_prefix = "ins" in inspire_id
        lookup_id = int(inspire_id.replace("ins", "")) if has_prefix else inspire_id

        publication, _outcome = get_inspire_record_information(lookup_id)
        publication["inspire_id"] = lookup_id

        return create_record(publication)
Exemple #12
0
def attach_information_to_record(recid):
    """
    Given an INSPIRE data representation, this will process the data, and update information
    for a given record id with the contents.

    The INSPIRE ID is read from the ``inspire_id`` field of the request form.
    The record is only touched when the INSPIRE lookup reports success.

    :param recid: ID of the record to update.
    :return: JSON response holding a ``status`` key and, on failure, a
        ``message`` key describing what went wrong.
    """

    inspire_id = request.form["inspire_id"]

    content, status = get_inspire_record_information(inspire_id)

    # Never patch with the result of a failed lookup: the patch below is a
    # full diff from the record to ``content``, so it would also remove every
    # key of the record that ``content`` does not carry.
    if status != "success":
        return jsonify({"status": status,
                        "message": "Request for INSPIRE record {} failed.".format(inspire_id)})

    content["inspire_id"] = inspire_id

    record = get_record_by_id(recid)
    if record is None:
        return jsonify({"status": "failed", "message": "No record with that recid was found."})

    content["recid"] = recid

    patch = jsonpatch.JsonPatch.from_diff(record, content)
    record = record.patch(patch=patch)
    record.commit()
    db.session.commit()

    return jsonify({"status": "success"})
Exemple #13
0
def mock_import_old_record(inspire_id=mock_inspire_ids[1], send_email=False):
    """Creates a submission but mimics the old migrated paths. (See hepdata
    master branch at ccd691b for old migrator module.)

    The record is created from the INSPIRE information, the zipped test data
    ``old_hepdata_zips/ins<inspire_id>.zip`` next to this file is unpacked
    into the record's data directory, and the submission is processed with
    the old submission and data schemas before being finalised.

    :param inspire_id: one of ``mock_inspire_ids``.
    :param send_email: forwarded to ``do_finalise``.
    :return: False if any step fails; None (no explicit return value) otherwise.
    :raises ValueError: if ``inspire_id`` is not in ``mock_inspire_ids``.
    """
    if inspire_id not in mock_inspire_ids:
        raise ValueError('Invalid inspire id %s. Accepted values are: %s' %
                         (inspire_id, ', '.join(mock_inspire_ids)))

    # Use zipped test data for specific record(s)
    publication_information, status = get_inspire_record_information(
        inspire_id)
    publication_information["inspire_id"] = inspire_id

    # Create record
    if status == "success":
        record_information = create_record(publication_information)
    else:
        log.error("Failed to retrieve publication information for " +
                  inspire_id)
        return False

    # Unzip into correct data dir
    data_path = get_data_path_for_record(record_information['recid'])
    base_dir = os.path.dirname(os.path.realpath(__file__))
    zip_path = os.path.join(base_dir, 'old_hepdata_zips',
                            'ins%s.zip' % inspire_id)
    if os.path.isfile(zip_path):
        log.info('Unzipping %s to %s' % (zip_path, data_path))
        shutil.unpack_archive(zip_path, data_path)
        # The outer archive contains a second archive of the same name, which
        # is unpacked into a timestamped sub-directory.
        time_stamp = str(int(round(time.time())))
        yaml_path = os.path.join(data_path, time_stamp)
        sub_zip_path = os.path.join(data_path, 'ins%s.zip' % inspire_id)
        shutil.unpack_archive(sub_zip_path, yaml_path)
    else:
        log.error('Invalid path %s' % zip_path)
        return False

    # Create submission
    admin_user_id = 1

    # Consume data payload and store in db.
    get_or_create_hepsubmission(record_information["recid"], admin_user_id)

    errors = process_submission_directory(yaml_path,
                                          os.path.join(yaml_path,
                                                       "submission.yaml"),
                                          record_information["recid"],
                                          old_submission_schema=True,
                                          old_data_schema=True)

    if errors:
        # Extra positional arguments are %-interpolated into the message by
        # the logging module, so the message needs matching placeholders.
        log.error("Submission failed for %s: %s",
                  record_information["recid"], errors)
        return False

    do_finalise(record_information['recid'],
                publication_record=record_information,
                force_finalise=True,
                convert=False,
                send_email=send_email)
def update_record_info(inspire_id, send_email=False):
    """Update publication information from INSPIRE for a specific record.

    The publication record receives all of the retrieved information; the
    data records of the same submission version receive only the authors,
    creation date, journal information and collaborations. A record is only
    rewritten when at least one of those values differs from what is stored.

    :param inspire_id: INSPIRE ID as a string; any "ins" substring is removed.
    :param send_email: when True and the submission is finished, notify the
        participants of the update.
    :return: one of 'Inspire ID is None', 'No HEPData submission',
        'Invalid Inspire ID', 'No update needed' or 'Success'.
    """

    if inspire_id is None:
        log.error("Inspire ID is None")
        return 'Inspire ID is None'

    inspire_id = inspire_id.replace("ins", "")

    hep_submission = get_latest_hepsubmission(inspire_id=inspire_id)
    if hep_submission is None:
        log.warning("Failed to retrieve HEPData submission for Inspire ID {0}".format(inspire_id))
        return 'No HEPData submission'

    publication_recid = hep_submission.publication_recid

    log.info("Updating recid {} with information from Inspire record {}".format(publication_recid, inspire_id))

    updated_inspire_record_information, status = get_inspire_record_information(inspire_id)

    if status != 'success':
        log.warning("Failed to retrieve publication information for Inspire record {0}".format(inspire_id))
        return 'Invalid Inspire ID'

    # Also need to update publication information for data records.
    data_submissions = DataSubmission.query.filter_by(
        publication_recid=publication_recid, version=hep_submission.version
    ).order_by(DataSubmission.id.asc())
    record_ids = [publication_recid]  # publication record first, then data records
    for data_submission in data_submissions:
        record_ids.append(data_submission.associated_recid)

    same_information = {}
    for index, recid in enumerate(record_ids):

        if index == 0:
            updated_record_information = updated_inspire_record_information
        else:
            # Only update selected keys for data records.
            updated_record_information = {
                key: updated_inspire_record_information[key] for key in (
                    'authors', 'creation_date', 'journal_info', 'collaborations'
                )
            }

        record_information = get_record_by_id(recid)
        same_information[recid] = True
        for key, value in updated_record_information.items():
            if key not in record_information or record_information[key] != value:
                log.debug('For recid {}, key {} has new value {}'.format(recid, key, value))
                same_information[recid] = False
                # One differing key is enough: the whole record is rewritten.
                update_record(recid, updated_record_information)
                break
        log.info('For recid {}, information needs to be updated: {}'.format(recid, str(not(same_information[recid]))))

    if all(same_information.values()):
        return 'No update needed'

    if hep_submission.overall_status == 'finished':
        index_record_ids(record_ids)  # index for Elasticsearch
        # Pass the publication record ID explicitly rather than the loop
        # variable left over from above, which holds the last data record ID
        # whenever the submission has data records.
        push_data_keywords(pub_ids=[publication_recid])
        if not TESTING:
            generate_dois_for_submission.delay(inspire_id=inspire_id)  # update metadata stored in DataCite
        if send_email:
            record_information = get_record_by_id(publication_recid)
            notify_publication_update(hep_submission, record_information)   # send email to all participants

    return 'Success'
Exemple #15
0
def _import_record(inspire_id,
                   update_existing=False,
                   base_url='https://hepdata.net',
                   send_email=False):
    """Import a single record, identified by its INSPIRE ID, into the database.

    Publication information is fetched with ``get_inspire_record_information``
    and the submission file is obtained with ``_download_file(base_url,
    inspire_id)``. The archive is processed with the current schema first
    and, if that reports errors, once more with the old submission and data
    schemas before the record is finalised. If anything in the download /
    process / finalise stage raises, the submission is unloaded again.

    :param inspire_id: INSPIRE ID of the record to import (it is concatenated
        with a string in the error log message, so presumably a string -
        TODO confirm).
    :param update_existing: when a submission already exists for
        ``inspire_id``, re-import it if True; otherwise return False.
    :param base_url: passed to ``_download_file`` together with ``inspire_id``.
    :param send_email: forwarded to ``do_finalise``.
    :return: True if the record was imported and finalised successfully,
        False otherwise.
    """
    publication_information, status = get_inspire_record_information(
        inspire_id)
    if status != "success":
        log.error("Failed to retrieve publication information for " +
                  inspire_id)
        return False

    current_submission = get_latest_hepsubmission(inspire_id=inspire_id)

    # Decide which record ID to import into: a freshly created record, or the
    # record of the existing submission when updating is allowed.
    if not current_submission:
        log.info(
            "The record with id {0} does not exist in the database, so we're loading it."
            .format(inspire_id))
        publication_information["inspire_id"] = inspire_id
        record_information = create_record(publication_information)
        recid = record_information['recid']
    else:
        log.info("The record with inspire id {0} already exists.".format(
            inspire_id))
        if update_existing:
            log.info("Updating instead")
            recid = current_submission.publication_recid
        else:
            log.info("Not updating as update_existing is False")
            return False

    try:
        download_path = _download_file(base_url, inspire_id)

        filename = os.path.basename(download_path)

        # Copy the downloaded file into a timestamped directory for this record.
        time_stamp = str(int(round(time.time())))
        file_save_directory = get_data_path_for_record(str(recid), time_stamp)
        if not os.path.exists(file_save_directory):
            os.makedirs(file_save_directory)

        file_path = os.path.join(file_save_directory, filename)
        log.info("Moving file to %s" % file_path)
        shutil.copy(download_path, file_path)

        # Create submission
        admin_user_id = 1
        hepsubmission = get_or_create_hepsubmission(recid, admin_user_id)
        db.session.add(hepsubmission)
        db.session.commit()

        # Then process the payload as for any other record
        errors = process_zip_archive(file_path, recid)
        if errors:
            log.info("Errors processing archive. Re-trying with old schema.")
            # Try again with old schema
            # Need to clean up first to avoid errors
            # First delete tables
            cleanup_submission(recid, 1, [])
            # Next remove remaining files
            file_save_directory = os.path.dirname(file_path)
            submission_path = os.path.join(file_save_directory,
                                           remove_file_extension(filename))
            shutil.rmtree(submission_path)

            errors = process_zip_archive(file_path,
                                         recid,
                                         old_submission_schema=True,
                                         old_data_schema=True)

            if errors:
                # ``errors`` maps each file name to a list of error dicts,
                # each with a 'message' entry.
                log.error("Could not process zip archive: ")
                for file, file_errors in errors.items():
                    log.error("    %s:" % file)
                    for error in file_errors:
                        log.error("        %s" % error['message'])

                # Handled by the ``except`` below, which unloads the record.
                raise ValueError("Could not validate record.")

        # Delete any previous upload folders
        cleanup_old_files(hepsubmission)

        log.info("Finalising record %s" % recid)

        # ``do_finalise`` returns a JSON string, which is decoded below.
        result_json = do_finalise(recid,
                                  force_finalise=True,
                                  update=(current_submission is not None),
                                  convert=False,
                                  send_email=send_email)
        result = json.loads(result_json)

        if result and result['success']:
            log.info("Imported record %s with %s submissions" %
                     (recid, result['data_count']))
            return True
        else:
            raise ValueError("Failed to finalise record.")
    except Exception as e:
        # Any failure above, including the ValueErrors raised deliberately,
        # ends up here.
        # Unload record
        unload_submission(recid)
        log.error(e)
        return False