Exemple #1
0
def load_files(inspire_ids, send_tweet=False, synchronous=False):
    """
    Load each of the given records, or update it when it already exists.

    For every id the ``ins`` prefix is stripped and ``record_exists`` is
    consulted with the cleaned id. Records that do not exist are handed to
    ``Migrator.load_file``; records that do exist are handed to
    ``update_submissions``. Either call is made directly when ``synchronous``
    is true, and through its ``.delay`` attribute otherwise (presumably a
    task queue dispatch -- confirm against the task definitions).

    Failures while loading a record are printed and logged, and processing
    continues with the next id. The update branch is not guarded.

    :param inspire_ids: array of inspire ids to load (in the format insXXX).
    :param send_tweet: whether or not to tweet this entry.
    :param synchronous: if should be run immediately
    :return: None
    """
    migrator = Migrator()

    for inspire_id in inspire_ids:
        _cleaned_id = inspire_id.replace("ins", "")
        if not record_exists(inspire_id=_cleaned_id):
            print('The record with id {0} does not exist in the database, so we\'re loading it.'.format(inspire_id))
            try:
                log.info('Loading {0}'.format(inspire_id))
                if synchronous:
                    migrator.load_file(inspire_id, send_tweet)
                else:
                    migrator.load_file.delay(inspire_id, send_tweet)
            except socket.error as se:
                print('socket error...')
                # Exceptions have no ``.message`` attribute on Python 3;
                # reading it here would raise inside the handler and abort
                # the whole loop. Format the exception itself instead.
                log.error(str(se))
            except Exception as e:
                print('Failed to load {0}. {1} '.format(inspire_id, e))
                log.error('Failed to load {0}. {1} '.format(inspire_id, e))
        else:
            print('The record with inspire id {0} already exists. Updating instead.'.format(inspire_id))
            log.info('Updating {}'.format(inspire_id))
            if synchronous:
                update_submissions([inspire_id], send_tweet)
            else:
                update_submissions.delay([inspire_id], send_tweet)
Exemple #2
0
def get_missing_records():
    """
    Collect the ids returned by ``get_all_ids_in_current_system`` for which
    ``record_exists`` reports no record, print a summary and return them.

    :return: list of the ids that have no matching record
    """
    missing_ids = [
        candidate
        for candidate in get_all_ids_in_current_system(prepend_id_with="")
        if not record_exists(inspire_id=candidate)
    ]

    print("Missing {} records.".format(len(missing_ids)))
    print(missing_ids)
    return missing_ids
Exemple #3
0
def test_create_submission(app):
    """
    Walk through the submission pipeline: create the submission from a payload,
    process a submission directory, finalise it, then check that the record
    exists, is found in the index and can be formatted.
    :return:
    """
    with app.app_context():
        # Step 1: create the submission from a payload
        payload = {
            'inspire_id': '19999999',
            'title': 'HEPData Testing 1',
            'reviewer': {'name': 'Testy McTester', 'email': '*****@*****.**'},
            'uploader': {'name': 'Testy McTester', 'email': '*****@*****.**'},
            'message': 'This is ready',
            'user_id': 1,
        }

        submission = process_submission_payload(**payload)

        assert submission.version == 1
        assert submission.overall_status == 'todo'

        # Step 2: process the bundled test submission directory
        here = os.path.dirname(os.path.realpath(__file__))
        submission_dir = os.path.join(here, 'test_data/test_submission')
        submission_yaml = os.path.join(submission_dir, 'submission.yaml')
        process_submission_directory(submission_dir, submission_yaml,
                                     submission.publication_recid)

        table_query = DataSubmission.query.filter_by(
            publication_recid=submission.publication_recid)
        table_count = table_query.count()
        assert table_count == 8
        assert len(submission.resources) == 4
        assert len(submission.participants) == 4

        # Step 3: finalise and confirm the record now exists
        do_finalise(submission.publication_recid, force_finalise=True)

        assert record_exists(inspire_id=payload['inspire_id'])

        # Step 4: the record must be present in the index exactly once
        hits = get_records_matching_field('inspire_id', payload['inspire_id'], doc_type='publication')
        print(hits)
        assert len(hits['hits']['hits']) == 1

        contents = get_record_contents(submission.publication_recid)

        print(contents)
        assert contents is not None

        # Step 5: the formatted context reflects the submission
        context = format_submission(submission.publication_recid, contents, submission.version, 1,
                                    submission)

        assert context is not None

        assert context['version'] == 1
        assert context['recid'] == submission.publication_recid
Exemple #4
0
def get_missing_records():
    """
    Find every record that is missing in the new system (compared to the
    legacy environment), report them on stdout and return their ids.
    :return: a list of the missing ids
    """
    known_ids = get_all_ids_in_current_system(prepend_id_with="")
    missing_ids = [
        known_id for known_id in known_ids
        if not record_exists(inspire_id=known_id)
    ]

    summary = "Missing {} records.".format(len(missing_ids))
    print(summary)
    print(missing_ids)
    return missing_ids
Exemple #5
0
def load_files(inspire_ids,
               send_tweet=False,
               synchronous=False,
               convert=False,
               base_url='http://hepdata.cedar.ac.uk/view/{0}/yaml'):
    """
    Load each of the given records, or update it when it already exists.

    For every id the ``ins`` prefix is stripped and ``record_exists`` is
    consulted with the cleaned id. Records that do not exist are handed to
    ``Migrator.load_file``; records that do exist are handed to
    ``update_submissions``. Either call is made directly when ``synchronous``
    is true, and through its ``.delay`` attribute otherwise (presumably a
    task queue dispatch -- confirm against the task definitions).

    Failures while loading a record are printed and logged, and processing
    continues with the next id. The update branch is not guarded.

    :param inspire_ids: array of inspire ids to load (in the format insXXX).
    :param send_tweet: whether or not to tweet this entry. Only forwarded to
        ``Migrator.load_file``; the update branch does not receive it.
    :param synchronous: if should be run immediately
    :param convert: forwarded unchanged to ``Migrator.load_file``.
    :param base_url: override default base URL
    :return: None
    """
    migrator = Migrator()

    for inspire_id in inspire_ids:
        _cleaned_id = inspire_id.replace("ins", "")
        if not record_exists(inspire_id=_cleaned_id):
            print(
                "The record with id {0} does not exist in the database, so we're loading it."
                .format(inspire_id))
            try:
                log.info("Loading {0}".format(inspire_id))
                if synchronous:
                    migrator.load_file(inspire_id,
                                       send_tweet,
                                       convert=convert,
                                       base_url=base_url)
                else:
                    migrator.load_file.delay(inspire_id,
                                             send_tweet,
                                             convert=convert,
                                             base_url=base_url)
            except socket.error as se:
                print("socket error...")
                # Exceptions have no ``.message`` attribute on Python 3;
                # reading it here would raise inside the handler and abort
                # the whole loop. Format the exception itself instead.
                log.error(str(se))
            except Exception as e:
                print("Failed to load {0}. {1} ".format(inspire_id, e))
                log.error("Failed to load {0}. {1} ".format(inspire_id, e))
        else:
            print(
                "The record with inspire id {0} already exists. Updating instead."
                .format(inspire_id))
            log.info("Updating {}".format(inspire_id))
            if synchronous:
                update_submissions([inspire_id])
            else:
                update_submissions.delay([inspire_id])
Exemple #6
0
def get_record_from_inspire():
    """
    Look up the record named by the ``id`` request argument and return the
    result as JSON.

    Without an ``id`` argument only a status message is returned. Otherwise
    the record information is fetched with ``get_inspire_record_information``
    and the reported status is replaced by ``'exists'`` when ``record_exists``
    finds the id.

    :return: the ``jsonify`` response
    """
    query_args = request.args
    if 'id' not in query_args:
        return jsonify({'status': 'no inspire id provided'})

    inspire_id = query_args['id']
    content, status = get_inspire_record_information(inspire_id)

    # An id that is already present overrides whatever status came back.
    if record_exists(inspire_id=inspire_id):
        status = 'exists'

    payload = {
        'source': 'inspire',
        'id': inspire_id,
        'query': content,
        'status': status,
    }
    return jsonify(payload)
Exemple #7
0
def get_record_from_inspire():
    """
    Look up the record named by the ``id`` request argument and return the
    result as JSON.

    Without an ``id`` argument only a status message is returned. When
    ``record_exists`` already finds the id, a short ``'exists'`` response is
    returned and no lookup is made. Otherwise the record information comes
    from ``get_inspire_record_information``.

    :return: the ``jsonify`` response
    """
    query_args = request.args
    if 'id' not in query_args:
        return jsonify({'status': 'no inspire id provided'})

    inspire_id = query_args['id']

    # Short-circuit before the lookup when the id is already present.
    if record_exists(inspire_id=inspire_id):
        return jsonify({'status': 'exists', 'id': inspire_id})

    content, status = get_inspire_record_information(inspire_id)

    payload = {
        'source': 'inspire',
        'id': inspire_id,
        'query': content,
        'status': status,
    }
    return jsonify(payload)
def test_create_submission(app, admin_idx):
    """
    Walk through the submission pipeline: create the submission from a payload,
    copy and process a submission directory, finalise it, check the record and
    the index, then unload the submission and check that everything is gone.
    :return:
    """
    with app.app_context():

        admin_idx.recreate_index()

        # Step 1: create the submission from a payload
        payload = {
            'inspire_id': '19999999',
            'title': 'HEPData Testing 1',
            'reviewer': {'name': 'Testy McTester', 'email': '*****@*****.**'},
            'uploader': {'name': 'Testy McTester', 'email': '*****@*****.**'},
            'message': 'This is ready',
            'user_id': 1,
        }

        submission = process_submission_payload(**payload)

        assert submission.version == 1
        assert submission.overall_status == 'todo'

        # Step 2: copy the bundled test data into the record's data path
        here = os.path.dirname(os.path.realpath(__file__))

        source_dir = os.path.join(here, 'test_data/test_submission')
        stamp = str(int(round(time.time())))
        record_dir = get_data_path_for_record(submission.publication_recid,
                                              stamp)
        shutil.copytree(source_dir, record_dir)
        assert os.path.exists(record_dir)

        submission_yaml = os.path.join(record_dir, 'submission.yaml')
        process_submission_directory(record_dir, submission_yaml,
                                     submission.publication_recid)

        admin_hits = admin_idx.search(term=submission.publication_recid,
                                      fields=['recid'])
        assert admin_hits is not None

        table_query = DataSubmission.query.filter_by(
            publication_recid=submission.publication_recid)
        table_count = table_query.count()
        assert table_count == 8
        assert len(submission.resources) == 4
        assert len(submission.participants) == 4

        # Step 3: finalise and confirm the record now exists
        do_finalise(submission.publication_recid,
                    force_finalise=True,
                    convert=False)

        assert record_exists(inspire_id=payload['inspire_id'])

        # Step 4: the record must be present in the index exactly once
        hits = get_records_matching_field('inspire_id',
                                          payload['inspire_id'],
                                          doc_type='publication')
        assert len(hits['hits']['hits']) == 1

        contents = get_record_contents(submission.publication_recid)

        assert contents is not None

        # Step 5: the formatted context reflects the submission
        context = format_submission(submission.publication_recid, contents,
                                    submission.version, 1, submission)

        assert context is not None

        assert context['version'] == 1
        assert context['recid'] == submission.publication_recid

        # Step 6: remove the submission and check that everything is removed
        unload_submission(submission.publication_recid)

        assert not record_exists(inspire_id=payload['inspire_id'])

        table_query = DataSubmission.query.filter_by(
            publication_recid=submission.publication_recid)
        table_count = table_query.count()

        assert table_count == 0

        # Pause before querying the admin index again.
        sleep(2)

        admin_hits = admin_idx.search(term=submission.publication_recid,
                                      fields=['recid'])
        assert len(admin_hits) == 0

        # The copied data directory must have been deleted as well
        assert not os.path.exists(record_dir)