def test_save_roots(workflow_app):
    """Check where the workflow roots end up after ``save_roots`` runs.

    The head record starts with an ``arxiv`` root and the update record with
    ``arxiv`` and ``publisher`` roots.  After ``save_roots`` the head must hold
    the ``updated`` payload for both sources and the update must hold none.
    """
    head_record = InspireRecord.create_or_update(
        fake_record('title1', 123),
        skip_files=False,
    )
    head_record.commit()
    update_record = InspireRecord.create_or_update(
        fake_record('title2', 456),
        skip_files=False,
    )
    update_record.commit()

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_record.id)
    wf_obj.extra_data['update_uuid'] = str(update_record.id)
    wf_obj.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(
        json={'version': 'original'},
        record_uuid=head_record.id,
        source='arxiv',
    )
    insert_wf_record_source(
        json={'version': 'updated'},
        record_uuid=update_record.id,
        source='arxiv',
    )
    insert_wf_record_source(
        json={'version': 'updated'},
        record_uuid=update_record.id,
        source='publisher',
    )

    save_roots(wf_obj, None)

    head_arxiv_root = read_wf_record_source(head_record.id, 'arxiv')
    assert head_arxiv_root.json == {'version': 'updated'}

    head_publisher_root = read_wf_record_source(head_record.id, 'publisher')
    assert head_publisher_root.json == {'version': 'updated'}

    # Nothing may be left attached to the update record.
    for source in ('arxiv', 'publisher'):
        assert not read_wf_record_source(update_record.id, source)
Beispiel #2
0
def record_with_two_revisions(app):
    """Save literature record 111 twice, with two different titles.

    The record is first stored with the title ``record rev0`` and then stored
    again with ``record rev1``.  Control is yielded once both saves have been
    committed; afterwards the record is removed with ``_delete_record``.
    """
    initial_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }

    with db.session.begin_nested():
        stored = InspireRecord.create_or_update(initial_json)
        stored.commit()
    db.session.commit()

    # Retitle the stored record before saving it a second time.
    stored['titles'][0]['title'] = 'record rev1'

    with db.session.begin_nested():
        stored = InspireRecord.create_or_update(stored)
        stored.commit()
    db.session.commit()

    yield

    _delete_record('lit', 111)
Beispiel #3
0
def test_save_roots(workflow_app):
    """Check that the head has roots for sources a, b and c after ``save_roots``.

    The head starts with roots for sources ``a`` and ``b``; the update starts
    with roots for ``a`` and ``c``.
    """
    head_record = InspireRecord.create_or_update(
        fake_record('title1', 123), skip_files=False)
    head_record.commit()
    update_record = InspireRecord.create_or_update(
        fake_record('title2', 456), skip_files=False)
    update_record.commit()

    wf_obj = workflow_object_class.create(data={}, data_type='hep')
    wf_obj.extra_data['head_uuid'] = str(head_record.id)
    wf_obj.extra_data['update_uuid'] = str(update_record.id)
    wf_obj.save()

    for head_source in ('a', 'b'):
        insert_wf_record_source(
            json={}, record_uuid=head_record.id, source=head_source)

    # this will not be saved because there's already an entry with source `a`
    insert_wf_record_source(json={}, record_uuid=update_record.id, source='a')
    insert_wf_record_source(json={}, record_uuid=update_record.id, source='c')

    save_roots(wf_obj, None)

    for expected_source in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head_record.id), expected_source)
def record_with_two_revisions(app):
    """Save literature record 111 twice, with two different titles.

    First the record is stored titled ``record rev0``, then it is stored again
    titled ``record rev1``.  The generator yields after both commits and
    deletes the record with ``_delete_record`` when resumed.
    """
    first_revision_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }

    with db.session.begin_nested():
        saved_record = InspireRecord.create_or_update(first_revision_json)
        saved_record.commit()
    db.session.commit()

    # Change the title on the stored record, then save it once more.
    saved_record['titles'][0]['title'] = 'record rev1'

    with db.session.begin_nested():
        saved_record = InspireRecord.create_or_update(saved_record)
        saved_record.commit()
    db.session.commit()

    yield

    _delete_record('lit', 111)
Beispiel #5
0
def test_manual_merge_existing_records(workflow_app):
    """Manually merge two stored records and verify the resulting state.

    Checks the workflow object status and approval flags, that no roots are
    stored for either record, that the update record is flagged as deleted,
    and that head and update reference each other.
    """
    head_json = fake_record('This is the HEAD', 1)
    update_json = fake_record('While this is the update', 2)

    # these two fields will create a merging conflict
    head_json['core'] = True
    update_json['core'] = False

    head_record = InspireRecord.create_or_update(head_json, skip_files=False)
    head_record.commit()
    update_record = InspireRecord.create_or_update(update_json, skip_files=False)
    update_record.commit()
    head_uuid = head_record.id
    update_uuid = update_record.id

    wf_id = start_merger(head_id=1, update_id=2, current_user_id=1)

    do_resolve_manual_merge_wf(workflow_app, wf_id)

    # retrieve it again, otherwise Detached Instance Error
    wf_obj = workflow_object_class.get(wf_id)

    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data['approved'] is True
    assert wf_obj.extra_data['auto-approved'] is False

    # no root present before
    head_root = read_wf_record_source(head_uuid, 'arxiv')
    assert head_root is None

    source_of_update = LiteratureReader(update_record).source
    update_root = read_wf_record_source(update_uuid, source_of_update)
    assert update_root is None

    # check that head's content has been replaced by merged
    removed_record = RecordMetadata.query.filter_by(id=update_uuid).one()

    merged_record = get_db_record('lit', 1)

    assert removed_record.json['deleted'] is True

    # check deleted record is linked in the latest one
    expected_deleted_ref = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [expected_deleted_ref] == merged_record['deleted_records']

    # check the merged record is linked in the deleted one
    expected_new_record_ref = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert expected_new_record_ref == removed_record.json['new_record']

    del merged_record['deleted_records']
    assert merged_record == wf_obj.data  # -> resulted merged record
def test_manual_merge_existing_records(workflow_app):
    """Run the manual merge workflow on two stored records and check the result.

    Verifies the completed workflow object, the absence of roots for both
    records, the ``deleted`` flag on the update record and the cross
    references between the merged and the deleted record.
    """
    head_payload = fake_record('This is the HEAD', 1)
    update_payload = fake_record('While this is the update', 2)

    # these two fields will create a merging conflict
    head_payload['core'] = True
    update_payload['core'] = False

    head_rec = InspireRecord.create_or_update(head_payload, skip_files=False)
    head_rec.commit()
    update_rec = InspireRecord.create_or_update(update_payload, skip_files=False)
    update_rec.commit()
    head_uuid = head_rec.id
    update_uuid = update_rec.id

    workflow_id = start_merger(head_id=1, update_id=2, current_user_id=1)

    do_resolve_manual_merge_wf(workflow_app, workflow_id)

    # retrieve it again, otherwise Detached Instance Error
    workflow_obj = workflow_object_class.get(workflow_id)

    assert workflow_obj.status == ObjectStatus.COMPLETED
    assert workflow_obj.extra_data['approved'] is True
    assert workflow_obj.extra_data['auto-approved'] is False

    # no root present before
    root_of_head = read_wf_record_source(head_uuid, 'arxiv')
    assert root_of_head is None

    update_source_name = LiteratureReader(update_rec).source
    root_of_update = read_wf_record_source(update_uuid, update_source_name)
    assert root_of_update is None

    # check that head's content has been replaced by merged
    deleted_metadata = RecordMetadata.query.filter_by(id=update_uuid).one()

    latest = get_db_record('lit', 1)

    assert deleted_metadata.json['deleted'] is True

    # check deleted record is linked in the latest one
    ref_to_deleted = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [ref_to_deleted] == latest['deleted_records']

    # check the merged record is linked in the deleted one
    ref_to_merged = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert ref_to_merged == deleted_metadata.json['new_record']

    del latest['deleted_records']
    assert latest == workflow_obj.data  # -> resulted merged record
def book_with_another_document_type(app):
    """Temporarily add another document type to a book record.

    Literature record 1373790 gets the document types ``book`` and
    ``proceedings`` before the yield and is set back to ``book`` after it.
    """
    book = get_db_record('lit', 1373790)
    book['document_type'] = ['book', 'proceedings']
    saved_book = InspireRecord.create_or_update(book)
    saved_book.commit()

    yield

    # Reset the document type to just ``book``.
    book = get_db_record('lit', 1373790)
    book['document_type'] = ['book']
    saved_book = InspireRecord.create_or_update(book)
    saved_book.commit()
def jhep_with_malformed_title(app):
    """Temporarily add a malformed title to the JHEP record.

    Appends ``+++++`` to the title variants of journal record 1213103 before
    the yield and drops the last title variant again after it.
    """
    journal = get_db_record('jou', 1213103)
    journal['title_variants'].append('+++++')
    saved_journal = InspireRecord.create_or_update(journal)
    saved_journal.commit()

    yield

    # Drop the last variant, i.e. the one appended above.
    journal = get_db_record('jou', 1213103)
    journal['title_variants'] = journal['title_variants'][:-1]
    saved_journal = InspireRecord.create_or_update(journal)
    saved_journal.commit()
def book_with_another_document_type(app):
    """Temporarily add another document type to a book record.

    Before yielding, literature record 1373790 is stored with the document
    types ``book`` and ``proceedings``; after the yield it is stored with
    ``book`` only.
    """
    book_record = get_db_record('lit', 1373790)
    book_record['document_type'] = ['book', 'proceedings']
    stored_book = InspireRecord.create_or_update(book_record)
    stored_book.commit()

    yield

    # Reset the document type to just ``book``.
    book_record = get_db_record('lit', 1373790)
    book_record['document_type'] = ['book']
    stored_book = InspireRecord.create_or_update(book_record)
    stored_book.commit()
def jhep_with_malformed_title(app):
    """Temporarily add a malformed title to the JHEP record.

    Before yielding, ``+++++`` is appended to the title variants of journal
    record 1213103; after the yield the last title variant is removed.
    """
    jhep = get_db_record('jou', 1213103)
    jhep['title_variants'].append('+++++')
    stored_jhep = InspireRecord.create_or_update(jhep)
    stored_jhep.commit()

    yield

    # Drop the last variant, i.e. the one appended above.
    jhep = get_db_record('jou', 1213103)
    jhep['title_variants'] = jhep['title_variants'][:-1]
    stored_jhep = InspireRecord.create_or_update(jhep)
    stored_jhep.commit()
def cern_with_hal_id(app):
    """Temporarily add the HAL id to the CERN record.

    Institution record 902725 gets a single HAL external identifier before
    the yield; the ``external_system_identifiers`` key is deleted afterwards.
    The ``records-institutions`` index is refreshed after each change.
    """
    institution = get_db_record('ins', 902725)
    hal_identifier = {'schema': 'HAL', 'value': '300037'}
    institution['external_system_identifiers'] = [hal_identifier]
    saved_institution = InspireRecord.create_or_update(institution)
    saved_institution.commit()
    es.indices.refresh('records-institutions')

    yield

    institution = get_db_record('ins', 902725)
    del institution['external_system_identifiers']
    saved_institution = InspireRecord.create_or_update(institution)
    saved_institution.commit()
    es.indices.refresh('records-institutions')
Beispiel #12
0
def author_in_isolated_app(isolated_app):
    """Store a validated author record and yield its control number.

    The author carries an INSPIRE BAI and an ORCID identifier.  The JSON is
    checked with ``validate`` against the ``authors`` schema before it is
    stored.
    """
    author_json = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        '_collections': ['Authors'],
        # FIXME remove when there is an easy way to insert new records
        'control_number': 123456789,
        'ids': [
            {'schema': 'INSPIRE BAI', 'value': 'J.Smith.1'},
            {'schema': 'ORCID', 'value': '0000-0002-1825-0097'},
        ],
        'name': {'value': 'Smith, John'},
    }

    assert validate(author_json, 'authors') is None

    author = InspireRecord.create_or_update(author_json)
    author.commit()
    yield author['control_number']
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_orcid_id(isolated_app):
    """Expect ``NoResultFound`` once author 1061000 keeps only a BAI identifier."""
    author = get_db_record('aut', 1061000)
    bai_only = {'schema': 'INSPIRE BAI', 'value': 'Maurizio.Martinelli.1'}
    author['ids'] = [bai_only]
    stored_author = InspireRecord.create_or_update(author)
    stored_author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_ids(isolated_app):
    """Expect ``NoResultFound`` once the ``ids`` key is removed from author 1061000."""
    author = get_db_record('aut', 1061000)
    del author['ids']
    stored_author = InspireRecord.create_or_update(author)
    stored_author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_get_literature_recids_for_orcid_raises_if_two_authors_are_found(isolated_app):
    """Expect ``MultipleResultsFound`` after storing author 1061000 again as 1061001."""
    author_copy = get_db_record('aut', 1061000)
    # Same content under a different control number.
    author_copy['control_number'] = 1061001
    stored_copy = InspireRecord.create_or_update(author_copy)
    stored_copy.commit()

    with pytest.raises(MultipleResultsFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
Beispiel #16
0
def _create_record(record_json):
    """Store ``record_json``, commit the session and refresh the ES indices.

    Returns:
        The ``record_json`` argument that was passed in (not the object
        returned by ``InspireRecord.create_or_update``).
    """
    with db.session.begin_nested():
        stored = InspireRecord.create_or_update(record_json)
        stored.commit()

    db.session.commit()
    es.indices.refresh()

    return record_json
Beispiel #17
0
def _create_record(record_json):
    """Save ``record_json`` in a nested transaction, then commit and refresh ES.

    Returns:
        The same ``record_json`` object given by the caller; the object
        returned by ``InspireRecord.create_or_update`` is not returned.
    """
    with db.session.begin_nested():
        created = InspireRecord.create_or_update(record_json)
        created.commit()

    db.session.commit()
    es.indices.refresh()

    return record_json
Beispiel #18
0
def test_get_literature_recids_for_orcid_raises_if_two_authors_are_found(
        isolated_app):
    """Expect ``MultipleResultsFound`` after storing author 1061000 again as 1061001."""
    duplicate_author = get_db_record('aut', 1061000)
    # Same content under a different control number.
    duplicate_author['control_number'] = 1061001
    stored_duplicate = InspireRecord.create_or_update(duplicate_author)
    stored_duplicate.commit()

    with pytest.raises(MultipleResultsFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
Beispiel #19
0
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_ids(
        isolated_app):
    """Expect ``NoResultFound`` once the ``ids`` key is removed from author 1061000."""
    author_record = get_db_record('aut', 1061000)
    del author_record['ids']
    saved_author = InspireRecord.create_or_update(author_record)
    saved_author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def cern_with_hal_id(app):
    """Temporarily add the HAL id to the CERN record.

    Before yielding, institution record 902725 is stored with one HAL
    external identifier; after the yield that key is deleted again.  The
    ``records-institutions`` index is refreshed after each save.
    """
    cern = get_db_record('ins', 902725)
    cern['external_system_identifiers'] = [{'schema': 'HAL', 'value': '300037'}]
    stored_cern = InspireRecord.create_or_update(cern)
    stored_cern.commit()
    es.indices.refresh('records-institutions')

    yield

    cern = get_db_record('ins', 902725)
    del cern['external_system_identifiers']
    stored_cern = InspireRecord.create_or_update(cern)
    stored_cern.commit()
    es.indices.refresh('records-institutions')
Beispiel #21
0
def test_get_literature_recids_for_orcid_still_works_if_author_has_no_orcid_id(
        isolated_app):
    """Expect ``NoResultFound`` once author 1061000 keeps only a BAI identifier."""
    author_record = get_db_record('aut', 1061000)
    author_record['ids'] = [
        {'schema': 'INSPIRE BAI', 'value': 'Maurizio.Martinelli.1'},
    ]
    saved_author = InspireRecord.create_or_update(author_record)
    saved_author.commit()

    with pytest.raises(NoResultFound):
        get_literature_recids_for_orcid('0000-0003-4792-9178')
def test_manual_merge_existing_records(mock_put_record_to_hep,
                                       mock_store_records, workflow_app):
    """Manually merge two stored records with the HEP upload mocked out.

    Checks that ``mock_put_record_to_hep`` was called, that the workflow
    object is completed and approved (not auto-approved), and that no roots
    are stored for either record.
    """
    head_json = fake_record('This is the HEAD', 1)
    update_json = fake_record('While this is the update', 2)

    # these two fields will create a merging conflict
    head_json['core'] = True
    update_json['core'] = False

    head_record = InspireRecord.create_or_update(head_json, skip_files=False)
    head_record.commit()
    update_record = InspireRecord.create_or_update(update_json, skip_files=False)
    update_record.commit()
    head_uuid = head_record.id
    update_uuid = update_record.id

    wf_id = start_merger(head_id=1, update_id=2, current_user_id=1)

    do_resolve_manual_merge_wf(workflow_app, wf_id)
    mock_put_record_to_hep.assert_called()

    # retrieve it again, otherwise Detached Instance Error
    wf_obj = workflow_object_class.get(wf_id)

    assert wf_obj.status == ObjectStatus.COMPLETED
    assert wf_obj.extra_data['approved'] is True
    assert wf_obj.extra_data['auto-approved'] is False

    # no root present before
    head_root = read_wf_record_source(head_uuid, 'arxiv')
    assert head_root is None

    source_of_update = LiteratureReader(update_record).source
    update_root = read_wf_record_source(update_uuid, source_of_update)
    assert update_root is None
def test_manual_merge_with_none_record(workflow_app):
    """Expect ``RecordGetterError`` when ``start_merger`` gets an unknown update id."""
    head_json = fake_record('This is the HEAD', 1)

    head_record = InspireRecord.create_or_update(head_json, skip_files=False)
    head_record.commit()
    missing_update_id = 123456789

    with pytest.raises(RecordGetterError):
        start_merger(head_id=1, update_id=missing_update_id, current_user_id=1)
Beispiel #24
0
def test_manual_merge_with_none_record(workflow_app):
    """Expect ``RecordGetterError`` when the update id passed to ``start_merger`` is unknown."""
    head_payload = fake_record('This is the HEAD', 1)

    stored_head = InspireRecord.create_or_update(head_payload, skip_files=False)
    stored_head.commit()
    unknown_recid = 123456789

    with pytest.raises(RecordGetterError):
        start_merger(head_id=1, update_id=unknown_recid, current_user_id=1)
Beispiel #25
0
def test_save_roots(workflow_app):
    """Check where the workflow roots end up after ``save_roots`` runs.

    The head starts with one ``arxiv`` root; the update starts with ``arxiv``
    and ``publisher`` roots.  After ``save_roots`` the head must carry the
    ``updated`` payload for both sources and the update must carry none.
    """
    head_rec = InspireRecord.create_or_update(
        fake_record('title1', 123), skip_files=False)
    head_rec.commit()
    update_rec = InspireRecord.create_or_update(
        fake_record('title2', 456), skip_files=False)
    update_rec.commit()

    workflow_obj = workflow_object_class.create(data={}, data_type='hep')
    workflow_obj.extra_data['head_uuid'] = str(head_rec.id)
    workflow_obj.extra_data['update_uuid'] = str(update_rec.id)
    workflow_obj.save()

    # Union: keep the most recently created/updated root from each source.
    insert_wf_record_source(
        json={'version': 'original'}, record_uuid=head_rec.id, source='arxiv')
    insert_wf_record_source(
        json={'version': 'updated'}, record_uuid=update_rec.id, source='arxiv')
    insert_wf_record_source(
        json={'version': 'updated'}, record_uuid=update_rec.id, source='publisher')

    save_roots(workflow_obj, None)

    root_from_arxiv = read_wf_record_source(head_rec.id, 'arxiv')
    assert root_from_arxiv.json == {'version': 'updated'}

    root_from_publisher = read_wf_record_source(head_rec.id, 'publisher')
    assert root_from_publisher.json == {'version': 'updated'}

    # Nothing may be left attached to the update record.
    for source_name in ('arxiv', 'publisher'):
        assert not read_wf_record_source(update_rec.id, source_name)
def insert_journals_in_db(workflow_app):
    """Temporarily add a few journals to the DB.

    Two journal records are loaded from JSON files under ``fixtures`` and
    stored before the yield.  Afterwards journal records 1936475 and 1936476
    are deleted.  The ``records-journals`` index is refreshed on both sides.
    """
    refereed_path = os.path.join('fixtures', 'jou_record_refereed.json')
    refereed_json = json.loads(
        pkg_resources.resource_string(__name__, refereed_path))

    refereed_and_proceedings_path = os.path.join(
        'fixtures', 'jou_record_refereed_and_proceedings.json')
    refereed_and_proceedings_json = json.loads(
        pkg_resources.resource_string(__name__, refereed_and_proceedings_path))

    with db.session.begin_nested():
        refereed_journal = InspireRecord.create_or_update(
            refereed_json, skip_files=False)
        refereed_journal.commit()
        refereed_and_proceedings_journal = InspireRecord.create_or_update(
            refereed_and_proceedings_json, skip_files=False)
        refereed_and_proceedings_journal.commit()
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    _delete_record('jou', 1936475)
    _delete_record('jou', 1936476)
    es.indices.refresh('records-journals')
def test_get_head_source_return_arxiv_when_one_arxive_source_present(
        app, simple_record):
    """Check ``get_head_source`` before and after an ``arxiv`` root exists.

    With only an ``ejl`` root the head source must be ``publisher``; once an
    ``arxiv`` root is added for the same record it must be ``arxiv``.
    """
    # XXX: for some reason, this must be internal.
    from inspirehep.modules.records.api import InspireRecord

    stored_record = InspireRecord.create_or_update(simple_record)
    stored_record.commit()
    record_uuid = stored_record.id

    # two sources for the same record
    insert_wf_record_source(
        json=simple_record, record_uuid=record_uuid, source='ejl')

    assert get_head_source(record_uuid) == 'publisher'

    insert_wf_record_source(
        json=simple_record, record_uuid=record_uuid, source='arxiv')
    assert get_head_source(record_uuid) == 'arxiv'
Beispiel #28
0
def migrate_record_from_mirror(prod_record, skip_files=False):
    """Migrate a mirrored legacy record into an Inspire record.

    On any failure the exception is stored in ``prod_record.error`` and
    ``prod_record`` is merged into the session.  On success
    ``prod_record.valid`` is set to ``True`` before the merge.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        skip_files(bool): flag indicating whether the files in the record
            metadata should be copied over from legacy and attach to the
            record.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database, or ``None`` if the MARCXML conversion, the validation or
        the insertion failed.
    """
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        LOGGER.exception('Migrator DoJSON Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if '$schema' in json_record:
        ensure_valid_schema(json_record)

    try:
        with db.session.begin_nested():
            record = InspireRecord.create_or_update(json_record,
                                                    skip_files=skip_files)
            record.commit()
    except ValidationError as exc:
        # ``schema_path`` may contain integers (e.g. the index of an
        # ``anyOf`` branch), so every element is converted to text before
        # joining; a plain ``'.'.join`` would raise ``TypeError`` here.
        schema_path = u'.'.join(
            u'{}'.format(part) for part in exc.schema_path)
        # The formatted pattern is used as a %-style logging template, so a
        # literal ``%`` coming from the path must be escaped.
        schema_path = schema_path.replace(u'%', u'%%')
        pattern = u'Migrator Validator Error: {}, Value: %r, Record: %r'
        LOGGER.error(pattern.format(schema_path), exc.instance,
                     prod_record.recid)
        prod_record.error = exc
        db.session.merge(prod_record)
        return None
    except Exception as exc:
        LOGGER.exception('Migrator Record Insert Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
Beispiel #29
0
def migrate_record_from_mirror(prod_record, skip_files=False):
    """Migrate a mirrored legacy record into an Inspire record.

    On any failure the exception is stored in ``prod_record.error`` and
    ``prod_record`` is merged into the session.  On success
    ``prod_record.valid`` is set to ``True`` before the merge.

    Args:
        prod_record(LegacyRecordsMirror): the mirrored record to migrate.
        skip_files(bool): flag indicating whether the files in the record
            metadata should be copied over from legacy and attach to the
            record.

    Returns:
        dict: the migrated record metadata, which is also inserted into the
        database, or ``None`` if the MARCXML conversion, the validation or
        the insertion failed.
    """
    try:
        json_record = marcxml2record(prod_record.marcxml)
    except Exception as exc:
        LOGGER.exception('Migrator DoJSON Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None

    if '$schema' in json_record:
        ensure_valid_schema(json_record)

    try:
        with db.session.begin_nested():
            record = InspireRecord.create_or_update(json_record, skip_files=skip_files)
            record.commit()
    except ValidationError as exc:
        # ``schema_path`` may contain integers (e.g. the index of an
        # ``anyOf`` branch), so every element is converted to text before
        # joining; a plain ``'.'.join`` would raise ``TypeError`` here.
        schema_path = u'.'.join(u'{}'.format(part) for part in exc.schema_path)
        # The formatted pattern is used as a %-style logging template, so a
        # literal ``%`` coming from the path must be escaped.
        schema_path = schema_path.replace(u'%', u'%%')
        pattern = u'Migrator Validator Error: {}, Value: %r, Record: %r'
        LOGGER.error(pattern.format(schema_path), exc.instance, prod_record.recid)
        prod_record.error = exc
        db.session.merge(prod_record)
        return None
    except Exception as exc:
        LOGGER.exception('Migrator Record Insert Error')
        prod_record.error = exc
        db.session.merge(prod_record)
        return None
    else:
        prod_record.valid = True
        db.session.merge(prod_record)
        return record
def author_in_isolated_app(isolated_app):
    """Store a validated author record and yield its control number.

    The author JSON (with an INSPIRE BAI and an ORCID identifier) is checked
    with ``validate`` against the ``authors`` schema before being stored.
    """
    bai_id = {'schema': 'INSPIRE BAI', 'value': 'J.Smith.1'}
    orcid_id = {'schema': 'ORCID', 'value': '0000-0002-1825-0097'}
    author_json = {
        '$schema': 'http://localhost:5000/schemas/records/authors.json',
        '_collections': ['Authors'],
        # FIXME remove when there is an easy way to insert new records
        'control_number': 123456789,
        'ids': [bai_id, orcid_id],
        'name': {'value': 'Smith, John'},
    }

    assert validate(author_json, 'authors') is None

    stored_author = InspireRecord.create_or_update(author_json)
    stored_author.commit()
    yield stored_author['control_number']
Beispiel #31
0
def _create_record(json):
    """Insert or replace a record.

    Returns:
        The committed object returned by ``InspireRecord.create_or_update``.
    """
    stored = InspireRecord.create_or_update(json, skip_files=False)
    stored.commit()
    return stored
Beispiel #32
0
def _create_record(json):
    """Insert or replace a record.

    The record is created with ``skip_files=False`` and committed before it
    is returned.
    """
    created = InspireRecord.create_or_update(json, skip_files=False)
    created.commit()
    return created