def record_with_two_revisions(app):
    """Insert literature record 111 twice, with two different titles.

    The same payload is committed first with the title ``record rev0`` and
    then with the title ``record rev1``.  After the ``yield`` the record is
    removed again through ``_delete_record``.  ``app`` is not used directly
    in the body.
    """
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'record rev0'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }

    # One insert + commit per title; the title is the only field that changes.
    for revision_title in ('record rev0', 'record rev1'):
        payload['titles'][0]['title'] = revision_title
        with db.session.begin_nested():
            record_insert_or_replace(payload)
        db.session.commit()

    yield

    _delete_record('lit', 111)
def test_save_roots(workflow_app):
    """After ``save_roots`` the head record has roots for sources a, b and c.

    Roots ``a`` and ``b`` are stored for the head, roots ``a`` and ``c`` for
    the update, and the workflow object references both records by UUID.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head = record_insert_or_replace(fake_record('title1', 123))
    update = record_insert_or_replace(fake_record('title2', 456))

    obj = workflow_object_class.create(data={}, data_type='hep')
    obj.extra_data['head_uuid'] = str(head.id)
    obj.extra_data['update_uuid'] = str(update.id)
    obj.save()

    # The update's root with source `a` will not be saved because the head
    # already has an entry with source `a`.
    roots = (
        (head, 'a'),
        (head, 'b'),
        (update, 'a'),
        (update, 'c'),
    )
    for owner, source in roots:
        insert_wf_record_source(json={}, record_uuid=owner.id, source=source)

    save_roots(obj, None)

    for source in ('a', 'b', 'c'):
        assert read_wf_record_source(str(head.id), source)
def record_with_two_revisions(app):
    """Commit literature record 111 twice: as ``record rev0``, then ``record rev1``.

    Everything after the ``yield`` is clean-up: the record is deleted with
    ``_delete_record``.  ``app`` is not referenced in the body.
    """
    def _store(payload):
        # Insert (or replace) inside a nested transaction, then commit.
        with db.session.begin_nested():
            record_insert_or_replace(payload)
        db.session.commit()

    revision = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [
            {
                'title': 'record rev0',
            },
        ],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
        '_collections': ['Literature'],
    }
    _store(revision)

    # Same payload object, new title: stored a second time.
    revision['titles'][0]['title'] = 'record rev1'
    _store(revision)

    yield

    _delete_record('lit', 111)
Example #4
0
def _create_record(record_json):
    """Insert ``record_json``, commit, refresh the ES indices and return it.

    The value returned is the very same ``record_json`` object received.
    """
    savepoint = db.session.begin_nested()
    with savepoint:
        record_insert_or_replace(record_json)
    db.session.commit()

    # No index name is given to the refresh call.
    es.indices.refresh()
    return record_json
Example #5
0
def book_with_another_document_type(app):
    """Temporarily give literature record 1373790 a second document type.

    Before the ``yield`` the document types are set to ``book`` and
    ``proceedings``; afterwards they are set back to just ``book``.
    """
    def _store_document_type(document_type):
        # Reload the record every time, as each phase works on a fresh copy.
        book = get_db_record('lit', 1373790)
        book['document_type'] = document_type
        record_insert_or_replace(book)

    _store_document_type(['book', 'proceedings'])

    yield

    _store_document_type(['book'])
def test_manual_merge_existing_records(workflow_app):
    """Manually merge two stored records and check the resulting state.

    Checks the workflow status and approval flags, that no roots exist for
    either record, that the update record is flagged as deleted, and that
    the two records reference each other.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_json = fake_record('This is the HEAD', 1)
    update_json = fake_record('While this is the update', 2)

    # These two fields will create a merging conflict.
    head_json['core'] = True
    update_json['core'] = False

    head_record = record_insert_or_replace(head_json)
    update_record = record_insert_or_replace(update_json)
    head_uuid = head_record.id
    update_uuid = update_record.id

    workflow_id = start_merger(head_id=1, update_id=2, current_user_id=1)

    do_resolve_manual_merge_wf(workflow_app, workflow_id)

    # Retrieve it again, otherwise Detached Instance Error.
    workflow = workflow_object_class.get(workflow_id)

    assert workflow.status == ObjectStatus.COMPLETED
    assert workflow.extra_data['approved'] is True
    assert workflow.extra_data['auto-approved'] is False

    # No root is expected for the head nor for the update.
    assert read_wf_record_source(head_uuid, 'arxiv') is None
    assert read_wf_record_source(
        update_uuid, get_source(update_record)) is None

    # Check that head's content has been replaced by the merged one.
    deleted = RecordMetadata.query.filter_by(id=update_uuid).one()
    merged = get_db_record('lit', 1)

    assert deleted.json['deleted'] is True

    # The deleted record is linked in the latest one...
    ref_to_deleted = {'$ref': 'http://localhost:5000/api/literature/2'}
    assert [ref_to_deleted] == merged['deleted_records']

    # ...and the merged record is linked in the deleted one.
    ref_to_merged = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert ref_to_merged == deleted.json['new_record']

    # Apart from ``deleted_records``, the stored record is the workflow data.
    del merged['deleted_records']
    assert merged == workflow.data
def jhep_with_malformed_title(app):
    """Temporarily append the title variant ``+++++`` to journal 1213103.

    After the ``yield`` the last title variant is dropped and the record is
    stored again.
    """
    journal = get_db_record('jou', 1213103)
    variants = journal['title_variants']
    variants.append('+++++')
    record_insert_or_replace(journal)

    yield

    journal = get_db_record('jou', 1213103)
    # Everything but the last entry, i.e. without the appended variant.
    trimmed = journal['title_variants'][:-1]
    journal['title_variants'] = trimmed
    record_insert_or_replace(journal)
def test_manual_merge_existing_records(workflow_app):
    """Merge record 2 into record 1 manually and verify what gets stored.

    The test inserts two conflicting records, starts and resolves the manual
    merge workflow, then inspects the workflow object and both records.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    # These two ``core`` values will create a merging conflict.
    head_payload = fake_record('This is the HEAD', 1)
    update_payload = fake_record('While this is the update', 2)
    head_payload['core'] = True
    update_payload['core'] = False

    stored_head = record_insert_or_replace(head_payload)
    stored_update = record_insert_or_replace(update_payload)
    head_uuid = stored_head.id
    update_uuid = stored_update.id

    merge_id = start_merger(
        head_id=1,
        update_id=2,
        current_user_id=1,
    )
    do_resolve_manual_merge_wf(workflow_app, merge_id)

    # Retrieve it again, otherwise Detached Instance Error.
    merge_obj = workflow_object_class.get(merge_id)

    assert merge_obj.status == ObjectStatus.COMPLETED
    assert merge_obj.extra_data['approved'] is True
    assert merge_obj.extra_data['auto-approved'] is False

    # No root present for the head.
    head_root = read_wf_record_source(head_uuid, 'arxiv')
    assert head_root is None
    # No root present for the update either.
    update_root = read_wf_record_source(
        update_uuid, get_source(stored_update))
    assert update_root is None

    # Check that head's content has been replaced by the merged one.
    update_row = RecordMetadata.query.filter_by(id=update_uuid).one()
    head_now = get_db_record('lit', 1)

    assert update_row.json['deleted'] is True

    # Check that the deleted record is linked in the latest one.
    expected_deleted = [{'$ref': 'http://localhost:5000/api/literature/2'}]
    assert expected_deleted == head_now['deleted_records']

    # Check that the merged record is linked in the deleted one.
    expected_new = {'$ref': 'http://localhost:5000/api/literature/1'}
    assert expected_new == update_row.json['new_record']

    del head_now['deleted_records']
    assert head_now == merge_obj.data  # -> resulting merged record
def jhep_with_malformed_title(app):
    """Temporarily add the malformed title variant ``+++++`` to journal 1213103.

    The code after the ``yield`` stores the record again without its last
    title variant.
    """
    def _load():
        return get_db_record('jou', 1213103)

    jhep = _load()
    jhep['title_variants'].append('+++++')
    record_insert_or_replace(jhep)

    yield

    jhep = _load()
    # Slice off the last variant, which is the one appended above.
    jhep['title_variants'] = jhep['title_variants'][:-1]
    record_insert_or_replace(jhep)
def cern_with_hal_id(app):
    """Temporarily set a HAL identifier on institution record 902725.

    ``external_system_identifiers`` is set before the ``yield`` and deleted
    after it; the ``records-institutions`` index is refreshed both times.
    """
    hal_identifier = {'schema': 'HAL', 'value': '300037'}

    institution = get_db_record('ins', 902725)
    institution['external_system_identifiers'] = [hal_identifier]
    record_insert_or_replace(institution)
    es.indices.refresh('records-institutions')

    yield

    institution = get_db_record('ins', 902725)
    del institution['external_system_identifiers']
    record_insert_or_replace(institution)
    es.indices.refresh('records-institutions')
def test_manual_merge_with_none_record(workflow_app):
    """``start_merger`` raises ``RecordGetterError`` for update id 123456789.

    Only the head record (built with ``fake_record('This is the HEAD', 1)``)
    is inserted before the merger is started.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    record_insert_or_replace(fake_record('This is the HEAD', 1))

    missing_id = 123456789
    with pytest.raises(RecordGetterError):
        start_merger(head_id=1, update_id=missing_id, current_user_id=1)
def test_manual_merge_with_none_record(workflow_app):
    """Starting a merge with update id 123456789 raises ``RecordGetterError``.

    No record is inserted for that id; only the head is stored.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head_payload = fake_record('This is the HEAD', 1)
    record_insert_or_replace(head_payload)

    merger_arguments = {
        'head_id': 1,
        'update_id': 123456789,
        'current_user_id': 1,
    }
    with pytest.raises(RecordGetterError):
        start_merger(**merger_arguments)
Example #13
0
def merged_records(app):
    """Insert two literature records built from MARCXML: 111 and 222.

    Both are converted with ``marcxml2record``, given the HEP ``$schema`` and
    stored in one nested transaction, after which ``records-hep`` is
    refreshed.  After the ``yield`` both are removed with
    ``_delete_merged_records``.
    """
    merged_xml = ''.join([
        '<record>',
        '  <controlfield tag="001">111</controlfield>',
        '  <datafield tag="245" ind1=" " ind2=" ">',
        '    <subfield code="a">merged</subfield>',
        '  </datafield>',
        '  <datafield tag="981" ind1=" " ind2=" ">',
        '    <subfield code="a">222</subfield>',
        '  </datafield>',
        '  <datafield tag="980" ind1=" " ind2=" ">',
        '    <subfield code="a">HEP</subfield>',
        '  </datafield>',
        '</record>',
    ])
    deleted_xml = ''.join([
        '<record>',
        '  <controlfield tag="001">222</controlfield>',
        '  <datafield tag="245" ind1=" " ind2=" ">',
        '    <subfield code="a">deleted</subfield>',
        '  </datafield>',
        '  <datafield tag="970" ind1=" " ind2=" ">',
        '    <subfield code="d">111</subfield>',
        '  </datafield>',
        '  <datafield tag="980" ind1=" " ind2=" ">',
        '    <subfield code="a">HEP</subfield>',
        '    <subfield code="c">DELETED</subfield>',
        '  </datafield>',
        '</record>',
    ])

    merged = marcxml2record(merged_xml)
    merged['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    deleted = marcxml2record(deleted_xml)
    deleted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    # The UUIDs are kept because the clean-up call needs them.
    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(merged).id
        deleted_uuid = record_insert_or_replace(deleted).id
    db.session.commit()
    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
Example #14
0
def merged_records(app):
    """Store MARCXML-derived literature records 111 and 222.

    Each snippet is converted with ``marcxml2record`` and tagged with the HEP
    ``$schema`` before being inserted.  The code after the ``yield`` removes
    both records through ``_delete_merged_records``.
    """
    def _to_hep(marcxml):
        # Convert the snippet and attach the schema used by this fixture.
        converted = marcxml2record(marcxml)
        converted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'
        return converted

    surviving = _to_hep(
        '<record>'
        '  <controlfield tag="001">111</controlfield>'
        '  <datafield tag="245" ind1=" " ind2=" ">'
        '    <subfield code="a">merged</subfield>'
        '  </datafield>'
        '  <datafield tag="981" ind1=" " ind2=" ">'
        '    <subfield code="a">222</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '  </datafield>'
        '</record>'
    )
    discarded = _to_hep(
        '<record>'
        '  <controlfield tag="001">222</controlfield>'
        '  <datafield tag="245" ind1=" " ind2=" ">'
        '    <subfield code="a">deleted</subfield>'
        '  </datafield>'
        '  <datafield tag="970" ind1=" " ind2=" ">'
        '    <subfield code="d">111</subfield>'
        '  </datafield>'
        '  <datafield tag="980" ind1=" " ind2=" ">'
        '    <subfield code="a">HEP</subfield>'
        '    <subfield code="c">DELETED</subfield>'
        '  </datafield>'
        '</record>'
    )

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(surviving).id
        deleted_uuid = record_insert_or_replace(discarded).id
    db.session.commit()

    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
Example #15
0
def cern_with_hal_id(app):
    """Temporarily add the HAL id ``300037`` to institution record 902725.

    Set-up stores the identifier; the code after the ``yield`` deletes the
    ``external_system_identifiers`` key.  ``records-institutions`` is
    refreshed after each store.
    """
    def _save(institution):
        record_insert_or_replace(institution)
        es.indices.refresh('records-institutions')

    cern = get_db_record('ins', 902725)
    cern['external_system_identifiers'] = [
        {'schema': 'HAL', 'value': '300037'},
    ]
    _save(cern)

    yield

    cern = get_db_record('ins', 902725)
    del cern['external_system_identifiers']
    _save(cern)
Example #16
0
def not_yet_merged_records(app):
    """Insert two independent literature records, 111 and 222.

    Neither record references the other.  After the ``yield`` both are
    removed with ``_delete_merged_records``.
    """
    future_merged = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
        '_collections': ['Literature'],
    }
    future_deleted = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
        '_collections': ['Literature'],
    }

    # The UUIDs are kept because the clean-up call needs them.
    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(future_merged).id
        deleted_uuid = record_insert_or_replace(future_deleted).id
    db.session.commit()

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
def insert_journals_in_db(workflow_app):
    """Temporarily add a few journal records to the DB.

    The JSON fixtures are read with ``pkg_resources`` from the ``fixtures``
    resource directory of this module, inserted in one nested transaction and
    committed; the ``records-journals`` index is then refreshed.  After the
    ``yield`` the journal records 1936475 and 1936476 are deleted and the
    index is refreshed again.
    """
    # Imported here because it is a Celery task.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    # Resource names are always '/'-separated, whatever the platform, so they
    # must not be assembled with os.path.join.
    resource_names = (
        'fixtures/jou_record_refereed.json',
        'fixtures/jou_record_refereed_and_proceedings.json',
    )
    journals = [
        json.loads(pkg_resources.resource_string(__name__, resource_name))
        for resource_name in resource_names
    ]

    with db.session.begin_nested():
        for journal in journals:
            record_insert_or_replace(journal)
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    # NOTE(review): presumably the control numbers of the fixtures loaded
    # above -- confirm against the JSON files.
    for control_number in (1936475, 1936476):
        _delete_record('jou', control_number)
    es.indices.refresh('records-journals')
Example #18
0
def deleted_record(app):
    """Insert literature record 111, built from a MARCXML snippet.

    The snippet carries a ``980`` field with subfield ``c`` set to
    ``DELETED``.  It is converted with ``hep.do(create_record(...))``, given
    the HEP ``$schema`` and committed.  After the ``yield`` the record is
    removed with ``_delete_record``.
    """
    marcxml_lines = (
        '<record>',
        '  <controlfield tag="001">111</controlfield>',
        '  <datafield tag="245" ind1=" " ind2=" ">',
        '    <subfield code="a">deleted</subfield>',
        '  </datafield>',
        '  <datafield tag="980" ind1=" " ind2=" ">',
        '    <subfield code="c">DELETED</subfield>',
        '  </datafield>',
        '</record>',
    )
    marc_record = create_record(''.join(marcxml_lines))

    converted = hep.do(marc_record)
    converted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    with db.session.begin_nested():
        record_insert_or_replace(converted)
    db.session.commit()

    yield

    _delete_record('lit', 111)
Example #19
0
def not_yet_merged_records(app):
    """Store literature records 111 (``merged``) and 222 (``deleted``).

    The two payloads differ only in control number, title and ``self``
    reference.  The code after the ``yield`` removes both records with
    ``_delete_merged_records``.
    """
    def _article(control_number, title, self_ref):
        # Shared shape of the two payloads.
        return {
            '$schema': 'http://localhost:5000/schemas/records/hep.json',
            'control_number': control_number,
            'document_type': [
                'article',
            ],
            'titles': [
                {'title': title},
            ],
            'self': {
                '$ref': self_ref,
            },
            '_collections': ['Literature'],
        }

    first = _article(
        111, 'merged', 'http://localhost:5000/api/literature/111')
    second = _article(
        222, 'deleted', 'http://localhost:5000/api/literature/222')

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(first).id
        deleted_uuid = record_insert_or_replace(second).id
    db.session.commit()

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
Example #20
0
def test_get_head_source_return_arxiv_when_one_arxive_source_present(app, simple_record):
    """``get_head_source`` answers ``arxiv`` once an ``arxiv`` root is stored.

    With only the ``ejl`` root present the expected answer is ``publisher``.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    record_uuid = record_insert_or_replace(simple_record).id

    def _add_root(source):
        insert_wf_record_source(
            json=simple_record, record_uuid=record_uuid, source=source)

    # First source for the record.
    _add_root('ejl')
    assert get_head_source(record_uuid) == 'publisher'

    # Second source for the same record.
    _add_root('arxiv')
    assert get_head_source(record_uuid) == 'arxiv'
def test_save_roots(workflow_app):
    """``save_roots`` leaves the head record with roots ``a``, ``b`` and ``c``.

    The head starts with roots ``a`` and ``b``, the update with roots ``a``
    and ``c``.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    head = record_insert_or_replace(fake_record('title1', 123))
    update = record_insert_or_replace(fake_record('title2', 456))

    wf_object = workflow_object_class.create(
        data={},
        data_type='hep',
    )
    wf_object.extra_data['head_uuid'] = str(head.id)
    wf_object.extra_data['update_uuid'] = str(update.id)
    wf_object.save()

    for source in ('a', 'b'):
        insert_wf_record_source(json={}, record_uuid=head.id, source=source)

    # Source `a` of the update will not be saved because there's already an
    # entry with source `a` for the head.
    for source in ('a', 'c'):
        insert_wf_record_source(json={}, record_uuid=update.id, source=source)

    save_roots(wf_object, None)

    for expected_source in 'abc':
        assert read_wf_record_source(str(head.id), expected_source)
Example #22
0
def deleted_record(app):
    """Insert literature record 111, converted from a MARCXML snippet.

    The snippet's ``980`` field holds ``HEP`` in subfield ``a`` and
    ``DELETED`` in subfield ``c``.  The converted record gets the HEP
    ``$schema`` and is committed; after the ``yield`` it is removed with
    ``_delete_record``.
    """
    marcxml = ''.join([
        '<record>',
        '  <controlfield tag="001">111</controlfield>',
        '  <datafield tag="245" ind1=" " ind2=" ">',
        '    <subfield code="a">deleted</subfield>',
        '  </datafield>',
        '  <datafield tag="980" ind1=" " ind2=" ">',
        '    <subfield code="a">HEP</subfield>',
        '    <subfield code="c">DELETED</subfield>',
        '  </datafield>',
        '</record>',
    ])

    converted = marcxml2record(marcxml)
    converted['$schema'] = 'http://localhost:5000/schemas/records/hep.json'

    with db.session.begin_nested():
        record_insert_or_replace(converted)
    db.session.commit()

    yield

    _delete_record('lit', 111)
Example #23
0
def not_yet_deleted_record(app):
    """Insert literature record 111, titled ``deleted``, and commit it.

    The payload has no ``deleted`` key.  After the ``yield`` the record is
    removed with ``_delete_record``.
    """
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/schemas/records/hep.json'},
    }

    savepoint = db.session.begin_nested()
    with savepoint:
        record_insert_or_replace(payload)
    db.session.commit()

    yield

    _delete_record('lit', 111)
Example #24
0
def insert_journals_in_db(workflow_app):
    """Temporarily add a few journal records to the DB.

    Five JSON fixtures are read with ``pkg_resources`` from the ``fixtures``
    resource directory of this module, inserted in one nested transaction and
    committed; the ``records-journals`` index is then refreshed.  After the
    ``yield`` five journal records are deleted by control number and the
    index is refreshed again.
    """
    # Imported here because it is a Celery task.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    # Resource names are always '/'-separated, whatever the platform, so they
    # must not be assembled with os.path.join.  The order below is the order
    # in which the records are inserted.
    resource_names = (
        'fixtures/jou_record_fully_covered_1.json',
        'fixtures/jou_record_partially_covered_1.json',
        'fixtures/jou_record_partially_covered_2.json',
        'fixtures/jou_record_refereed.json',
        'fixtures/jou_record_refereed_and_proceedings.json',
    )
    journals = [
        json.loads(pkg_resources.resource_string(__name__, resource_name))
        for resource_name in resource_names
    ]

    with db.session.begin_nested():
        for journal in journals:
            record_insert_or_replace(journal)
    db.session.commit()
    es.indices.refresh('records-journals')

    yield

    # NOTE(review): presumably the control numbers of the fixtures loaded
    # above -- confirm against the JSON files.
    for control_number in (1936475, 1936476, 1936480, 1936481, 1936482):
        _delete_record('jou', control_number)
    es.indices.refresh('records-journals')
Example #25
0
def test_get_head_source_return_arxiv_when_one_arxive_source_present(
        app, simple_record):
    """The head source is ``publisher`` with an ``ejl`` root, then ``arxiv``.

    The answer is expected to switch to ``arxiv`` as soon as a root with
    source ``arxiv`` is stored for the same record.
    """
    # XXX: for some reason, this import must happen inside the test.
    from inspirehep.modules.migrator.tasks import record_insert_or_replace

    stored = record_insert_or_replace(simple_record)
    record_uuid = stored.id

    # Two sources are stored for the same record, one at a time, and the
    # head source is checked after each insertion.
    expectations = (
        ('ejl', 'publisher'),
        ('arxiv', 'arxiv'),
    )
    for source, expected_head_source in expectations:
        insert_wf_record_source(
            json=simple_record, record_uuid=record_uuid, source=source)
        assert get_head_source(record_uuid) == expected_head_source
Example #26
0
def records_to_be_merged(app):
    """Insert literature records 111, 222 and 333, the last pointing at 222.

    Record 333 references record 222 through ``accelerator_experiments``.
    All three are stored in one nested transaction and ``records-hep`` is
    refreshed.  After the ``yield`` 111 and 222 are removed with
    ``_delete_merged_records`` and 333 with ``_delete_record``.
    """
    target = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
        '_collections': ['Literature'],
    }
    duplicate = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
        '_collections': ['Literature'],
    }
    referrer = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'accelerator_experiments': [
            {'record': {'$ref': 'http://localhost:5000/api/literature/222'}},
        ],
        'control_number': 333,
        'document_type': ['article'],
        'titles': [{'title': 'pointing'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/333'},
        '_collections': ['Literature'],
    }

    with db.session.begin_nested():
        # Only the UUIDs of the first two records are needed for clean-up.
        merged_uuid, deleted_uuid = [
            record_insert_or_replace(payload).id
            for payload in (target, duplicate)
        ]
        record_insert_or_replace(referrer)
    db.session.commit()
    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
    _delete_record('lit', 333)
Example #27
0
def records_to_be_merged(app):
    """Store literature records 111 and 222, plus 333 which references 222.

    None of the payloads carries a ``_collections`` key.  After insertion the
    ``records-hep`` index is refreshed; the code after the ``yield`` deletes
    all three records.
    """
    record_111 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'merged'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/111'},
    }
    record_222 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'control_number': 222,
        'document_type': ['article'],
        'titles': [{'title': 'deleted'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/222'},
    }
    # Record 333 points at record 222 via ``accelerator_experiments``.
    record_333 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'accelerator_experiments': [
            {'record': {'$ref': 'http://localhost:5000/api/literature/222'}},
        ],
        'control_number': 333,
        'document_type': ['article'],
        'titles': [{'title': 'pointing'}],
        'self': {'$ref': 'http://localhost:5000/api/literature/333'},
    }

    with db.session.begin_nested():
        merged_uuid = record_insert_or_replace(record_111).id
        deleted_uuid = record_insert_or_replace(record_222).id
        record_insert_or_replace(record_333)
    db.session.commit()
    es.indices.refresh('records-hep')

    yield

    _delete_merged_records('lit', 111, 222, merged_uuid, deleted_uuid)
    _delete_record('lit', 333)