Beispiel #1
0
def check_missing_records_in_es(data_output):
    """Report literature pids present in the pidstore but absent from ES.

    Every ``lit`` persistent identifier whose database record is not flagged
    as deleted is looked up in ElasticSearch. Each pid value that cannot be
    fetched is written on its own line to ``data_output`` and the number of
    misses is echoed at the end.

    :param data_output: path of the file receiving the missing pid values.
    """
    def _lit_pids():
        # Build a fresh query on every call: one is counted, one is iterated.
        return PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == 'lit')

    total = int(_lit_pids().count())
    _prepare_logdir(data_output)
    click.echo("All missing records pids will be saved in %s file" %
               data_output)

    missing_count = 0
    progress_label = "Processing pids (%s pids)..." % total
    with click.progressbar(_gen_query(_lit_pids()),
                           length=total,
                           label=progress_label) as pids:
        with open(data_output, 'w') as out:
            for pid in pids:
                recid = pid.pid_value
                if get_db_record('lit', recid).get('deleted'):
                    # Deleted records are not expected to be checked.
                    continue
                try:
                    get_es_record('lit', recid)
                except RecordGetterError:
                    missing_count += 1
                    out.write("%s\n" % recid)
                    # Pushed to disk right away after every miss.
                    out.flush()
    click.echo("%s records are missing from es" % missing_count)
Beispiel #2
0
def test_migrate_from_mirror_doesnt_index_deleted_records(isolated_app):
    """Migrate two fixtures and check which records end up in ES.

    After migrating ``dummy.xml`` and ``deleted_record.xml``, record 12345
    must be retrievable from ElasticSearch while fetching record 1234 must
    raise ``RecordGetterError``.
    """
    # Resolve both fixture paths before any migration is started.
    fixture_paths = [
        pkg_resources.resource_filename(
            __name__, os.path.join('fixtures', fixture_name))
        for fixture_name in ('dummy.xml', 'deleted_record.xml')
    ]
    for fixture_path in fixture_paths:
        migrate_from_file(fixture_path, wait_for_results=True)

    # Raises if the record is not in ES, which fails the test.
    get_es_record('lit', 12345)
    with pytest.raises(RecordGetterError):
        get_es_record('lit', 1234)
def test_migrate_from_mirror_doesnt_index_deleted_records(isolated_app):
    """Check that only one of the two migrated fixtures is found in ES.

    ``dummy.xml`` and ``deleted_record.xml`` are migrated in that order;
    afterwards record 12345 must be fetchable from ElasticSearch and
    fetching record 1234 must raise ``RecordGetterError``.
    """
    def fixture_path(filename):
        # Path of a file inside this package's ``fixtures`` directory.
        return pkg_resources.resource_filename(
            __name__, os.path.join('fixtures', filename))

    dummy_path = fixture_path('dummy.xml')
    deleted_path = fixture_path('deleted_record.xml')

    migrate_from_file(dummy_path, wait_for_results=True)
    migrate_from_file(deleted_path, wait_for_results=True)

    # No assertion needed: a missing record makes this call raise.
    get_es_record('lit', 12345)
    with pytest.raises(RecordGetterError):
        get_es_record('lit', 1234)
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """Check that ``facet_author_name`` exists in ES but not in the DB copy.

    A literature record with control number 111 is created and committed,
    the ``records-hep`` index is refreshed, and the record is then read back
    from both the database and ElasticSearch. The record is deleted at the
    end of the test.
    """
    recid = 111
    cited_ref = {'$ref': 'http://localhost:5000/api/literature/1498589'}
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': recid,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [{'record': cited_ref}],
    }

    record = InspireRecord.create(record_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    db_record = get_db_record('lit', recid)
    es_record = get_es_record('lit', recid)
    assert 'facet_author_name' not in db_record
    assert 'facet_author_name' in es_record

    # Manual cleanup of the committed record.
    _delete_record('lit', recid)
Beispiel #5
0
 def _get_pubnote(self):
     """Return the publication note as ``coden,volume,pages``.

     The entries of ``publication_info`` are visited in order, remembering
     the most recent journal title, volume and page value seen. As soon as
     a journal title plus a volume or a page value is known, the journal
     record is fetched from ElasticSearch and its first ``coden`` is joined
     with the volume and the first page.

     :returns: the ``coden,volume,pages`` string, or ``''`` when the record
         has no ``publication_info``, no entry provides enough data, or the
         lookup/formatting fails.
     """
     journal, volume, pages = '', '', ''
     if 'publication_info' not in self.record:
         return ''

     for field in self.record['publication_info']:
         if 'journal_title' in field:
             journal = field['journal_title']
         if 'journal_volume' in field:
             volume = field['journal_volume']
         if 'page_artid' in field:
             pages = field['page_artid']
             if pages:
                 if isinstance(pages, list):
                     # Only the first entry of a list of pages is used.
                     pages = pages[0]
                 # Keep the first page of a range such as "12-34"; a value
                 # without a dash is left unchanged.
                 pages = pages.split('-')[0]

         if not (journal and (volume != '' or pages != '')):
             continue

         try:
             # NOTE(review): the 'journals' lookup is keyed on this record's
             # own control number - confirm that this is the intended id.
             recid = self.record['control_number']
             record = get_es_record('journals', recid)
             return ','.join([record['coden'][0], volume, pages])
         except Exception:
             # Best effort: any lookup or formatting failure yields no note.
             return ''

     # No entry carried enough information to build a note.
     return ''
Beispiel #6
0
def test_batch_reindex_one_deleted_record(app, create_records,
                                          celery_app_with_context,
                                          celery_session_worker):
    """Reindex a single deleted record and check it is not found in ES.

    The task must report no failures and zero successes, and fetching the
    record from ElasticSearch must raise ``RecordGetterError``.
    """
    deleted_record = create_records(
        n=1, additional_props={'deleted': True})[0]

    task = batch_reindex.apply_async(
        kwargs={'uuids': [str(deleted_record.id)]})

    assert task.get(timeout=10) == {'failures': [], 'success': 0}

    recid = deleted_record.json['control_number']
    with pytest.raises(RecordGetterError):
        get_es_record('lit', recid)
Beispiel #7
0
def test_batch_reindex_two_records(app, create_records,
                                   celery_app_with_context,
                                   celery_session_worker):
    """Reindex two records and check both can be fetched from ES.

    The task must report no failures and two successes.
    """
    records = create_records(n=2)

    task = batch_reindex.apply_async(
        kwargs={'uuids': [str(record.id) for record in records]})

    assert task.get(timeout=10) == {'failures': [], 'success': 2}

    # Index explicitly so a short result list still fails loudly.
    for position in (0, 1):
        recid = records[position].json['control_number']
        assert get_es_record('lit', recid)
Beispiel #8
0
def test_batch_reindex_one_records_and_skips_one_deleted(
        app, create_records, celery_app_with_context, celery_session_worker):
    """Reindex one regular and one deleted record in the same batch.

    The task must report no failures and exactly one success: the regular
    record must be fetchable from ElasticSearch, while fetching the deleted
    one must raise ``RecordGetterError``.
    """
    records = create_records(n=1)
    uuids = [str(r.id) for r in records]

    failing_record = create_records(n=1, additional_props={'deleted': True})[0]
    # Stringified like every other uuid so the task receives a homogeneous
    # list of strings.
    uuids.append(str(failing_record.id))

    task = batch_reindex.apply_async(kwargs={'uuids': uuids})

    result = task.get(timeout=10)
    expected = {'failures': [], 'success': 1}
    assert result == expected

    control_number = records[0].json['control_number']
    assert get_es_record('lit', control_number)

    control_number = failing_record.json['control_number']
    with pytest.raises(RecordGetterError):
        get_es_record('lit', control_number)
Beispiel #9
0
def test_batch_reindex_one_record(app, create_records, celery_app_with_context,
                                  celery_session_worker):
    """Reindex a single record and check it can be fetched from ES.

    The task must report no failures and one success.
    """
    created = create_records(n=1)[0]

    task = batch_reindex.apply_async(kwargs={'uuids': [str(created.id)]})

    assert task.get(timeout=10) == {'failures': [], 'success': 1}
    assert get_es_record('lit', created.json['control_number'])
Beispiel #10
0
def check_missing_records_in_es(data_output):
    """Write the pids of non-deleted literature records missing from ES.

    Iterates over all ``lit`` persistent identifiers, skips those whose
    database record has a truthy ``deleted`` value, and writes every pid
    value that cannot be fetched from ElasticSearch to ``data_output``
    (one per line). The number of missing records is echoed at the end.

    :param data_output: path of the file receiving the missing pid values.
    """
    def _is_indexed(recid):
        # True when the record can be fetched from ElasticSearch.
        try:
            get_es_record('lit', recid)
        except RecordGetterError:
            return False
        return True

    pid_count = int(PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'lit').count())
    _prepare_logdir(data_output)
    click.echo("All missing records pids will be saved in %s file" % data_output)

    not_found = 0
    pid_stream = _gen_query(PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'lit'))
    bar_label = "Processing pids (%s pids)..." % pid_count
    with click.progressbar(pid_stream, length=pid_count, label=bar_label) as pidstore:
        with open(data_output, 'w') as report:
            for pid in pidstore:
                if get_db_record('lit', pid.pid_value).get('deleted'):
                    continue
                if _is_indexed(pid.pid_value):
                    continue
                not_found += 1
                report.write("%s\n" % pid.pid_value)
                # Flushed after every entry rather than once at the end.
                report.flush()
    click.echo("%s records are missing from es" % not_found)
Beispiel #11
0
 def _get_citation_number(self):
     """Return a sentence stating how often the record was cited.

     The count is read from the ``citation_count`` field of the record's
     ElasticSearch copy. An empty string is returned when that field is
     missing or equal to 0.
     """
     as_of = time.strftime("%d %b %Y")
     es_record = get_es_record('lit', self.record['control_number'])

     try:
         times_cited = es_record['citation_count']
     except KeyError:
         return ''

     if times_cited != 0:
         # Plural wording only for more than one citation.
         template = ('%d citations counted in INSPIRE as of %s'
                     if times_cited > 1
                     else '%d citation counted in INSPIRE as of %s')
         return template % (times_cited, as_of)
     return ''
Beispiel #12
0
 def _get_citation_number(self):
     """Describe the record's citation count, or return ``''`` if there is none.

     Reads ``citation_count`` from the ElasticSearch copy of the record.
     A missing field or a count of 0 gives an empty string; otherwise the
     count and today's date are formatted into a sentence.
     """
     today = time.strftime("%d %b %Y")
     indexed = get_es_record('lit', self.record['control_number'])

     try:
         count = indexed['citation_count']
     except KeyError:
         # Record has no citation information at all.
         return ''

     if count != 0:
         if count > 1:
             wording = '%d citations counted in INSPIRE as of %s'
         else:
             wording = '%d citation counted in INSPIRE as of %s'
         return wording % (count, today)
     return ''
Beispiel #13
0
 def _get_pubnote(self):
     """Return the publication note as ``coden,volume,pages``.

     The entries of ``publication_info`` are visited in order, remembering
     the most recent journal title, volume and page value (``page_start``,
     falling back to ``artid``). As soon as a journal title plus a volume
     or a page value is known, the journal record is fetched from
     ElasticSearch and its first ``coden`` is joined with volume and pages.

     :returns: the ``coden,volume,pages`` string, or ``""`` when the record
         has no ``publication_info``, no entry provides enough data, or the
         lookup/formatting fails.
     """
     journal, volume, pages = "", "", ""
     if "publication_info" not in self.record:
         return ""

     for field in self.record["publication_info"]:
         if "journal_title" in field:
             journal = field["journal_title"]
         if "journal_volume" in field:
             volume = field["journal_volume"]
         if "page_start" in field or "artid" in field:
             pages = field.get("page_start", "") or field.get("artid", "")

         if not (journal and (volume != "" or pages != "")):
             continue

         try:
             # NOTE(review): the "journals" lookup is keyed on this record's
             # own control number - confirm that this is the intended id.
             recid = self.record["control_number"]
             record = get_es_record("journals", recid)
             return ",".join([record["coden"][0], volume, pages])
         except Exception:
             # Best effort: any lookup or formatting failure yields no note.
             return ""

     # No entry carried enough information to build a note.
     return ""
Beispiel #14
0
 def _get_pubnote(self):
     """Return the publication note as ``coden,volume,pages``.

     The entries of ``publication_info`` are visited in order, remembering
     the most recent journal title, volume and page value (``page_start``,
     falling back to ``artid``). As soon as a journal title plus a volume
     or a page value is known, the journal record is fetched from
     ElasticSearch and its first ``coden`` is joined with volume and pages.

     :returns: the ``coden,volume,pages`` string, or ``''`` when the record
         has no ``publication_info``, no entry provides enough data, or the
         lookup/formatting fails.
     """
     journal, volume, pages = '', '', ''
     if 'publication_info' not in self.record:
         return ''

     for field in self.record['publication_info']:
         if 'journal_title' in field:
             journal = field['journal_title']
         if 'journal_volume' in field:
             volume = field['journal_volume']
         if 'page_start' in field or 'artid' in field:
             # ``artid`` may be absent when an empty ``page_start`` is
             # present, so it is read with a default as well.
             pages = field.get('page_start') or field.get('artid', '')

         if not (journal and (volume != '' or pages != '')):
             continue

         try:
             # NOTE(review): the 'jou' lookup is keyed on this record's own
             # control number - confirm that this is the intended id.
             recid = self.record['control_number']
             record = get_es_record('jou', recid)
             return ','.join([record['coden'][0], volume, pages])
         except Exception:
             # Best effort: any lookup or formatting failure yields no note.
             return ''

     # No entry carried enough information to build a note.
     return ''
Beispiel #15
0
 def _get_pubnote(self):
     """Return the publication note as ``coden,volume,pages``.

     The entries of ``publication_info`` are visited in order, remembering
     the most recent journal title, volume and page value (``page_start``,
     falling back to ``artid``). As soon as a journal title plus a volume
     or a page value is known, the journal record is fetched from
     ElasticSearch and its first ``coden`` is joined with volume and pages.

     :returns: the ``coden,volume,pages`` string, or ``''`` when the record
         has no ``publication_info``, no entry provides enough data, or the
         lookup/formatting fails.
     """
     journal, volume, pages = '', '', ''
     if 'publication_info' not in self.record:
         return ''

     for field in self.record['publication_info']:
         if 'journal_title' in field:
             journal = field['journal_title']
         if 'journal_volume' in field:
             volume = field['journal_volume']
         if 'page_start' in field or 'artid' in field:
             pages = field.get('page_start', '') or field.get('artid', '')

         if not (journal and (volume != '' or pages != '')):
             continue

         try:
             # NOTE(review): the 'journals' lookup is keyed on this record's
             # own control number - confirm that this is the intended id.
             recid = self.record['control_number']
             record = get_es_record('journals', recid)
             return ','.join([record['coden'][0], volume, pages])
         except Exception:
             # Best effort: any lookup or formatting failure yields no note.
             return ''

     # No entry carried enough information to build a note.
     return ''
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """Compare the DB and ES copies of a freshly created record.

    Record 111 is created, committed and indexed; ``facet_author_name``
    must be absent from the database copy and present in the ElasticSearch
    copy. The record is deleted once both assertions passed.
    """
    literature_ref = 'http://localhost:5000/api/literature/1498589'
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [{'record': {'$ref': literature_ref}}],
    }

    created = InspireRecord.create(payload)
    created.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    from_db = get_db_record('lit', 111)
    from_es = get_es_record('lit', 111)
    assert 'facet_author_name' not in from_db
    assert 'facet_author_name' in from_es

    # Remove the record created by this test.
    _delete_record('lit', 111)
Beispiel #17
0
def test_index_after_commit_indexes_also_cites_record_when_citer_is_deleted(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the cited record's citation count follows the citer's lifecycle.

    Steps:
      1. create record 9999 and check its citation count in ES is 0;
      2. create record 8888 referencing 9999 and check the count becomes 1;
      3. delete record 8888 and check the count drops back to 0.

    After every commit the test asserts the arguments the mocked indexing
    task was called with and then runs
    ``index_modified_citations_from_record`` by hand with those arguments.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    cited.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commits above - confirm against the task signature.
    expected_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    # Second record, whose only reference points at record 9999.
    citing_json = {
        '$schema':
        'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number':
        8888,
        'references': [{
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'
            },
            'reference': {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The cited record now has exactly one citation.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec).total == 1

    # Delete the citing record and index again.
    record.delete()
    record.commit()
    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The citation is gone again.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited['control_number'])
    def get_citation_count(recid):
        """Return the ``citation_count`` of literature record ``recid`` in ES."""
        return get_es_record('lit', recid)['citation_count']
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation task raises when its record is not in the DB.

    A cited record (9999) and a citing record (8888) are created. The citing
    record is then deleted before ``index_modified_citations_from_record``
    is run for it, and the call is expected to raise ``RecordGetterError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commits above - confirm against the task signature.
    expected_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    # Second record, whose only reference points at record 9999.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999'
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*expected_args)

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', cited['control_number'])
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that one citing record updates the counts of two cited records.

    Steps:
      1. create records 9999 and 9998 and check both have 0 citations;
      2. create record 8888 whose single reference links to no record and
         check both counts are still 0;
      3. replace the references of 8888 with links to 9998 and 9999 and
         check both counts become 1.

    After every commit the test asserts the arguments the mocked indexing
    task was called with and then runs
    ``index_modified_citations_from_record`` by hand with those arguments.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited1 = InspireRecord.create(data=json1, skip_files=True)
    cited1.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commits above - confirm against the task signature.
    expected_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature']
    }

    cited2 = InspireRecord.create(data=json2, skip_files=True)
    cited2.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Neither record is cited yet.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    # Citing record whose only reference carries no 'record' link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Counts are unchanged by the unlinked reference.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec1).total == 0
    assert LiteratureSearch.citations(es_rec2).total == 0

    # New reference list linking to both cited records.
    references = {
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9998'
                },
            },
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999'
                },
            }
        ]
    }

    # Replace the whole content of the citing record with the updated JSON.
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Both cited records now have one citation each.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec1).total == 1
    assert LiteratureSearch.citations(es_rec2).total == 1

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that one citing record updates the counts of two cited records.

    Steps:
      1. create records 9999 and 9998 and check both have 0 citations;
      2. create record 8888 whose single reference links to no record and
         check both counts are still 0;
      3. replace the references of 8888 with links to 9998 and 9999 and
         check both counts become 1.

    After every commit the test asserts the arguments the mocked indexing
    task was called with and then runs
    ``index_modified_citations_from_record`` by hand with those arguments.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited1 = InspireRecord.create(data=json1, skip_files=True)
    cited1.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commits above - confirm against the task signature.
    expected_args = ('lit', cited1['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This also is the record being cited'
        }],
        'control_number': 9998,
        '_collections': ['Literature']
    }

    cited2 = InspireRecord.create(data=json2, skip_files=True)
    cited2.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', cited2['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Neither record is cited yet.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    # Citing record whose only reference carries no 'record' link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            'reference': {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Counts are unchanged by the unlinked reference.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 0
    assert es_rec2['citation_count'] == 0
    assert get_citations_from_es(es_rec1).total == 0
    assert get_citations_from_es(es_rec2).total == 0

    # New reference list linking to both cited records.
    references = {
        'references': [{
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9998'
            },
        }, {
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'
            },
        }]
    }

    # Replace the whole content of the citing record with the updated JSON.
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 3)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Both cited records now have one citation each.
    es_rec1 = get_es_record('lit', 9999)
    es_rec2 = get_es_record('lit', 9998)
    assert es_rec1['citation_count'] == 1
    assert es_rec2['citation_count'] == 1
    assert get_citations_from_es(es_rec1).total == 1
    assert get_citations_from_es(es_rec2).total == 1

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
 def get_record(self, pid_type, recid):
     """Fetch a record from ElasticSearch, or ``None`` if it cannot be found.

     :param pid_type: pid type forwarded to ``get_es_record``.
     :param recid: record identifier forwarded to ``get_es_record``.
     """
     try:
         es_record = record_getter.get_es_record(pid_type, recid)
     except record_getter.RecordGetterError:
         es_record = None
     return es_record
Beispiel #23
0
 def get_record(self, record_type, recid):
     """Return the ElasticSearch record, or ``None`` when the lookup fails.

     :param record_type: record type forwarded to ``get_es_record``.
     :param recid: record identifier forwarded to ``get_es_record``.
     """
     found = None
     try:
         found = record_getter.get_es_record(record_type, recid)
     except record_getter.RecordGetterError:
         # A failed lookup is reported as ``None`` instead of an error.
         pass
     return found
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that adding a reference to a record raises the cited count.

    Steps:
      1. create record 9999 and check its citation count in ES is 0;
      2. create record 8888 whose single reference links to no record and
         check the count is still 0;
      3. replace the references of 8888 with one linking to 9999 and check
         the count becomes 1.

    After every commit the test asserts the arguments the mocked indexing
    task was called with and then runs
    ``index_modified_citations_from_record`` by hand with those arguments.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commit above - confirm against the task signature.
    expected_args = 'lit', cited['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    # Citing record whose only reference carries no 'record' link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [{
            "reference": {
                'authors': [{
                    'full_name': 'Smith, J.'
                }]
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The count is unchanged by the unlinked reference.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    # Same reference, now linked to record 9999.
    references = {
        'references': [{
            "curated_relation": False,
            "record": {
                "$ref": "http://localhost:5000/api/literature/9999"
            },
            "reference": {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    # Replace the whole content of the citing record with the updated JSON.
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 2
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The cited record now has exactly one citation.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert get_citations_from_es(es_rec).total == 1

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check that adding a reference to a record raises the cited count.

    Steps:
      1. create record 9999 and check its citation count in ES is 0;
      2. create record 8888 whose single reference links to no record and
         check the count is still 0;
      3. replace the references of 8888 with one linking to 9999 and check
         the count becomes 1.

    After every commit the test asserts the arguments the mocked indexing
    task was called with and then runs
    ``index_modified_citations_from_record`` by hand with those arguments.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.
    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature']
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commit above - confirm against the task signature.
    expected_args = 'lit', cited['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    # Citing record whose only reference carries no 'record' link.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {"reference": {'authors': [{'full_name': 'Smith, J.'}]}}
        ]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 1
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The count is unchanged by the unlinked reference.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    # Same reference, now linked to record 9999.
    references = {
        'references': [
            {
                "curated_relation": False,
                "record": {
                    "$ref": "http://localhost:5000/api/literature/9999"
                },
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                }
            }
        ]
    }

    # Replace the whole content of the citing record with the updated JSON.
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = 'lit', record['control_number'], 2
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # The cited record now has exactly one citation.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 1
    assert LiteratureSearch.citations(es_rec).total == 1

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Check the citation task raises when its record is not in the DB.

    A cited record (9999) and a citing record (8888) are created. The citing
    record is then deleted before ``index_modified_citations_from_record``
    is run for it, and the call is expected to raise ``RecordGetterError``.
    """
    # this test doesn't use the isolated_app because it needs to commit to
    # the DB in order to create records versions.

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'This is the record being cited'
        }],
        'control_number': 9999,
        '_collections': ['Literature']
    }

    cited = InspireRecord.create(data=json_data, skip_files=True)
    cited.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    # NOTE(review): the third element is presumably the record version
    # expected after the commits above - confirm against the task signature.
    expected_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*expected_args)
    # execute mocked task
    index_modified_citations_from_record(*expected_args)

    # Nothing cites the record yet.
    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    # Second record, whose only reference points at record 9999.
    citing_json = {
        '$schema':
        'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{
            'title': 'Record citing the first one'
        }],
        '_collections': ['Literature'],
        'control_number':
        8888,
        'references': [{
            'record': {
                '$ref': 'http://localhost:5000/api/literature/9999'
            },
            'reference': {
                'authors': [{
                    'full_name': 'Smith, J.'
                }],
            }
        }]
    }

    record = InspireRecord.create(data=citing_json, skip_files=True)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    expected_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*expected_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*expected_args)

    # Manual cleanup; not reached if an assertion above fails.
    _delete_record('lit', cited['control_number'])
    def get_citation_count(recid):
        """Return the ``citation_count`` stored in ES for record ``recid``."""
        return get_es_record("literature", recid)["citation_count"]