def check_missing_records_in_es(data_output):
    """Report literature pids whose non-deleted records cannot be read from ES.

    Every ``lit`` persistent identifier is visited. Records whose database
    copy carries a truthy ``deleted`` key are skipped; for the others a
    lookup through ``get_es_record`` is attempted and, when it raises
    ``RecordGetterError``, the pid value is appended to ``data_output``.

    Args:
        data_output: path of the text file receiving one pid value per line.
    """
    def literature_pids():
        # A fresh query each time, exactly as many as the task needs (two).
        return PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == 'lit')

    all_records = int(literature_pids().count())
    _prepare_logdir(data_output)
    click.echo("All missing records pids will be saved in %s file" % data_output)

    missing = 0
    _query = _gen_query(literature_pids())
    progress_label = "Processing pids (%s pids)..." % all_records
    with click.progressbar(_query, length=all_records, label=progress_label) as pidstore, \
            open(data_output, 'w') as data_file:
        for pid in pidstore:
            if get_db_record('lit', pid.pid_value).get('deleted'):
                continue
            try:
                get_es_record('lit', pid.pid_value)
            except RecordGetterError:
                missing += 1
                data_file.write("%s\n" % pid.pid_value)
                # Flush right away so the file is usable while the scan runs.
                data_file.flush()

    click.echo("%s records are missing from es" % missing)
def test_migrate_from_mirror_doesnt_index_deleted_records(isolated_app):
    """After migrating both fixtures, 12345 is in ES and 1234 is not."""
    def fixture_path(filename):
        return pkg_resources.resource_filename(
            __name__, os.path.join('fixtures', filename))

    regular_fixture = fixture_path('dummy.xml')
    deleted_fixture = fixture_path('deleted_record.xml')

    for path in (regular_fixture, deleted_fixture):
        migrate_from_file(path, wait_for_results=True)

    # Must not raise: this record is expected to have been indexed.
    get_es_record('lit', 12345)

    # presumably 1234 is the record from deleted_record.xml -- verify fixture
    with pytest.raises(RecordGetterError):
        get_es_record('lit', 1234)
def test_migrate_from_mirror_doesnt_index_deleted_records(isolated_app):
    """Migrate two fixture files; only record 12345 may be readable from ES."""
    fixture_names = ('dummy.xml', 'deleted_record.xml')
    fixture_paths = [
        pkg_resources.resource_filename(
            __name__, os.path.join('fixtures', name))
        for name in fixture_names
    ]

    for fixture_path in fixture_paths:
        migrate_from_file(fixture_path, wait_for_results=True)

    # A successful lookup (no exception) is the assertion here.
    get_es_record('lit', 12345)

    with pytest.raises(RecordGetterError):
        get_es_record('lit', 1234)
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """``facet_author_name`` appears in the ES copy but not in the DB copy."""
    cited_reference = {
        'record': {
            '$ref': 'http://localhost:5000/api/literature/1498589'
        }
    }
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'control_number': 111,
        'titles': [{'title': 'Jessica Jones'}],
        '_collections': ['Literature'],
        'references': [cited_reference],
    }

    record = InspireRecord.create(record_json)
    record.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    from_db = get_db_record('lit', 111)
    from_es = get_es_record('lit', 111)

    assert 'facet_author_name' not in from_db
    assert 'facet_author_name' in from_es

    # Remove the committed record again.
    _delete_record('lit', 111)
def _get_pubnote(self):
    """Return publication note.

    Walks ``self.record['publication_info']`` accumulating journal title,
    volume and first page. As soon as a journal plus a volume or page is
    known, the journals record is fetched and ``"<coden>,<volume>,<pages>"``
    is returned.

    Returns:
        The publication note, or an empty string when there is no
        publication info, not enough data, or the lookup fails.
    """
    if 'publication_info' not in self.record:
        return ''

    journal, volume, pages = '', '', ''
    for field in self.record['publication_info']:
        if 'journal_title' in field:
            journal = field['journal_title']
        if 'journal_volume' in field:
            volume = field['journal_volume']
        if 'page_artid' in field:
            pages = field['page_artid']
            # A list of page ranges is reduced to its first entry; the
            # previous code called ``.split`` on the list itself and crashed.
            if isinstance(pages, list):
                pages = pages[0] if pages else ''
            if pages:
                # Keep only the start page of a "start-end" range.
                pages = pages.split('-')[0]

        if not (journal and (volume != '' or pages != '')):
            continue

        try:
            # Was ``self.record['control_number', '']``: a tuple key that
            # always raised KeyError, so no coden was ever produced.
            recid = self.record['control_number']
            # NOTE(review): the journals lookup uses this record's own
            # control number -- confirm this is the intended identifier.
            record = get_es_record('journals', recid)
            return ','.join([record['coden'][0], volume, pages])
        except Exception:
            # Best effort: any lookup/formatting problem yields no pubnote.
            return ''

    return ''
def test_batch_reindex_one_deleted_record(
    app, create_records, celery_app_with_context, celery_session_worker
):
    """A deleted record is neither counted as success nor found in ES."""
    deleted_record = create_records(
        n=1, additional_props={'deleted': True})[0]

    async_result = batch_reindex.apply_async(
        kwargs={'uuids': [str(deleted_record.id)]})
    outcome = async_result.get(timeout=10)

    assert outcome == {'failures': [], 'success': 0}

    control_number = deleted_record.json['control_number']
    with pytest.raises(RecordGetterError):
        get_es_record('lit', control_number)
def test_batch_reindex_two_records(
    app, create_records, celery_app_with_context, celery_session_worker
):
    """Both reindexed records are reported as successes and found in ES."""
    first, second = create_records(n=2)

    async_result = batch_reindex.apply_async(
        kwargs={'uuids': [str(first.id), str(second.id)]})
    outcome = async_result.get(timeout=10)

    assert outcome == {'failures': [], 'success': 2}

    first_control_number = first.json['control_number']
    assert get_es_record('lit', first_control_number)

    second_control_number = second.json['control_number']
    assert get_es_record('lit', second_control_number)
def test_batch_reindex_one_records_and_skips_one_deleted(
    app, create_records, celery_app_with_context, celery_session_worker
):
    """One live record is indexed; the deleted one is silently skipped."""
    live_records = create_records(n=1)
    failing_record = create_records(
        n=1, additional_props={'deleted': True})[0]

    # The deleted record's id is passed as-is (not stringified), as before.
    uuids = [str(rec.id) for rec in live_records] + [failing_record.id]

    async_result = batch_reindex.apply_async(kwargs={'uuids': uuids})
    outcome = async_result.get(timeout=10)

    assert outcome == {'failures': [], 'success': 1}

    live_control_number = live_records[0].json['control_number']
    assert get_es_record('lit', live_control_number)

    deleted_control_number = failing_record.json['control_number']
    with pytest.raises(RecordGetterError):
        get_es_record('lit', deleted_control_number)
def test_batch_reindex_one_record(
    app, create_records, celery_app_with_context, celery_session_worker
):
    """A single reindexed record is reported as a success and found in ES."""
    only_record = create_records(n=1)[0]

    async_result = batch_reindex.apply_async(
        kwargs={'uuids': [str(only_record.id)]})
    outcome = async_result.get(timeout=10)

    assert outcome == {'failures': [], 'success': 1}

    control_number = only_record.json['control_number']
    assert get_es_record('lit', control_number)
def check_missing_records_in_es(data_output):
    """Write to ``data_output`` the lit pids that are in the DB but not in ES.

    Records whose database copy has a truthy ``deleted`` key are ignored.
    For every other ``lit`` pid an ES lookup is attempted; a
    ``RecordGetterError`` counts the pid as missing and logs its value.

    Args:
        data_output: destination file path, one missing pid value per line.
    """
    pid_model = PersistentIdentifier

    all_records = int(
        pid_model.query.filter(pid_model.pid_type == 'lit').count())
    _prepare_logdir(data_output)
    click.echo("All missing records pids will be saved in %s file" % data_output)

    missing = 0
    _query = _gen_query(
        pid_model.query.filter(pid_model.pid_type == 'lit'))

    progress = click.progressbar(
        _query,
        length=all_records,
        label="Processing pids (%s pids)..." % all_records,
    )
    with progress as pidstore:
        with open(data_output, 'w') as data_file:
            for pid in pidstore:
                db_rec = get_db_record('lit', pid.pid_value)
                if db_rec.get('deleted'):
                    continue

                try:
                    get_es_record('lit', pid.pid_value)
                except RecordGetterError:
                    missing += 1
                    data_file.write("%s\n" % pid.pid_value)
                    # Keep the on-disk report current during long runs.
                    data_file.flush()

    click.echo("%s records are missing from es" % missing)
def _get_citation_number(self):
    """Return a sentence stating how many times the record was cited.

    The count comes from the ``citation_count`` key of the record fetched
    via ``get_es_record('lit', ...)``. An empty string is returned when the
    count is 0 or the key is absent.
    """
    today = time.strftime("%d %b %Y")
    es_record = get_es_record('lit', self.record['control_number'])

    try:
        times_cited = es_record['citation_count']
    except KeyError:
        return ''

    if times_cited == 0:
        return ''

    # Plural wording only for counts greater than one.
    if times_cited > 1:
        template = '%d citations counted in INSPIRE as of %s'
    else:
        template = '%d citation counted in INSPIRE as of %s'
    return template % (times_cited, today)
def _get_citation_number(self):
    """Describe the record's citation count, or return '' if there is none.

    Reads ``citation_count`` from the ES copy of the record; a missing key
    or a count of 0 yields an empty string.
    """
    today = time.strftime("%d %b %Y")
    record = get_es_record('lit', self.record['control_number'])

    try:
        times_cited = record['citation_count']
    except KeyError:
        # No citation information indexed for this record.
        return ''

    if not times_cited != 0:
        return ''

    noun = 'citations' if times_cited > 1 else 'citation'
    return ('%d ' + noun + ' counted in INSPIRE as of %s') % (times_cited, today)
def _get_pubnote(self):
    """Return publication note.

    Iterates over ``self.record["publication_info"]`` collecting journal
    title, volume and page start (or article id). Once a journal plus a
    volume or page is known, the journals record is fetched and
    ``"<coden>,<volume>,<pages>"`` is returned.

    Returns:
        The publication note, or an empty string when publication info is
        absent, incomplete, or the lookup fails.
    """
    if "publication_info" not in self.record:
        return ""

    journal, volume, pages = "", "", ""
    for field in self.record["publication_info"]:
        if "journal_title" in field:
            journal = field["journal_title"]
        if "journal_volume" in field:
            volume = field["journal_volume"]
        if "page_start" in field or "artid" in field:
            pages = field.get("page_start", "") or field.get("artid", "")

        if not (journal and (volume != "" or pages != "")):
            continue

        try:
            # Was ``self.record["control_number", ""]``: a tuple key that
            # always raised KeyError, so the function could only return "".
            recid = self.record["control_number"]
            # NOTE(review): the journals lookup uses this record's own
            # control number -- confirm this is the intended identifier.
            record = get_es_record("journals", recid)
            return ",".join([record["coden"][0], volume, pages])
        except Exception:
            # Best effort: lookup or formatting failures mean "no pubnote".
            return ""

    return ""
def _get_pubnote(self):
    """Return publication note.

    Iterates over ``self.record['publication_info']`` collecting journal
    title, volume and page start (or article id). Once a journal plus a
    volume or page is known, the ``jou`` record is fetched and
    ``"<coden>,<volume>,<pages>"`` is returned.

    Returns:
        The publication note, or an empty string when publication info is
        absent, incomplete, or the lookup fails.
    """
    if 'publication_info' not in self.record:
        return ''

    journal, volume, pages = '', '', ''
    for field in self.record['publication_info']:
        if 'journal_title' in field:
            journal = field['journal_title']
        if 'journal_volume' in field:
            volume = field['journal_volume']
        if 'page_start' in field or 'artid' in field:
            # ``field['artid']`` raised KeyError when ``page_start`` was
            # present but empty and no ``artid`` existed; fall back to ''.
            pages = field.get('page_start') or field.get('artid', '')

        if not (journal and (volume != '' or pages != '')):
            continue

        try:
            recid = self.record['control_number']
            # NOTE(review): the journal lookup uses this record's own
            # control number -- confirm this is the intended identifier.
            record = get_es_record('jou', recid)
            return ','.join([record['coden'][0], volume, pages])
        except Exception:
            # Best effort: lookup or formatting failures mean "no pubnote".
            return ''

    return ''
def _get_pubnote(self):
    """Return publication note.

    Scans ``self.record['publication_info']`` for journal title, volume and
    page start (or article id). When a journal and at least one of volume
    or pages are available, the journals record is looked up and
    ``"<coden>,<volume>,<pages>"`` is returned.

    Returns:
        The publication note, or an empty string when nothing can be built
        (no publication info, incomplete data, or a failed lookup).
    """
    if 'publication_info' not in self.record:
        return ''

    journal = volume = pages = ''
    for field in self.record['publication_info']:
        if 'journal_title' in field:
            journal = field['journal_title']
        if 'journal_volume' in field:
            volume = field['journal_volume']
        if 'page_start' in field or 'artid' in field:
            pages = field.get('page_start', '') or field.get('artid', '')

        has_enough_data = journal and (volume != '' or pages != '')
        if not has_enough_data:
            continue

        try:
            # The original subscript ``['control_number', '']`` used a tuple
            # as key and therefore always failed with KeyError.
            recid = self.record['control_number']
            # NOTE(review): journals are fetched by this record's own
            # control number -- confirm this is the intended identifier.
            record = get_es_record('journals', recid)
            return ','.join([record['coden'][0], volume, pages])
        except Exception:
            # Deliberately forgiving: a failed lookup just means no pubnote.
            return ''

    return ''
def test_record_enhanced_in_es_and_not_enhanced_in_db(app):
    """Only the ES copy of the record carries ``facet_author_name``."""
    record_json = dict([
        ('$schema', 'http://localhost:5000/schemas/records/hep.json'),
        ('document_type', ['article']),
        ('control_number', 111),
        ('titles', [{'title': 'Jessica Jones'}]),
        ('_collections', ['Literature']),
        ('references', [
            {'record': {'$ref': 'http://localhost:5000/api/literature/1498589'}},
        ]),
    ])

    created = InspireRecord.create(record_json)
    created.commit()
    db.session.commit()
    es.indices.refresh('records-hep')

    db_copy = get_db_record('lit', 111)
    es_copy = get_es_record('lit', 111)

    assert 'facet_author_name' not in db_copy
    assert 'facet_author_name' in es_copy

    # Clean up the committed record.
    _delete_record('lit', 111)
def test_index_after_commit_indexes_also_cites_record_when_citer_is_deleted(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Deleting the citing record brings the cited record's count back to 0."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def commit_and_refresh(rec):
        rec.commit()
        db.session.commit()
        current_search.flush_and_refresh('records-hep')

    def run_indexing_task(*task_args):
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_cited_count(expected):
        es_rec = get_es_record('lit', 9999)
        assert es_rec['citation_count'] == expected
        assert LiteratureSearch.citations(es_rec).total == expected

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    commit_and_refresh(cited)

    run_indexing_task('lit', 9999, 2)
    assert_cited_count(0)

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_refresh(record)

    run_indexing_task('lit', record['control_number'], 2)
    assert_cited_count(1)

    # Delete the citer: the citation has to disappear again.
    record.delete()
    commit_and_refresh(record)

    run_indexing_task('lit', record['control_number'], 3)
    assert_cited_count(0)

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited['control_number'])
def get_citation_count(recid):
    """Return the ``citation_count`` of the ES record ``('lit', recid)``."""
    return get_es_record('lit', recid)['citation_count']
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """The indexing task raises when the citing record is gone from the DB."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def commit_and_refresh(rec):
        rec.commit()
        db.session.commit()
        es.indices.refresh('records-hep')

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    commit_and_refresh(cited)

    cited_task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*cited_task_args)
    # execute mocked task
    index_modified_citations_from_record(*cited_task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert LiteratureSearch.citations(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_refresh(record)

    citing_task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*citing_task_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*citing_task_args)

    _delete_record('lit', cited['control_number'])
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Adding references to two records raises both citation counts to 1."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def commit_and_refresh(rec):
        rec.commit()
        db.session.commit()
        es.indices.refresh('records-hep')

    def run_indexing_task(*task_args):
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_both_cited_counts(expected):
        es_rec1 = get_es_record('lit', 9999)
        es_rec2 = get_es_record('lit', 9998)
        assert es_rec1['citation_count'] == expected
        assert es_rec2['citation_count'] == expected
        assert LiteratureSearch.citations(es_rec1).total == expected
        assert LiteratureSearch.citations(es_rec2).total == expected

    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited1 = InspireRecord.create(data=json1, skip_files=True)
    commit_and_refresh(cited1)
    run_indexing_task('lit', cited1['control_number'], 2)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature'],
    }
    cited2 = InspireRecord.create(data=json2, skip_files=True)
    commit_and_refresh(cited2)
    run_indexing_task('lit', cited2['control_number'], 2)

    assert_both_cited_counts(0)

    # First version of the citer: a reference that links to no record.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_refresh(record)
    run_indexing_task('lit', record['control_number'], 2)

    assert_both_cited_counts(0)

    # Second version of the citer: references to both cited records.
    references = {
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9998',
                },
            },
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
            },
        ],
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    commit_and_refresh(record)
    run_indexing_task('lit', record['control_number'], 3)

    assert_both_cited_counts(1)

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def test_index_after_commit_indexes_also_cites_two_records(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Both cited records go from 0 to 1 citations once they are referenced."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def commit_and_refresh(rec):
        rec.commit()
        db.session.commit()
        es.indices.refresh('records-hep')

    def run_indexing_task(*task_args):
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_both_cited_counts(expected):
        es_rec1 = get_es_record('lit', 9999)
        es_rec2 = get_es_record('lit', 9998)
        assert es_rec1['citation_count'] == expected
        assert es_rec2['citation_count'] == expected
        assert get_citations_from_es(es_rec1).total == expected
        assert get_citations_from_es(es_rec2).total == expected

    json1 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited1 = InspireRecord.create(data=json1, skip_files=True)
    commit_and_refresh(cited1)
    run_indexing_task('lit', cited1['control_number'], 2)

    json2 = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This also is the record being cited'}],
        'control_number': 9998,
        '_collections': ['Literature'],
    }
    cited2 = InspireRecord.create(data=json2, skip_files=True)
    commit_and_refresh(cited2)
    run_indexing_task('lit', cited2['control_number'], 2)

    assert_both_cited_counts(0)

    # First version of the citer: a reference that links to no record.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_refresh(record)
    run_indexing_task('lit', record['control_number'], 2)

    assert_both_cited_counts(0)

    # Second version of the citer: references to both cited records.
    references = {
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9998',
                },
            },
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
            },
        ],
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    commit_and_refresh(record)
    run_indexing_task('lit', record['control_number'], 3)

    assert_both_cited_counts(1)

    _delete_record('lit', record['control_number'])
    _delete_record('lit', cited1['control_number'])
    _delete_record('lit', cited2['control_number'])
def get_record(self, pid_type, recid):
    """Fetch a record from ES, returning ``None`` when the lookup fails.

    Args:
        pid_type: pid type handed to ``record_getter.get_es_record``.
        recid: record identifier handed to ``record_getter.get_es_record``.
    """
    try:
        found = record_getter.get_es_record(pid_type, recid)
    except record_getter.RecordGetterError:
        found = None
    return found
def get_record(self, record_type, recid):
    """Return the ES record for ``(record_type, recid)`` or ``None``.

    ``None`` is returned when ``record_getter.get_es_record`` raises
    ``RecordGetterError``; any other exception propagates.
    """
    result = None
    try:
        result = record_getter.get_es_record(record_type, recid)
    except record_getter.RecordGetterError:
        pass
    return result
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Adding a reference to an existing citer bumps the cited count to 1."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def db_commit_and_refresh():
        db.session.commit()
        es.indices.refresh('records-hep')

    def run_indexing_task(*task_args):
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_cited_count(expected):
        es_rec = get_es_record('lit', 9999)
        assert es_rec['citation_count'] == expected
        assert get_citations_from_es(es_rec).total == expected

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    db_commit_and_refresh()

    run_indexing_task('lit', cited['control_number'], 1)
    assert_cited_count(0)

    # First version of the citer: a reference that links to no record.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    db_commit_and_refresh()

    run_indexing_task('lit', record['control_number'], 1)
    assert_cited_count(0)

    # Second version of the citer: the reference now points at record 9999.
    references = {
        'references': [
            {
                "curated_relation": False,
                "record": {
                    "$ref": "http://localhost:5000/api/literature/9999",
                },
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db_commit_and_refresh()

    run_indexing_task('lit', record['control_number'], 2)
    assert_cited_count(1)

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_index_after_commit_indexes_also_cites_record_when_new_citation_is_added(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """The cited record's count becomes 1 once the citer references it."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def db_commit_and_refresh():
        db.session.commit()
        es.indices.refresh('records-hep')

    def run_indexing_task(*task_args):
        mocked_indexing_task.assert_called_with(*task_args)
        # execute mocked task
        index_modified_citations_from_record(*task_args)

    def assert_cited_count(expected):
        es_rec = get_es_record('lit', 9999)
        assert es_rec['citation_count'] == expected
        assert LiteratureSearch.citations(es_rec).total == expected

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    db_commit_and_refresh()

    run_indexing_task('lit', cited['control_number'], 1)
    assert_cited_count(0)

    # First version of the citer: a reference that links to no record.
    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    db_commit_and_refresh()

    run_indexing_task('lit', record['control_number'], 1)
    assert_cited_count(0)

    # Second version of the citer: the reference now points at record 9999.
    references = {
        'references': [
            {
                "curated_relation": False,
                "record": {
                    "$ref": "http://localhost:5000/api/literature/9999",
                },
                "reference": {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    citing_json.update(references)
    record.clear()
    record.update(citing_json)
    record.commit()
    db_commit_and_refresh()

    run_indexing_task('lit', record['control_number'], 2)
    assert_cited_count(1)

    _delete_record('lit', 8888)
    _delete_record('lit', 9999)
def test_regression_index_after_commit_retries_for_new_record_not_yet_in_db(
    mocked_indexing_task,
    mocked_permission_check,
    app,
):
    """Running the task for a record removed from the DB raises an error."""
    # isolated_app is not used here: record versions are only created when
    # the changes are really committed to the DB.

    def commit_and_refresh(rec):
        rec.commit()
        db.session.commit()
        es.indices.refresh('records-hep')

    json_data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'This is the record being cited'}],
        'control_number': 9999,
        '_collections': ['Literature'],
    }
    cited = InspireRecord.create(data=json_data, skip_files=True)
    commit_and_refresh(cited)

    cited_task_args = ('lit', 9999, 2)
    mocked_indexing_task.assert_called_with(*cited_task_args)
    # execute mocked task
    index_modified_citations_from_record(*cited_task_args)

    es_rec = get_es_record('lit', 9999)
    assert es_rec['citation_count'] == 0
    assert get_citations_from_es(es_rec).total == 0

    citing_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        'document_type': ['article'],
        'titles': [{'title': 'Record citing the first one'}],
        '_collections': ['Literature'],
        'control_number': 8888,
        'references': [
            {
                'record': {
                    '$ref': 'http://localhost:5000/api/literature/9999',
                },
                'reference': {
                    'authors': [{'full_name': 'Smith, J.'}],
                },
            },
        ],
    }
    record = InspireRecord.create(data=citing_json, skip_files=True)
    commit_and_refresh(record)

    citing_task_args = ('lit', record['control_number'], 2)
    mocked_indexing_task.assert_called_with(*citing_task_args)

    # execute mocked task pretending record is not committed yet to DB
    _delete_record('lit', record['control_number'])
    with pytest.raises(RecordGetterError):
        # XXX: celery in eager mode does not retry, so it raises the first time
        index_modified_citations_from_record(*citing_task_args)

    _delete_record('lit', cited['control_number'])
def get_citation_count(recid):
    """Return the ``citation_count`` of the ES record ``("literature", recid)``."""
    es_record = get_es_record("literature", recid)
    return es_record["citation_count"]