def diagnose(bibcodes, json):
    """Run a small self-diagnostic ingest against an empty database.

    Refuses to run unless the database holds no citations and no citation
    targets, as a safeguard against modifying a database that is in use.
    When `bibcodes` or `json` are falsy, built-in sample payloads are used
    instead. The payloads are written to a diagnostics input file and fed
    through the regular `process` pipeline with `diagnose=True`.

    :param bibcodes: list of bibcodes to diagnose with (defaults applied if falsy)
    :param json: list of raw JSON payload strings (defaults applied if falsy)
        NOTE: this parameter name shadows the stdlib ``json`` module; kept
        for interface compatibility.
    """
    n_citations = db.get_citation_count(tasks.app)
    n_citation_targets = db.get_citation_target_count(tasks.app)
    # Guard clause: never touch a database that already contains data.
    if n_citations != 0 or n_citation_targets != 0:
        logger.error(
            "Diagnose aborted because the database already contains %s citations and %s citations targets (this is a protection against modifying a database in use)",
            n_citations, n_citation_targets)
        return
    if not bibcodes:
        # Deliberately fake/sample bibcodes matching the default JSON payloads
        bibcodes = [
            "1005PhRvC..71c4906H",
            "1915PA.....23..189P",
            "2017PASP..129b4005R",
        ]
        logger.info('Using default bibcodes for diagnose:\n\t%s',
                    "\n\t".join(bibcodes))
    if not json:
        json = [
            "{\"cited\":\"1976NuPhB.113..395J\",\"citing\":\"1005PhRvC..71c4906H\",\"doi\":\"10.1016/0550-3213(76)90133-4\",\"score\":\"1\",\"source\":\"/proj/ads/references/resolved/PhRvC/0071/1005PhRvC..71c4906H.ref.xml.result:17\"}",
            "{\"cited\":\"...................\",\"citing\":\"2017SSEle.128..141M\",\"score\":\"0\",\"source\":\"/proj/ads/references/resolved/SSEle/0128/10.1016_j.sse.2016.10.029.xref.xml.result:10\",\"url\":\"https://github.com/viennats/viennats-dev\"}",
            "{\"cited\":\"2013ascl.soft03021B\",\"citing\":\"2017PASP..129b4005R\",\"pid\":\"ascl:1303.021\",\"score\":\"1\",\"source\":\"/proj/ads/references/resolved/PASP/0129/iss972.iop.xml.result:114\"}",
        ]
        logger.info('Using default json data for diagnose:\n\t%s',
                    "\n\t".join(json))
    # Build a temporary diagnostics input file and process it as a normal run
    diagnostics_filename = _build_diagnostics(json_payloads=json, bibcodes=bibcodes)
    process(diagnostics_filename, force=False, diagnose=True)
def test_run(self):
    """End-to-end run of the citation capture pipeline over two sample files.

    First pass: ingest ``sample-refids1.dat`` (mtime forced to 1970-01-01)
    into an empty database — only stores (no updates/deletes) are expected.
    Second pass: ingest ``sample-refids2.dat`` (mtime forced to 1970-01-02) —
    updates and deletions are now expected as well. All ``db``/``api``/``doi``/
    ``url``/``webhook`` collaborators are wrapped (or stubbed where they would
    hit the network) so both the call pattern and the exact serialized
    citation-change payloads can be asserted.
    """
    # This test modifies the public schema of the database, hence do not run it
    # if we detect that data exists to avoid affecting production by mistake
    citation_count = db.get_citation_count(self.app)
    citation_target_count = db.get_citation_target_count(self.app)
    if citation_count != 0 or citation_target_count != 0:
        pytest.skip(
            "Skipped because this test assumes an empty public schema but the database already contains {} citations and {} citations targets (this is a protection against modifying an already used database)"
            .format(citation_count, citation_target_count))
    else:
        first_refids_filename = os.path.join(
            self.app.conf['PROJ_HOME'],
            "ADSCitationCapture/tests/data/sample-refids1.dat")
        os.utime(
            first_refids_filename,
            (0, 0))  # set the access and modified times to 19700101_000000
        # Byte-exact serialized citation-change messages expected from the
        # first file (compared below via SerializeToString()).
        # NOTE(review): these look like protobuf wire-format payloads — the
        # embedded \x.. bytes are field tags/lengths, not text; do not edit.
        expected_citation_change_from_first_file = [
            '\n\x132009arXiv0911.4940W\x12\x13...................\x18\x02"/http://github.com/b45ch1/hpsc_hanoi_2009_walter0\x02:\x00',
            '\n\x132010arXiv1003.5943M\x12\x13...................\x18\x02" http://github.com/matsen/pplacer0\x02:\x00',
            '\n\x132011arXiv1112.0312C\x12\x132012ascl.soft03003C\x18\x01"\rascl:1203.003(\x010\x02:\x00',
            '\n\x132013arXiv1310.5912S\x12\x132012ascl.soft.8004S\x18\x01"\x0eascl:1208.80040\x02:\x00',
            '\n\x132015ApJ...815L..10L\x12\x132015ascl.soft...10J\x18\x01"\rascl:1510.0100\x02:\x00',
            '\n\x132015arXiv151003579A\x12\x132014spi..book11020F"\x1410.5281/zenodo.110200\x02:\x00',
            '\n\x132015JCAP...08..043A\x12\x132014zndo.soft11020F"\x1410.5281/zenodo.11020(\x010\x02:\x00',
            '\n\x132015MNRAS.453..483K\x12\x13...................\x18\x01"\rascl:1208.0040\x02:\x00',
            '\n\x132016AJ....152..123G\x12\x13...................\x18\x01"\x0eascl:1208.00420\x02:\x00',
            '\n\x132019ApJ...877L..39C\x12\x13..................."\x1610.5281/zenodo.10491600\x02:\x00',
            '\n\x132019arXiv190105505T\x12\x13..................."\x1410.5281/zenodo.118130\x02:\x00',
            '\n\x132019arXiv190105855L\x12\x13..................."\x1410.5281/zenodo.118130\x02:\x00',
        ]
        second_refids_filename = os.path.join(
            self.app.conf['PROJ_HOME'],
            "ADSCitationCapture/tests/data/sample-refids2.dat")
        os.utime(
            second_refids_filename,
            (24 * 60 * 60, 24 * 60 * 60))  # set the access and modified times to 19700102_000000
        # Expected payloads for the second file: same byte-exact comparison,
        # but these carry a different trailing byte sequence than the first
        # file's (presumably the later timestamp — verify against the proto).
        expected_citation_change_from_second_file = [
            '\n\x132015JCAP...08..043A\x12\x132014zndo.soft11020F"\x1410.5281/zenodo.110200\x03:\x04\x08\x80\xa3\x05',
            '\n\x132009arXiv0911.4940W\x12\x13...................\x18\x02"/http://github.com/b45ch1/hpsc_hanoi_2009_walter0\x02:\x04\x08\x80\xa3\x05',
            '\n\x132010arXiv1003.5943M\x12\x13...................\x18\x02" http://github.com/matsen/pplacer0\x02:\x04\x08\x80\xa3\x05',
            '\n\x132011arXiv1112.0312C\x12\x132012ascl.soft03003C\x18\x01"\rascl:1203.003(\x010\x02:\x04\x08\x80\xa3\x05',
            '\n\x132013arXiv1310.5912S\x12\x132012ascl.soft.8004S\x18\x01"\x0eascl:1208.80040\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015ApJ...815L..10L\x12\x132015ascl.soft...10J\x18\x01"\rascl:1510.0100\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015arXiv150902512A\x12\x132015vsr..conf27878D"\x1410.5281/zenodo.278780\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015MNRAS.453..483K\x12\x13hola...............\x18\x01"\rascl:1208.004(\x010\x02:\x04\x08\x80\xa3\x05',
            '\n\x132016AJ....152..123G\x12\x13...................\x18\x01"\x0eascl:1208.00420\x02:\x04\x08\x80\xa3\x05',
            '\n\x132019arXiv190105855L\x12\x13..................."\x1410.5281/zenodo.118130\x01:\x04\x08\x80\xa3\x05',
        ]
        # Process first file
        # Wrap every collaborator with a spy (wraps=) so real behavior runs
        # while calls are recorded; stub network-facing calls (api canonical
        # bibcodes, url.is_alive, webhook dump/emit, forward_message) so the
        # test stays offline.
        i = 0
        with TestBase.mock_multiple_targets({
                'task_process_citation_changes': patch.object(tasks.task_process_citation_changes, 'delay', wraps=tasks.task_process_citation_changes.delay),
                'citation_already_exists': patch.object(db, 'citation_already_exists', wraps=db.citation_already_exists),
                'get_citation_target_metadata': patch.object(db, 'get_citation_target_metadata', wraps=db.get_citation_target_metadata),
                'get_citations_by_bibcode': patch.object(db, 'get_citations_by_bibcode', wraps=db.get_citations_by_bibcode),
                'store_citation_target': patch.object(db, 'store_citation_target', wraps=db.store_citation_target),
                'store_citation': patch.object(db, 'store_citation', wraps=db.store_citation),
                'store_event': patch.object(db, 'store_event', wraps=db.store_event),
                'update_citation': patch.object(db, 'update_citation', wraps=db.update_citation),
                'mark_citation_as_deleted': patch.object(db, 'mark_citation_as_deleted', wraps=db.mark_citation_as_deleted),
                'get_citations': patch.object(db, 'get_citations', wraps=db.get_citations),
                'update_citation_target_metadata': patch.object(db, 'update_citation_target_metadata', wraps=db.update_citation_target_metadata),
                'get_citation_target_count': patch.object(db, 'get_citation_target_count', wraps=db.get_citation_target_count),
                'get_citation_count': patch.object(db, 'get_citation_count', wraps=db.get_citation_count),
                'get_citation_targets_by_bibcode': patch.object(db, 'get_citation_targets_by_bibcode', wraps=db.get_citation_targets_by_bibcode),
                'get_citation_targets_by_doi': patch.object(db, 'get_citation_targets_by_doi', wraps=db.get_citation_targets_by_doi),
                'get_citation_targets': patch.object(db, 'get_citation_targets', wraps=db.get_citation_targets),
                'get_canonical_bibcode': patch.object(api, 'get_canonical_bibcode', return_value=u"2015MNRAS.453..483K"),
                'get_canonical_bibcodes': patch.object(api, 'get_canonical_bibcodes', return_value=[]),
                'request_existing_citations': patch.object(api, 'request_existing_citations', return_value=[]),
                'fetch_metadata': patch.object(doi, 'fetch_metadata', wraps=self._fetch_metadata),
                'parse_metadata': patch.object(doi, 'parse_metadata', wraps=doi.parse_metadata),
                'build_bibcode': patch.object(doi, 'build_bibcode', wraps=doi.build_bibcode),
                'url_is_alive': patch.object(url, 'is_alive', return_value=True),
                'is_url': patch.object(url, 'is_url', wraps=url.is_url),
                'citation_change_to_event_data': patch.object(webhook, 'citation_change_to_event_data', wraps=webhook.citation_change_to_event_data),
                'identical_bibcodes_event_data': patch.object(webhook, 'identical_bibcodes_event_data', wraps=webhook.identical_bibcodes_event_data),
                'identical_bibcode_and_doi_event_data': patch.object(webhook, 'identical_bibcode_and_doi_event_data', wraps=webhook.identical_bibcode_and_doi_event_data),
                'webhook_dump_event': patch.object(webhook, 'dump_event', return_value=True),
                'webhook_emit_event': patch.object(webhook, 'emit_event', return_value=True),
                'forward_message': patch.object(app.ADSCitationCaptureCelery, 'forward_message', return_value=True)}) as mocked:
            self.process(first_refids_filename, sqlalchemy_url=self.sqlalchemy_url, schema_prefix=self.schema_prefix)
            # First run against an empty schema: only creation paths fire;
            # update/delete and the read-only reporting helpers must not.
            self.assertTrue(mocked['citation_already_exists'].called)
            self.assertTrue(mocked['get_citation_target_metadata'].called)
            self.assertTrue(mocked['fetch_metadata'].called)
            self.assertTrue(mocked['parse_metadata'].called)
            self.assertTrue(mocked['url_is_alive'].called)
            self.assertTrue(mocked['get_canonical_bibcode'].called)
            self.assertTrue(mocked['get_canonical_bibcodes'].called)
            self.assertTrue(mocked['get_citations_by_bibcode'].called)
            self.assertTrue(mocked['store_citation_target'].called)
            self.assertTrue(mocked['store_citation'].called)
            self.assertFalse(mocked['update_citation'].called)
            self.assertFalse(mocked['mark_citation_as_deleted'].called)
            self.assertTrue(mocked['get_citations'].called)
            self.assertTrue(mocked['forward_message'].called)
            self.assertFalse(
                mocked['update_citation_target_metadata'].called)
            self.assertFalse(mocked['get_citation_target_count'].called)
            self.assertFalse(mocked['get_citation_count'].called)
            self.assertFalse(
                mocked['get_citation_targets_by_bibcode'].called)
            self.assertFalse(mocked['get_citation_targets_by_doi'].called)
            self.assertFalse(mocked['get_citation_targets'].called)
            self.assertFalse(mocked['request_existing_citations'].called)
            self.assertTrue(mocked['build_bibcode'].called)
            self.assertFalse(mocked['is_url'].called)
            self.assertTrue(mocked['citation_change_to_event_data'].called)
            self.assertTrue(mocked['identical_bibcodes_event_data'].called)
            self.assertTrue(
                mocked['identical_bibcode_and_doi_event_data'].called)
            self.assertTrue(mocked['store_event'].called)
            self.assertTrue(mocked['webhook_dump_event'].called)
            self.assertTrue(mocked['webhook_emit_event'].called)
            # Every citation change dispatched to the task must serialize to
            # exactly the expected bytes, in order across all task calls.
            for args in mocked[
                    'task_process_citation_changes'].call_args_list:
                citation_changes = args[0][0]
                for citation_change in citation_changes.changes:
                    self.assertEqual(
                        citation_change.SerializeToString(),
                        expected_citation_change_from_first_file[i])
                    i += 1
        # Process second file
        # Same spy/stub setup as above; the second file exercises the
        # update and delete code paths as well.
        i = 0
        with TestBase.mock_multiple_targets({
                'task_process_citation_changes': patch.object(tasks.task_process_citation_changes, 'delay', wraps=tasks.task_process_citation_changes.delay),
                'citation_already_exists': patch.object(db, 'citation_already_exists', wraps=db.citation_already_exists),
                'get_citation_target_metadata': patch.object(db, 'get_citation_target_metadata', wraps=db.get_citation_target_metadata),
                'get_citations_by_bibcode': patch.object(db, 'get_citations_by_bibcode', wraps=db.get_citations_by_bibcode),
                'store_citation_target': patch.object(db, 'store_citation_target', wraps=db.store_citation_target),
                'store_citation': patch.object(db, 'store_citation', wraps=db.store_citation),
                'store_event': patch.object(db, 'store_event', wraps=db.store_event),
                'update_citation': patch.object(db, 'update_citation', wraps=db.update_citation),
                'mark_citation_as_deleted': patch.object(db, 'mark_citation_as_deleted', wraps=db.mark_citation_as_deleted),
                'get_citations': patch.object(db, 'get_citations', wraps=db.get_citations),
                'update_citation_target_metadata': patch.object(db, 'update_citation_target_metadata', wraps=db.update_citation_target_metadata),
                'get_citation_target_count': patch.object(db, 'get_citation_target_count', wraps=db.get_citation_target_count),
                'get_citation_count': patch.object(db, 'get_citation_count', wraps=db.get_citation_count),
                'get_citation_targets_by_bibcode': patch.object(db, 'get_citation_targets_by_bibcode', wraps=db.get_citation_targets_by_bibcode),
                'get_citation_targets_by_doi': patch.object(db, 'get_citation_targets_by_doi', wraps=db.get_citation_targets_by_doi),
                'get_citation_targets': patch.object(db, 'get_citation_targets', wraps=db.get_citation_targets),
                'get_canonical_bibcode': patch.object(api, 'get_canonical_bibcode', return_value=u"2015MNRAS.453..483K"),
                'get_canonical_bibcodes': patch.object(api, 'get_canonical_bibcodes', return_value=[]),
                'request_existing_citations': patch.object(api, 'request_existing_citations', return_value=[]),
                'fetch_metadata': patch.object(doi, 'fetch_metadata', wraps=self._fetch_metadata),
                'parse_metadata': patch.object(doi, 'parse_metadata', wraps=doi.parse_metadata),
                'build_bibcode': patch.object(doi, 'build_bibcode', wraps=doi.build_bibcode),
                'url_is_alive': patch.object(url, 'is_alive', return_value=True),
                'is_url': patch.object(url, 'is_url', wraps=url.is_url),
                'citation_change_to_event_data': patch.object(webhook, 'citation_change_to_event_data', wraps=webhook.citation_change_to_event_data),
                'identical_bibcodes_event_data': patch.object(webhook, 'identical_bibcodes_event_data', wraps=webhook.identical_bibcodes_event_data),
                'identical_bibcode_and_doi_event_data': patch.object(webhook, 'identical_bibcode_and_doi_event_data', wraps=webhook.identical_bibcode_and_doi_event_data),
                'webhook_dump_event': patch.object(webhook, 'dump_event', return_value=True),
                'webhook_emit_event': patch.object(webhook, 'emit_event', return_value=True),
                'forward_message': patch.object(app.ADSCitationCaptureCelery, 'forward_message', return_value=True)}) as mocked:
            self.process(second_refids_filename, sqlalchemy_url=self.sqlalchemy_url, schema_prefix=self.schema_prefix)
            # Second run over pre-populated data: update_citation and
            # mark_citation_as_deleted are now expected to fire too.
            self.assertTrue(mocked['citation_already_exists'].called)
            self.assertTrue(mocked['get_citation_target_metadata'].called)
            self.assertTrue(mocked['fetch_metadata'].called)
            self.assertTrue(mocked['parse_metadata'].called)
            self.assertTrue(mocked['url_is_alive'].called)
            self.assertTrue(mocked['get_canonical_bibcode'].called)
            self.assertTrue(mocked['get_canonical_bibcodes'].called)
            self.assertTrue(mocked['get_citations_by_bibcode'].called)
            self.assertTrue(mocked['store_citation_target'].called)
            self.assertTrue(mocked['store_citation'].called)
            self.assertTrue(mocked['update_citation'].called)
            self.assertTrue(mocked['mark_citation_as_deleted'].called)
            self.assertTrue(mocked['get_citations'].called)
            self.assertTrue(mocked['forward_message'].called)
            self.assertFalse(
                mocked['update_citation_target_metadata'].called)
            self.assertFalse(mocked['get_citation_target_count'].called)
            self.assertFalse(mocked['get_citation_count'].called)
            self.assertFalse(
                mocked['get_citation_targets_by_bibcode'].called)
            self.assertFalse(mocked['get_citation_targets_by_doi'].called)
            self.assertFalse(mocked['get_citation_targets'].called)
            self.assertFalse(mocked['request_existing_citations'].called)
            self.assertTrue(mocked['build_bibcode'].called)
            self.assertFalse(mocked['is_url'].called)
            self.assertTrue(mocked['citation_change_to_event_data'].called)
            self.assertTrue(mocked['identical_bibcodes_event_data'].called)
            self.assertTrue(
                mocked['identical_bibcode_and_doi_event_data'].called)
            self.assertTrue(mocked['store_event'].called)
            self.assertTrue(mocked['webhook_dump_event'].called)
            self.assertTrue(mocked['webhook_emit_event'].called)
            # Byte-exact comparison of each dispatched change, in order.
            for args in mocked[
                    'task_process_citation_changes'].call_args_list:
                citation_changes = args[0][0]
                for citation_change in citation_changes.changes:
                    self.assertEqual(
                        citation_change.SerializeToString(),
                        expected_citation_change_from_second_file[i])
                    i += 1