Example #1
0
def diagnose(bibcodes, json):
    """Process a small diagnostic data set, but only against an empty database.

    The run is refused (an error is logged and nothing else is done) when the
    database already holds any citation or citation target, so that a
    database in use is never modified by a diagnose run.

    :param bibcodes: bibcode strings to diagnose; when empty or None a
        built-in default list is used and logged.
    :param json: JSON payload strings to diagnose; when empty or None a
        built-in default list is used and logged. Inside this function the
        parameter hides any module-level name ``json``.
    :return: None
    """
    stored_citations = db.get_citation_count(tasks.app)
    stored_targets = db.get_citation_target_count(tasks.app)
    if stored_citations != 0 or stored_targets != 0:
        # Guard: never touch a database that already contains data.
        logger.error(
            "Diagnose aborted because the database already contains %s citations and %s citations targets (this is a protection against modifying a database in use)",
            stored_citations, stored_targets)
        return

    if not bibcodes:
        bibcodes = [
            "1005PhRvC..71c4906H",
            "1915PA.....23..189P",
            "2017PASP..129b4005R",
        ]
        bibcode_listing = "\n\t".join(bibcodes)
        logger.info('Using default bibcodes for diagnose:\n\t%s',
                    bibcode_listing)

    if not json:
        json = [
            '{"cited":"1976NuPhB.113..395J","citing":"1005PhRvC..71c4906H","doi":"10.1016/0550-3213(76)90133-4","score":"1","source":"/proj/ads/references/resolved/PhRvC/0071/1005PhRvC..71c4906H.ref.xml.result:17"}',
            '{"cited":"...................","citing":"2017SSEle.128..141M","score":"0","source":"/proj/ads/references/resolved/SSEle/0128/10.1016_j.sse.2016.10.029.xref.xml.result:10","url":"https://github.com/viennats/viennats-dev"}',
            '{"cited":"2013ascl.soft03021B","citing":"2017PASP..129b4005R","pid":"ascl:1303.021","score":"1","source":"/proj/ads/references/resolved/PASP/0129/iss972.iop.xml.result:114"}',
        ]
        payload_listing = "\n\t".join(json)
        logger.info('Using default json data for diagnose:\n\t%s',
                    payload_listing)

    # Turn the selected payloads/bibcodes into an input file and run it
    # through the regular processing entry point in diagnose mode.
    diagnostics_filename = _build_diagnostics(json_payloads=json,
                                              bibcodes=bibcodes)
    process(diagnostics_filename, force=False, diagnose=True)
    def test_run(self):
        """Process two sample refids files in sequence and check the pipeline activity.

        For each file the test verifies which patched collaborators were (and
        were not) called, and that every citation change handed to
        ``tasks.task_process_citation_changes.delay`` serializes to the
        expected bytes. The test is skipped when the database already
        contains citations or citation targets.
        """
        # This test modifies the public schema of the database, hence do not run it
        # if we detect that data exists to avoid affecting production by mistake
        citation_count = db.get_citation_count(self.app)
        citation_target_count = db.get_citation_target_count(self.app)
        if citation_count != 0 or citation_target_count != 0:
            # pytest.skip raises, so nothing below runs on a populated database.
            pytest.skip(
                "Skipped because this test assumes an empty public schema but the database already contains {} citations and {} citations targets (this is a protection against modifying an already used database)"
                .format(citation_count, citation_target_count))

        first_refids_filename = os.path.join(
            self.app.conf['PROJ_HOME'],
            "ADSCitationCapture/tests/data/sample-refids1.dat")
        os.utime(
            first_refids_filename,
            (0, 0))  # set the access and modified times to 19700101_000000
        expected_citation_change_from_first_file = [
            '\n\x132009arXiv0911.4940W\x12\x13...................\x18\x02"/http://github.com/b45ch1/hpsc_hanoi_2009_walter0\x02:\x00',
            '\n\x132010arXiv1003.5943M\x12\x13...................\x18\x02" http://github.com/matsen/pplacer0\x02:\x00',
            '\n\x132011arXiv1112.0312C\x12\x132012ascl.soft03003C\x18\x01"\rascl:1203.003(\x010\x02:\x00',
            '\n\x132013arXiv1310.5912S\x12\x132012ascl.soft.8004S\x18\x01"\x0eascl:1208.80040\x02:\x00',
            '\n\x132015ApJ...815L..10L\x12\x132015ascl.soft...10J\x18\x01"\rascl:1510.0100\x02:\x00',
            '\n\x132015arXiv151003579A\x12\x132014spi..book11020F"\x1410.5281/zenodo.110200\x02:\x00',
            '\n\x132015JCAP...08..043A\x12\x132014zndo.soft11020F"\x1410.5281/zenodo.11020(\x010\x02:\x00',
            '\n\x132015MNRAS.453..483K\x12\x13...................\x18\x01"\rascl:1208.0040\x02:\x00',
            '\n\x132016AJ....152..123G\x12\x13...................\x18\x01"\x0eascl:1208.00420\x02:\x00',
            '\n\x132019ApJ...877L..39C\x12\x13..................."\x1610.5281/zenodo.10491600\x02:\x00',
            '\n\x132019arXiv190105505T\x12\x13..................."\x1410.5281/zenodo.118130\x02:\x00',
            '\n\x132019arXiv190105855L\x12\x13..................."\x1410.5281/zenodo.118130\x02:\x00',
        ]
        second_refids_filename = os.path.join(
            self.app.conf['PROJ_HOME'],
            "ADSCitationCapture/tests/data/sample-refids2.dat")
        os.utime(
            second_refids_filename,
            (24 * 60 * 60, 24 * 60 *
             60))  # set the access and modified times to 19700102_000000
        expected_citation_change_from_second_file = [
            '\n\x132015JCAP...08..043A\x12\x132014zndo.soft11020F"\x1410.5281/zenodo.110200\x03:\x04\x08\x80\xa3\x05',
            '\n\x132009arXiv0911.4940W\x12\x13...................\x18\x02"/http://github.com/b45ch1/hpsc_hanoi_2009_walter0\x02:\x04\x08\x80\xa3\x05',
            '\n\x132010arXiv1003.5943M\x12\x13...................\x18\x02" http://github.com/matsen/pplacer0\x02:\x04\x08\x80\xa3\x05',
            '\n\x132011arXiv1112.0312C\x12\x132012ascl.soft03003C\x18\x01"\rascl:1203.003(\x010\x02:\x04\x08\x80\xa3\x05',
            '\n\x132013arXiv1310.5912S\x12\x132012ascl.soft.8004S\x18\x01"\x0eascl:1208.80040\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015ApJ...815L..10L\x12\x132015ascl.soft...10J\x18\x01"\rascl:1510.0100\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015arXiv150902512A\x12\x132015vsr..conf27878D"\x1410.5281/zenodo.278780\x02:\x04\x08\x80\xa3\x05',
            '\n\x132015MNRAS.453..483K\x12\x13hola...............\x18\x01"\rascl:1208.004(\x010\x02:\x04\x08\x80\xa3\x05',
            '\n\x132016AJ....152..123G\x12\x13...................\x18\x01"\x0eascl:1208.00420\x02:\x04\x08\x80\xa3\x05',
            '\n\x132019arXiv190105855L\x12\x13..................."\x1410.5281/zenodo.118130\x01:\x04\x08\x80\xa3\x05',
        ]

        # Mocks expected to be called while processing either file.
        always_called = (
            'citation_already_exists',
            'get_citation_target_metadata',
            'fetch_metadata',
            'parse_metadata',
            'url_is_alive',
            'get_canonical_bibcode',
            'get_canonical_bibcodes',
            'get_citations_by_bibcode',
            'store_citation_target',
            'store_citation',
            'get_citations',
            'forward_message',
            'build_bibcode',
            'citation_change_to_event_data',
            'identical_bibcodes_event_data',
            'identical_bibcode_and_doi_event_data',
            'store_event',
            'webhook_dump_event',
            'webhook_emit_event',
        )
        # Mocks expected to stay untouched while processing either file.
        never_called = (
            'update_citation_target_metadata',
            'get_citation_target_count',
            'get_citation_count',
            'get_citation_targets_by_bibcode',
            'get_citation_targets_by_doi',
            'get_citation_targets',
            'request_existing_citations',
            'is_url',
        )
        # The only difference between both runs: these are expected to be
        # called for the second file and not for the first one.
        only_second_file = ('update_citation', 'mark_citation_as_deleted')

        # Process first file
        with TestBase.mock_multiple_targets(
                self._pipeline_patches()) as mocked:
            self.process(first_refids_filename,
                         sqlalchemy_url=self.sqlalchemy_url,
                         schema_prefix=self.schema_prefix)
            self._assert_call_pattern(
                mocked,
                called=always_called,
                not_called=never_called + only_second_file)
            self._assert_citation_changes(
                mocked['task_process_citation_changes'],
                expected_citation_change_from_first_file)

        # Process second file
        with TestBase.mock_multiple_targets(
                self._pipeline_patches()) as mocked:
            self.process(second_refids_filename,
                         sqlalchemy_url=self.sqlalchemy_url,
                         schema_prefix=self.schema_prefix)
            self._assert_call_pattern(
                mocked,
                called=always_called + only_second_file,
                not_called=never_called)
            self._assert_citation_changes(
                mocked['task_process_citation_changes'],
                expected_citation_change_from_second_file)

    def _pipeline_patches(self):
        """Return a fresh name -> patcher mapping for ``TestBase.mock_multiple_targets``.

        A new mapping is built on every call so that each processing run
        starts with its own patchers (and therefore its own call records).
        """

        def spy(target, attribute):
            # Record calls while still delegating to the real implementation.
            return patch.object(target, attribute,
                                wraps=getattr(target, attribute))

        return {
            'task_process_citation_changes': spy(tasks.task_process_citation_changes, 'delay'),
            'citation_already_exists': spy(db, 'citation_already_exists'),
            'get_citation_target_metadata': spy(db, 'get_citation_target_metadata'),
            'get_citations_by_bibcode': spy(db, 'get_citations_by_bibcode'),
            'store_citation_target': spy(db, 'store_citation_target'),
            'store_citation': spy(db, 'store_citation'),
            'store_event': spy(db, 'store_event'),
            'update_citation': spy(db, 'update_citation'),
            'mark_citation_as_deleted': spy(db, 'mark_citation_as_deleted'),
            'get_citations': spy(db, 'get_citations'),
            'update_citation_target_metadata': spy(db, 'update_citation_target_metadata'),
            'get_citation_target_count': spy(db, 'get_citation_target_count'),
            'get_citation_count': spy(db, 'get_citation_count'),
            'get_citation_targets_by_bibcode': spy(db, 'get_citation_targets_by_bibcode'),
            'get_citation_targets_by_doi': spy(db, 'get_citation_targets_by_doi'),
            'get_citation_targets': spy(db, 'get_citation_targets'),
            # Calls below are replaced by canned answers instead of being wrapped.
            'get_canonical_bibcode': patch.object(api, 'get_canonical_bibcode', return_value=u"2015MNRAS.453..483K"),
            'get_canonical_bibcodes': patch.object(api, 'get_canonical_bibcodes', return_value=[]),
            'request_existing_citations': patch.object(api, 'request_existing_citations', return_value=[]),
            # Metadata fetching is redirected to the test's own implementation.
            'fetch_metadata': patch.object(doi, 'fetch_metadata', wraps=self._fetch_metadata),
            'parse_metadata': spy(doi, 'parse_metadata'),
            'build_bibcode': spy(doi, 'build_bibcode'),
            'url_is_alive': patch.object(url, 'is_alive', return_value=True),
            'is_url': spy(url, 'is_url'),
            'citation_change_to_event_data': spy(webhook, 'citation_change_to_event_data'),
            'identical_bibcodes_event_data': spy(webhook, 'identical_bibcodes_event_data'),
            'identical_bibcode_and_doi_event_data': spy(webhook, 'identical_bibcode_and_doi_event_data'),
            'webhook_dump_event': patch.object(webhook, 'dump_event', return_value=True),
            'webhook_emit_event': patch.object(webhook, 'emit_event', return_value=True),
            'forward_message': patch.object(app.ADSCitationCaptureCelery, 'forward_message', return_value=True),
        }

    def _assert_call_pattern(self, mocked, called, not_called):
        """Assert that the mocks named in `called` fired and those in `not_called` did not."""
        for name in called:
            self.assertTrue(
                mocked[name].called,
                "expected mock '%s' to have been called" % name)
        for name in not_called:
            self.assertFalse(
                mocked[name].called,
                "expected mock '%s' not to have been called" % name)

    def _assert_citation_changes(self, delay_mock, expected_serialized):
        """Compare every citation change passed to `delay_mock` with the expected bytes.

        Changes are matched positionally across all recorded calls. Only
        changes that were actually emitted are compared; emitting fewer
        changes than `expected_serialized` holds is not detected here.
        """
        position = 0
        for recorded_call in delay_mock.call_args_list:
            # First positional argument of the recorded delay() call.
            citation_changes = recorded_call[0][0]
            for citation_change in citation_changes.changes:
                self.assertEqual(citation_change.SerializeToString(),
                                 expected_serialized[position])
                position += 1