def test_ignore_checksums_metrics(self):
     """Verify ignore_checksums works with metrics updates.

     The mocked record already carries a ``metrics_checksum``.  The first
     indexing call honours checksums and is expected to leave
     ``update_metrics_db`` untouched; the second call passes
     ``ignore_checksums=True`` and is expected to call it exactly once.
     """
     self._reset_checksum('metricstest')  # put bibcode in database
     record = {
         'bibcode': 'metricstest',
         'bib_data_updated': get_date(),
         'metrics': {'refereed': False, 'author_num': 2},
         'processed': get_date('2025'),
         'metrics_checksum': '0x424cb03e',
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch.object(self.app, 'update_metrics_db',
                          return_value=(['metricstest'], None)) as update_metrics_db:
         # update with matching checksum: nothing should be sent
         tasks.task_index_records(['metricstest'],
                                  update_solr=False,
                                  update_metrics=True,
                                  update_links=False,
                                  force=True,
                                  ignore_checksums=False)
         self.assertEqual(update_metrics_db.call_count, 0)

         # same record again, this time ignoring checksums: one update
         tasks.task_index_records(['metricstest'],
                                  update_solr=False,
                                  update_metrics=True,
                                  update_links=False,
                                  force=True,
                                  ignore_checksums=True)
         self.assertEqual(update_metrics_db.call_count, 1)
    def test_ignore_checksums_solr(self):
        """Verify ignore_checksums works with solr updates.

        The mocked record carries a ``solr_checksum``.  With checksums
        honoured the solr updater is expected not to be called; with
        ``ignore_checksums=True`` it is expected to be called once.
        """
        self._reset_checksum('foo')  # put bibcode in database
        with patch.object(self.app, 'get_record') as getter, \
                patch.object(self.app, 'update_processed_timestamp', return_value=None), \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr:
            getter.return_value = {
                'bibcode': 'foo',
                'metrics_updated': get_date('1972-04-02'),
                'bib_data_updated': get_date('1972-04-01'),
                'solr_checksum': '0xf2708ee8',
            }

            # update with matching checksum: solr should not be touched
            tasks.task_index_records(['foo'],
                                     force=True,
                                     update_metrics=False,
                                     update_links=False,
                                     ignore_checksums=False)
            self.assertEqual(update_solr.call_count, 0)

            # same record again, this time ignoring checksums: one update
            tasks.task_index_records(['foo'],
                                     force=True,
                                     update_metrics=False,
                                     update_links=False,
                                     ignore_checksums=True)
            self.assertEqual(update_solr.call_count, 1)
 def test_ignore_checksums_datalinks(self):
     """Verify ignore_checksums works with datalinks updates.

     ``requests.put`` is replaced by a ``CopyingMock`` returning a response
     with status 200.  The mocked record carries a ``links_checksum``: the
     first call honours checksums and is expected to issue no PUT, the
     second call ignores them and is expected to issue exactly one.
     """
     self._reset_checksum('linkstest')  # put bibcode in database
     response = Mock()
     response.status_code = 200
     record = {
         'bibcode': 'linkstest',
         'nonbib_data': {'data_links_rows': [{'baz': 0}]},
         'bib_data_updated': get_date(),
         'nonbib_data_updated': get_date(),
         'processed': get_date('2025'),
         'links_checksum': '0x80e85169',
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('requests.put', return_value=response, new_callable=CopyingMock) as put:
         # update with matching checksum: no request expected
         tasks.task_index_records(['linkstest'],
                                  update_solr=False,
                                  update_metrics=False,
                                  update_links=True,
                                  force=True,
                                  ignore_checksums=False)
         self.assertEqual(put.call_count, 0)

         # same record again, this time ignoring checksums: one request
         tasks.task_index_records(['linkstest'],
                                  update_solr=False,
                                  update_metrics=False,
                                  update_links=True,
                                  force=True,
                                  ignore_checksums=True)
         self.assertEqual(put.call_count, 1)
    def test_task_index_links(self):
        """Verify data is sent to links microservice update endpoint.

        ``requests.put`` is mocked; the test checks the URL, JSON payload and
        authorization header of the call, then reads the record back through
        the unpatched ``get_record`` and checks the stored checksums.
        """
        response = Mock()
        response.status_code = 200

        # just make sure we have the entry in a database
        tasks.task_update_record(DenormalizedRecord(bibcode='linkstest'))

        record = {
            'bibcode': 'linkstest',
            'nonbib_data': {'data_links_rows': [{'baz': 0}]},
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'processed': get_date('2025'),
        }
        with patch.object(self.app, 'get_record', return_value=record), \
                patch('requests.put', return_value=response, new_callable=CopyingMock) as put:
            tasks.task_index_records(['linkstest'],
                                     update_solr=False,
                                     update_metrics=False,
                                     update_links=True,
                                     force=True)
            put.assert_called_with('http://localhost:8080/update',
                                   data=json.dumps([{
                                       'bibcode':
                                       'linkstest',
                                       'data_links_rows': [{
                                           'baz': 0
                                       }]
                                   }]),
                                   headers={'Authorization': 'Bearer api_token'})

        # get_record is no longer patched here, so this reads the stored row
        rec = self.app.get_record(bibcode='linkstest')
        self.assertEqual(rec['datalinks_checksum'], '0x80e85169')
        self.assertIsNone(rec['solr_checksum'])
        self.assertIsNone(rec['metrics_checksum'])
    def test_avoid_duplicates(self):
        """Verify solr is not updated a second time for an unchanged solr checksum.

        The first indexing call is expected to push the record to solr and
        store its checksum.  The second call presents a record that already
        carries that checksum (only ``metrics_updated`` is new), so the solr
        call count is expected to stay at one.
        """
        # just make sure we have the entry in a database
        self._reset_checksum('foo')
        self._reset_checksum('bar')

        with patch.object(self.app, 'get_record') as getter, \
                patch.object(self.app, 'update_processed_timestamp', return_value=None), \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr:

            getter.return_value = {
                'bibcode': 'foo',
                'bib_data_updated': get_date('1972-04-01'),
            }
            tasks.task_index_records(['foo'], force=True)

            self.assertEqual(update_solr.call_count, 1)
            self._check_checksum('foo', solr='0xf2708ee8')

            # now change metrics (solr shouldn't be called)
            getter.return_value = {
                'bibcode': 'foo',
                'metrics_updated': get_date('1972-04-02'),
                'bib_data_updated': get_date('1972-04-01'),
                'solr_checksum': '0xf2708ee8',
            }
            tasks.task_index_records(['foo'], force=True)
            self.assertEqual(update_solr.call_count, 1)
    def test_task_index_records(self):
        """Check the error paths of task_index_records.

        Calling it with solr and metrics updates both switched off must raise,
        and an unknown bibcode must be reported via ``tasks.logger.error``.
        """
        # solr and metrics updates both disabled -> an exception is expected
        with self.assertRaises(Exception):
            tasks.task_index_records(['foo', 'bar'],
                                     update_solr=False,
                                     update_metrics=False)

        # unknown bibcode -> error message logged with the bibcode as argument
        with patch.object(tasks.logger, 'error', return_value=None) as log_error:
            tasks.task_index_records(['non-existent'])
            log_error.assert_called_with(u"The bibcode %s doesn't exist!",
                                         'non-existent')
 def test_index_metrics_no_data(self):
     """Verify indexing works where there is no metrics data.

     The mocked record has no ``metrics`` entry; ``update_remote_targets``
     is expected not to be called.
     """
     from datetime import datetime  # local import, only needed for the future year

     # A literal year such as '2025' stops being a future timestamp once that
     # year has passed, so derive a year that is always ahead of "now".
     future_year = str(datetime.now().year + 1)
     record = {
         'bibcode': 'noMetrics',
         'nonbib_data': {'boost': 1.2},
         'bib_data_updated': get_date(),
         'nonbib_data_updated': get_date(),
         'processed': get_date(future_year),
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('adsmp.app.ADSMasterPipelineCelery.update_remote_targets',
                   new_callable=CopyingMock) as update_remote_targets:
         tasks.task_index_records(['noMetrics'], ignore_checksums=True)
         update_remote_targets.assert_not_called()
# Example #8
# 0
 def test_index_metrics_no_data(self):
     """Check that a record without metrics data schedules no metrics task.

     ``task_index_metrics.apply_async`` is wrapped by
     ``unwind_task_index_metrics_apply_async`` and must stay uncalled.
     """
     next_year = datetime.now().year + 1
     record = {
         'bibcode': 'noMetrics',
         'nonbib_data': {'boost': 1.2},
         'bib_data_updated': get_date(),
         'nonbib_data_updated': get_date(),
         'processed': get_date(str(next_year)),
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('adsmp.tasks.task_index_metrics.apply_async',
                   wraps=unwind_task_index_metrics_apply_async) as metrics_async:
         tasks.task_index_records(['noMetrics'], ignore_checksums=True)
         metrics_async.assert_not_called()
# Example #9
# 0
 def test_task_index_links_no_data(self):
     """Check that no PUT request is issued when data_links_rows is absent.

     The mocked record's ``nonbib_data`` holds only a ``boost`` value; the
     mocked ``requests.put`` must stay uncalled after a forced links update.
     """
     next_year = datetime.now().year + 1
     record = {
         'bibcode': 'linkstest',
         'nonbib_data': {'boost': 1.2},
         'bib_data_updated': get_date(),
         'nonbib_data_updated': get_date(),
         'processed': get_date(str(next_year)),
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('adsmp.tasks.task_index_data_links_resolver.apply_async',
                   wraps=unwind_task_index_data_links_resolver_apply_async), \
             patch('requests.put', new_callable=CopyingMock) as put:
         tasks.task_index_records(['linkstest'],
                                  update_solr=False,
                                  update_metrics=False,
                                  update_links=True,
                                  force=True)
         put.assert_not_called()
 def test_task_index_links_no_data(self):
     """Check that no PUT request is issued when data_links_rows is absent.

     The mocked record's ``nonbib_data`` holds only a ``boost`` value; the
     mocked ``requests.put`` must stay uncalled after a forced links update.
     """
     record = {
         'bibcode': 'linkstest',
         'nonbib_data': {'boost': 1.2},
         'bib_data_updated': get_date(),
         'nonbib_data_updated': get_date(),
         'processed': get_date('2025'),
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('requests.put', new_callable=CopyingMock) as put:
         tasks.task_index_records(['linkstest'], update_solr=False, update_metrics=False,
                                  update_links=True, force=True)
         put.assert_not_called()
# Example #11
# 0
 def test_ignore_checksums_metrics(self):
     """Verify ignore_checksums works with metrics updates.

     The mocked record already carries a ``metrics_checksum``.  The first
     indexing call honours checksums and is expected to leave
     ``index_metrics`` untouched; the second call passes
     ``ignore_checksums=True`` and is expected to call it exactly once.
     """
     self._reset_checksum('metricstest')  # put bibcode in database
     future_year = datetime.now().year + 1
     record = {
         'bibcode': 'metricstest',
         'bib_data_updated': get_date(),
         'metrics': {'refereed': False, 'author_num': 2},
         'processed': get_date(str(future_year)),
         'metrics_checksum': '0x424cb03e',
     }
     with patch.object(self.app, 'get_record', return_value=record), \
             patch('adsmp.tasks.task_index_metrics.apply_async',
                   wraps=unwind_task_index_metrics_apply_async), \
             patch.object(self.app, 'index_metrics',
                          return_value=(['metricstest'], None)) as index_metrics:
         # update with matching checksum: nothing should be indexed
         tasks.task_index_records(['metricstest'], update_solr=False, update_metrics=True,
                                  update_links=False, force=True,
                                  ignore_checksums=False)
         self.assertEqual(index_metrics.call_count, 0)

         # same record again, this time ignoring checksums: one call
         tasks.task_index_records(['metricstest'], update_solr=False, update_metrics=True,
                                  update_links=False, force=True,
                                  ignore_checksums=True)
         self.assertEqual(index_metrics.call_count, 1)
    def test_task_update_solr(self):
        """Exercise the conditions under which task_index_records updates solr.

        Six scenarios are run one after another.  Each one mocks
        ``get_record`` with a different combination of ``*_updated`` and
        ``processed`` timestamps, runs ``tasks.task_index_records`` (with or
        without ``force=True``), and asserts whether the mocked
        ``solr_updater.update_solr`` and the mocked timestamp method on the
        app were called.
        """
        from datetime import datetime  # local import, only needed for the future year

        # The "already processed" scenarios need a ``processed`` stamp that is
        # later than the ``*_updated`` stamps, which are taken at run time.  A
        # literal year such as '2025' stops satisfying that once it has passed,
        # so derive a year that is always ahead of "now".
        future_year = str(datetime.now().year + 1)

        # just make sure we have the entry in a database
        self._reset_checksum('foobar')

        # 1) processed in 2012, everything updated now: solr update expected
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date('2012'),
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S')
            self.assertTrue(update_solr.called)
            self.assertTrue(update_timestamp.called)

        self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

        # 2) processed stamp lies in the future, no force: nothing expected
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date(future_year),
        }
        with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S')
            self.assertFalse(update_solr.called)
            self.assertFalse(update_timestamp.called)

        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

        # 3) same future processed stamp, but forced: solr update expected
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date(future_year),
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S', force=True)
            self.assertTrue(update_solr.called)
            self.assertTrue(update_timestamp.called)

        self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

        # 4) bib_data_updated missing, no force: nothing expected
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': None,
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': None,
        }
        with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=None) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S')
            self.assertFalse(update_solr.called)
            self.assertFalse(update_timestamp.called)

        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

        # 5) nonbib_data_updated missing, forced: solr update expected and the
        #    task must not re-queue itself through apply_async
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': get_date(),
            'nonbib_data_updated': None,
            'orcid_claims_updated': get_date(),
            'processed': None,
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None) as task_index_records:

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S', force=True)
            self.assertTrue(update_solr.called)
            self.assertTrue(update_timestamp.called)
            self.assertFalse(task_index_records.called)

        # 6) only fulltext_claims_updated is set, no force: nothing expected
        record = {
            'bibcode': 'foobar',
            'bib_data_updated': None,
            'nonbib_data_updated': None,
            'orcid_claims_updated': None,
            'fulltext_claims_updated': get_date(),
            'processed': None,
        }
        with patch.object(self.app, 'update_processed_timestamp', return_value=None) as update_timestamp, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as update_solr, \
                patch.object(self.app, 'get_record', return_value=record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):

            self.assertFalse(update_solr.called)
            tasks.task_index_records('2015ApJ...815..133S')
            self.assertFalse(update_solr.called)
            self.assertFalse(update_timestamp.called)
# Example #13
# 0
    def test_task_update_solr(self):
        """Walk through the scenarios that decide whether solr gets updated.

        Every scenario mocks ``get_record`` with its own mix of ``*_updated``
        and ``processed`` stamps, wraps ``task_index_solr.apply_async`` with
        ``unwind_task_index_solr_apply_async``, runs
        ``tasks.task_index_records`` and checks whether the mocked
        ``solr_updater.update_solr`` (and, where patched, ``mark_processed``)
        was called.
        """
        bibcode_arg = '2015ApJ...815..133S'

        # just make sure we have the entry in a database
        self._reset_checksum('foobar')

        # processed in 2012, all stamps current -> solr update expected
        stale_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data': {},
            'metrics': {},
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date('2012'),
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as mark_processed, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=stale_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg)
            self.assertTrue(solr_update.called)
            self.assertTrue(mark_processed.called)

        # NOTE: checksum assertion disabled in the original:
        # self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

        future_year = datetime.now().year + 1

        # processed stamp in the future, not forced -> no solr update
        fresh_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': get_date(),
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date(str(future_year)),
        }
        with patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=fresh_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg)
            self.assertFalse(solr_update.called)

        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

        # processed stamp in the future, forced -> solr update expected
        forced_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': get_date(),
            'bib_data': {},
            'metrics': {},
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': get_date(str(future_year)),
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as mark_processed, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=forced_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg, force=True)
            self.assertTrue(solr_update.called)
            self.assertTrue(mark_processed.called)

        # NOTE: checksum assertion disabled in the original:
        # self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')

        # bib_data_updated missing, not forced -> no solr update
        no_bib_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': None,
            'nonbib_data_updated': get_date(),
            'orcid_claims_updated': get_date(),
            'processed': None,
        }
        with patch('adsmp.solr_updater.update_solr', return_value=None) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=no_bib_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg)
            self.assertFalse(solr_update.called)

        self._check_checksum('foobar', solr=None)
        self._reset_checksum('foobar')

        # nonbib_data_updated missing, forced -> solr update expected and the
        # task must not have re-queued itself via apply_async
        no_nonbib_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': get_date(),
            'bib_data': {},
            'metrics': {},
            'nonbib_data_updated': None,
            'orcid_claims_updated': get_date(),
            'processed': None,
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as mark_processed, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=no_nonbib_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None) as requeue:
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg, force=True)
            self.assertTrue(solr_update.called)
            self.assertTrue(mark_processed.called)
            self.assertFalse(requeue.called)

        # only augments and fulltext stamps present, not forced -> no solr update
        fulltext_only_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': None,
            'nonbib_data_updated': None,
            'orcid_claims_updated': None,
            'fulltext_claims_updated': get_date(),
            'processed': None,
        }
        with patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=fulltext_only_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg)
            self.assertFalse(solr_update.called)

        # processed in 2014 with 2012 data stamps but a current
        # augments_updated stamp -> solr update expected
        augmented_record = {
            'bibcode': 'foobar',
            'augments_updated': get_date(),
            'bib_data_updated': get_date('2012'),
            'bib_data': {},
            'metrics': {},
            'nonbib_data_updated': get_date('2012'),
            'orcid_claims_updated': get_date('2012'),
            'processed': get_date('2014'),
        }
        with patch.object(self.app, 'mark_processed', return_value=None) as mark_processed, \
                patch('adsmp.solr_updater.update_solr', return_value=[200]) as solr_update, \
                patch('adsmp.tasks.task_index_solr.apply_async',
                      wraps=unwind_task_index_solr_apply_async), \
                patch.object(self.app, 'get_record', return_value=augmented_record), \
                patch('adsmp.tasks.task_index_records.apply_async', return_value=None):
            self.assertFalse(solr_update.called)
            tasks.task_index_records(bibcode_arg)
            self.assertTrue(solr_update.called)
            self.assertTrue(mark_processed.called)

        # NOTE: checksum assertion disabled in the original:
        # self._check_checksum('foobar', solr=True)
        self._reset_checksum('foobar')