def _reset_checksum(self, bibcode):
    """Set every stored checksum of the record for ``bibcode`` to None.

    The record is looked up by bibcode; when none exists a new one is
    created and added to the session before the checksums are cleared.
    """
    checksum_columns = ('solr_checksum', 'metrics_checksum',
                        'datalinks_checksum')
    with self.app.session_scope() as session:
        record = session.query(Records).filter_by(bibcode=bibcode).first()
        if record is None:
            record = Records(bibcode=bibcode)
            session.add(record)
        for column in checksum_columns:
            setattr(record, column, None)
        session.commit()
    def test_reindex_failed_bibcodes(self):
        """Exercise reindex_failed_bibcodes against a seeded database.

        Five records with different statuses are stored, then
        reindex_failed_bibcodes runs with the indexing task's apply_async
        patched out. The test checks the single queued call and the status
        each record ends up with.
        """
        # init database: (bibcode, remaining column values)
        seed_rows = (
            ('bibcode1', {'status': 'success', 'bib_data': '{}'}),
            ('bibcode2', {'status': 'solr-failed', 'bib_data': '{}'}),
            ('bibcode3', {'status': 'links-failed', 'bib_data': '{}'}),
            ('bibcode4', {'status': 'retrying', 'bib_data': '{}'}),
            ('bibcode5', {'fulltext': 'foobar'}),
        )
        with self.app.session_scope() as session:
            for code, columns in seed_rows:
                session.add(Records(bibcode=code, **columns))

        # keyword arguments the task is expected to be queued with
        expected_task_kwargs = {
            'force': True,
            'ignore_checksums': True,
            'update_links': True,
            'update_metrics': True,
            'update_solr': True,
            'update_processed': True,
            'priority': 0
        }

        # execute reindex_failed_bibcodes from run.py
        with patch('adsmp.tasks.task_index_records.apply_async',
                   return_value=None) as apply_async_mock:
            reindex_failed_bibcodes(self.app)
            self.assertEqual(1, apply_async_mock.call_count)
            apply_async_mock.assert_called_with(
                args=([u'bibcode2', u'bibcode3'], ),
                kwargs=expected_task_kwargs,
                priority=0)

        # verify database was updated properly
        expected_statuses = (
            ('bibcode1', 'success'),
            ('bibcode2', 'retrying'),
            ('bibcode3', 'retrying'),
            ('bibcode4', 'retrying'),
            ('bibcode5', None),
        )
        with self.app.session_scope() as session:
            for code, wanted in expected_statuses:
                stored = session.query(Records).filter_by(bibcode=code).first()
                self.assertEqual(stored.status, wanted)
Example #3
0
    def test_reindex_failed(self):
        """Exercise reindex_failed against a seeded database.

        Five records with different statuses are stored, then reindex_failed
        runs with the indexing task's delay patched out. The test checks the
        single queued call and the status each record ends up with.
        """
        # init database: (bibcode, remaining column values)
        seed_rows = (
            ('bibcode1', {'status': 'success', 'bib_data': '{}'}),
            ('bibcode2', {'status': 'solr-failed', 'bib_data': '{}'}),
            ('bibcode3', {'status': 'links-failed', 'bib_data': '{}'}),
            ('bibcode4', {'status': 'retrying', 'bib_data': '{}'}),
            ('bibcode5', {'fulltext': 'foobar'}),
        )
        with self.app.session_scope() as session:
            for code, columns in seed_rows:
                session.add(Records(bibcode=code, **columns))

        # execute reindex_failed from run.py
        with patch('adsmp.tasks.task_index_records.delay',
                   return_value=None) as delay_mock:
            reindex_failed(self.app)
            self.assertEqual(1, delay_mock.call_count)
            delay_mock.assert_called_with(
                [u'bibcode2', u'bibcode3'],
                force=True,
                ignore_checksums=True,
                update_links=True,
                update_metrics=True,
                update_solr=True,
                update_timestamps=True)

        # verify database was updated properly
        expected_statuses = (
            ('bibcode1', 'success'),
            ('bibcode2', 'retrying'),
            ('bibcode3', 'retrying'),
            ('bibcode4', 'retrying'),
            ('bibcode5', None),
        )
        with self.app.session_scope() as session:
            for code, wanted in expected_statuses:
                stored = session.query(Records).filter_by(bibcode=code).first()
                self.assertEqual(stored.status, wanted)
Example #4
0
    def update_storage(self, bibcode, type, payload):
        """Store ``payload`` on the record for ``bibcode`` and log the change.

        The record is looked up by bibcode and created when it does not
        exist yet. ``type`` selects the column that receives the payload:
        'metadata' or 'bib_data', 'nonbib_data', 'orcid_claims', 'fulltext',
        'metrics', or 'augment'. The matching ``*_updated`` column and the
        record's ``updated`` column are set to the value returned by
        adsputils.get_date(), and a ChangeLog entry is added to the session.
        A payload that is not a string is serialized with json.dumps first.

        Returns the result of the record's toJSON(), taken before the commit.

        Raises Exception for an unknown ``type``. An IntegrityError raised
        by the commit is logged, the session is rolled back and the error
        is re-raised.
        """
        if not isinstance(payload, basestring):
            payload = json.dumps(payload)

        # (accepted type names, record column, whether the previous column
        # value is copied into the ChangeLog entry)
        column_rules = (
            (('metadata', 'bib_data'), 'bib_data', True),
            (('nonbib_data',), 'nonbib_data', True),
            (('orcid_claims',), 'orcid_claims', True),
            (('fulltext',), 'fulltext', False),
            (('metrics',), 'metrics', False),
            # payload contains new values for the affiliation fields
            (('augment',), 'augments', False),
        )

        with self.session_scope() as session:
            record = session.query(Records).filter_by(bibcode=bibcode).first()
            if record is None:
                record = Records(bibcode=bibcode)
                session.add(record)
            timestamp = adsputils.get_date()

            for names, column, keep_previous in column_rules:
                if type in names:
                    # read the old value before it is overwritten
                    if keep_previous:
                        previous = getattr(record, column)
                    else:
                        previous = 'not-stored'
                    setattr(record, column, payload)
                    setattr(record, column + '_updated', timestamp)
                    break
            else:
                raise Exception('Unknown type: %s' % type)

            session.add(ChangeLog(key=bibcode, type=type, oldvalue=previous))
            record.updated = timestamp

            # serialize before committing; this is the value handed back
            snapshot = record.toJSON()
            try:
                session.commit()
            except exc.IntegrityError:
                self.logger.exception(
                    'error in app.update_storage while updating database for bibcode {}, type {}'
                    .format(bibcode, type))
                session.rollback()
                raise
            return snapshot