Example #1
0
    def test_get_most_recent_move_from_api(self, patched_site):
        """Check the move data returned when logevents yields ``le_multi``.

        The patched site's ``logevents`` is stubbed to return
        ``self.le_multi``; the destination namespace, title and timestamp of
        the result must match the expected fixture values.
        """
        def fake_logevents(*args, **kwargs):
            # Looked up at call time, exactly like the fixture attribute.
            return self.le_multi

        patched_site.logevents.side_effect = fake_logevents

        cutoff = datetime(1970, 1, 1)
        result = logic_page.get_move_data(
            self.wp10db, self.wikidb, 0, b'Some Moved Article', cutoff)

        self.assertIsNotNone(result)
        self.assertEqual(self.expected_ns, result['dest_ns'])
        # The destination title is compared as UTF-8 encoded bytes.
        self.assertEqual(self.expected_title.encode('utf-8'),
                         result['dest_title'])
        self.assertEqual(self.expected_dt, result['timestamp_dt'])
Example #2
0
 def test_get_single_move_too_old_from_api(self, patched_site):
     """Expect no move data when querying with a 2014-01-01 cutoff.

     The patched site's ``logevents`` is stubbed to return
     ``self.le_return``; presumably the move in that fixture predates the
     cutoff (verify against the fixture definition).
     """
     def fake_logevents(*args, **kwargs):
         # Looked up at call time, exactly like the fixture attribute.
         return self.le_return

     patched_site.logevents.side_effect = fake_logevents

     cutoff = datetime(2014, 1, 1)
     result = logic_page.get_move_data(
         self.wp10db, self.wikidb, 0, b'Some Moved Article', cutoff)

     self.assertIsNone(result)
Example #3
0
 def test_no_redirect_no_move(self, unused_patched_site):
     """Expect no move data when the patched site is left unconfigured.

     The patched site argument is accepted but not used by this test.
     """
     namespace = 0
     title = b'Some Moved Article'
     cutoff = datetime(1970, 1, 1)

     result = logic_page.get_move_data(
         self.wp10db, self.wikidb, namespace, title, cutoff)

     self.assertIsNone(result)
Example #4
0
def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
    """Reset the ratings of previously rated articles that were not seen.

    Every entry of ``old_ratings`` whose key is absent from ``seen`` is
    handled as follows: its move data is looked up and, if found, the move is
    recorded; then a ``Rating`` carrying NOT_A_CLASS for the relevant
    assessment kind(s) is written and a log entry is added per kind.

    Args:
        wikidb: Wiki database handle, passed to ``logic_page.get_move_data``.
        wp10db: Database connection used for all writes. It is pinged and
            committed after every ``MAX_ARTICLES_BEFORE_COMMIT`` processed
            articles and once more at the end.
        project: Project object; ``p_project`` and ``timestamp_dt`` are read.
        old_ratings: Mapping of article ref to its previous rating. Refs are
            UTF-8 bytes of the form ``b'<namespace>:<title>'``; ratings expose
            ``r_quality`` and ``r_importance``.
        seen: Container of refs that were already handled and must be skipped.
    """
    denom = len(old_ratings)
    ratio = len(seen) / denom if denom != 0 else 'NaN'

    logger.debug('Looking for unseen articles, ratio was: %s', ratio)
    in_seen = 0
    skipped = 0
    processed = 0
    uncommitted = 0
    for ref, old_rating in old_ratings.items():
        if ref in seen:
            in_seen += 1
            continue

        # By default, we evaluate both assessment kinds.
        # NOTE(review): NOT_A_CLASS is a str here (it is .encode()d below);
        # if the stored r_quality/r_importance values are bytes these
        # equality checks never match - confirm against the Rating model.
        kind = AssessmentKind.BOTH
        if old_rating.r_quality == NOT_A_CLASS or old_rating.r_quality is None:
            # The quality rating is not set, so just evaluate importance
            kind = AssessmentKind.IMPORTANCE
            if (old_rating.r_importance == NOT_A_CLASS
                    or old_rating.r_importance is None):
                # The importance rating is also not set, so don't do anything.
                skipped += 1
                continue

        # Decode once and reuse for both logging and parsing.
        ref_text = ref.decode('utf-8')
        logger.debug('Processing unseen article %s', ref_text)
        processed += 1
        ns_text, title_text = ref_text.split(':', 1)
        ns = int(ns_text)
        title = title_text.encode('utf-8')

        move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
                                             project.timestamp_dt)
        if move_data is not None:
            logic_page.update_page_moved(wp10db, project, ns, title,
                                         move_data['dest_ns'],
                                         move_data['dest_title'],
                                         move_data['timestamp_dt'])

        # Mark this article as having NOT_A_CLASS for its quality or
        # importance. This probably means the article was deleted, but could
        # in fact mean that we just failed to find its move data. Either way,
        # the new article would have already been picked up by the assessment
        # updater, assuming it was tagged correctly.
        rating = Rating(r_project=project.p_project,
                        r_namespace=ns,
                        r_article=title,
                        r_score=0)
        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            # Use the r_-prefixed field, matching every other Rating access
            # in this function, so the value lands on the persisted field.
            rating.r_quality = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_quality_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            rating.r_importance = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_importance_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

        logic_rating.insert_or_update(wp10db, rating, kind)

        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.QUALITY,
                                            old_rating.r_quality)
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.IMPORTANCE,
                                            old_rating.r_importance)

        uncommitted += 1
        if uncommitted >= MAX_ARTICLES_BEFORE_COMMIT:
            wp10db.ping()
            wp10db.commit()
            # Start a new batch; without the reset every subsequent article
            # would trigger its own commit.
            uncommitted = 0
    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()

    logger.debug('SEEN REPORT:\nin seen: %s\nskipped: %s\nprocessed: %s',
                 in_seen, skipped, processed)