Exemple #1
0
    def test_does_not_add_existing_log(self):
        """Recording the same page move twice yields exactly one log entry."""
        move_args = (self.wp10db, self.project, self.old_ns, self.old_article,
                     self.new_ns, self.new_article, self.dt)

        # Apply the identical move twice; the repeat must not add a second log.
        for _ in range(2):
            logic_page.update_page_moved(*move_args)

        log_count = len(get_all_logs(self.wp10db))
        self.assertEqual(1, log_count)
Exemple #2
0
    def test_new_move(self):
        """A page move is stored as a row in the ``moves`` table."""
        logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
                                     self.old_article, self.new_ns,
                                     self.new_article, self.dt)

        with self.wp10db.cursor() as cursor:
            cursor.execute(
                '''
          SELECT * FROM moves
          WHERE m_old_article = %(old_article)s
      ''', {'old_article': self.old_article})
            row = cursor.fetchone()

        # Assert on the raw row before unpacking it: Move(**None) would raise
        # a TypeError, so a None check on the constructed object could never
        # fail and a missing row would surface as an unrelated error.
        self.assertIsNotNone(row)
        move = Move(**row)

        self.assertEqual(self.old_ns, move.m_old_namespace)
        self.assertEqual(self.old_article, move.m_old_article)
        self.assertEqual(self.new_ns, move.m_new_namespace)
        self.assertEqual(self.new_article, move.m_new_article)
        self.assertEqual(self.timestamp_db, move.m_timestamp)
Exemple #3
0
    def test_new_move_log(self):
        """A page move writes a 'moved' row to the ``logging`` table."""
        logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
                                     self.old_article, self.new_ns,
                                     self.new_article, self.dt)

        with self.wp10db.cursor() as cursor:
            cursor.execute(
                '''
          SELECT * FROM logging
          WHERE l_article = %(old_article)s
      ''', {'old_article': self.old_article})
            row = cursor.fetchone()

        # Assert on the raw row before unpacking it: Log(**None) would raise
        # a TypeError, so a None check on the constructed object could never
        # fail and a missing row would surface as an unrelated error.
        self.assertIsNotNone(row)
        log = Log(**row)

        self.assertEqual(self.old_ns, log.l_namespace)
        self.assertEqual(self.old_article, log.l_article)
        self.assertEqual(b'moved', log.l_action)
        self.assertEqual(b'', log.l_old)
        self.assertEqual(b'', log.l_new)
        self.assertEqual(self.timestamp_db, log.l_revision_timestamp)
Exemple #4
0
def _is_unset_class(value):
    """Return True if a stored rating class is missing or is NOT_A_CLASS."""
    # NOT_A_CLASS is a str, but process_unseen_articles writes the class in
    # its UTF-8 encoded form, so accept both representations here.
    # NOTE(review): assumes stored r_quality / r_importance values are bytes;
    # a bytes value never compares equal to a str in Python 3.
    return (value is None or value == NOT_A_CLASS
            or value == NOT_A_CLASS.encode('utf-8'))


def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
    """Reset ratings for previously rated articles that were not seen.

    Every entry of ``old_ratings`` whose key is not in ``seen`` is looked up
    for move data; if a move is found it is recorded. The article's quality
    and/or importance is then set to NOT_A_CLASS, the rating is written with
    ``logic_rating.insert_or_update`` and a log entry is added for each
    assessment kind that changed. Articles whose old quality and importance
    are both already unset are skipped.

    Args:
        wikidb: Connection passed to ``logic_page.get_move_data``.
        wp10db: Connection used for all writes; committed in batches and
            once more at the end.
        project: Project providing ``p_project`` and ``timestamp_dt``.
        old_ratings: Mapping of article ref (bytes of the form
            ``b'<namespace>:<title>'``) to its previous rating.
        seen: Container of refs that were already processed elsewhere.
    """
    denom = len(old_ratings)
    ratio = len(seen) / denom if denom != 0 else 'NaN'

    logger.debug('Looking for unseen articles, ratio was: %s', ratio)
    in_seen = 0
    skipped = 0
    processed = 0
    uncommitted = 0
    for ref, old_rating in old_ratings.items():
        if ref in seen:
            in_seen += 1
            continue

        # By default, we evaluate both assessment kinds.
        kind = AssessmentKind.BOTH
        if _is_unset_class(old_rating.r_quality):
            # The quality rating is not set, so just evaluate importance
            kind = AssessmentKind.IMPORTANCE
            if _is_unset_class(old_rating.r_importance):
                # The importance rating is also not set, so don't do anything.
                skipped += 1
                continue

        ref_text = ref.decode('utf-8')
        logger.debug('Processing unseen article %s', ref_text)
        processed += 1
        ns_text, title_text = ref_text.split(':', 1)
        ns = int(ns_text)
        title = title_text.encode('utf-8')

        move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
                                             project.timestamp_dt)
        if move_data is not None:
            logic_page.update_page_moved(wp10db, project, ns, title,
                                         move_data['dest_ns'],
                                         move_data['dest_title'],
                                         move_data['timestamp_dt'])

        # Mark this article as having NOT_A_CLASS for its quality or
        # importance. This probably means the article was deleted, but could
        # in fact mean that we just failed to find its move data. Either way,
        # the new article would have already been picked up by the assessment
        # updater, assuming it was tagged correctly.
        rating = Rating(r_project=project.p_project,
                        r_namespace=ns,
                        r_article=title,
                        r_score=0)
        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            # Use the r_-prefixed field, matching the other Rating fields
            # used in this function.
            rating.r_quality = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_quality_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            rating.r_importance = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_importance_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

        logic_rating.insert_or_update(wp10db, rating, kind)

        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.QUALITY,
                                            old_rating.r_quality)
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.IMPORTANCE,
                                            old_rating.r_importance)

        uncommitted += 1
        if uncommitted >= MAX_ARTICLES_BEFORE_COMMIT:
            wp10db.ping()
            wp10db.commit()
            # Start a new batch; without the reset every later iteration
            # would commit individually once the threshold was reached.
            uncommitted = 0
    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()

    logger.debug('SEEN REPORT:\nin seen: %s\nskipped: %s\nprocessed: %s',
                 in_seen, skipped, processed)