def test_does_not_add_existing_log(self):
    """Recording the same move twice must leave exactly one log entry."""
    move_args = (self.wp10db, self.project, self.old_ns, self.old_article,
                 self.new_ns, self.new_article, self.dt)

    # Issue the identical update two times in a row.
    for _ in range(2):
        logic_page.update_page_moved(*move_args)

    self.assertEqual(1, len(get_all_logs(self.wp10db)))
def test_new_move(self):
    """A page move is stored in the ``moves`` table with the given fields."""
    logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
                                 self.old_article, self.new_ns,
                                 self.new_article, self.dt)

    with self.wp10db.cursor() as cursor:
        cursor.execute(
            ''' SELECT * FROM moves WHERE m_old_article = %(old_article)s ''',
            {'old_article': self.old_article})
        row = cursor.fetchone()

    # Check the raw row before unpacking it: if no row was written,
    # ``Move(**None)`` would raise TypeError and hide the real failure.
    self.assertIsNotNone(row)
    move = Move(**row)

    self.assertEqual(self.old_ns, move.m_old_namespace)
    self.assertEqual(self.old_article, move.m_old_article)
    self.assertEqual(self.new_ns, move.m_new_namespace)
    self.assertEqual(self.new_article, move.m_new_article)
    self.assertEqual(self.timestamp_db, move.m_timestamp)
def test_new_move_log(self):
    """A page move writes a ``moved`` entry to the ``logging`` table."""
    logic_page.update_page_moved(self.wp10db, self.project, self.old_ns,
                                 self.old_article, self.new_ns,
                                 self.new_article, self.dt)

    with self.wp10db.cursor() as cursor:
        cursor.execute(
            ''' SELECT * FROM logging WHERE l_article = %(old_article)s ''',
            {'old_article': self.old_article})
        row = cursor.fetchone()

    # Check the raw row before unpacking it: if no row was written,
    # ``Log(**None)`` would raise TypeError and hide the real failure.
    self.assertIsNotNone(row)
    log = Log(**row)

    self.assertEqual(self.old_ns, log.l_namespace)
    self.assertEqual(self.old_article, log.l_article)
    self.assertEqual(b'moved', log.l_action)
    self.assertEqual(b'', log.l_old)
    self.assertEqual(b'', log.l_new)
    self.assertEqual(self.timestamp_db, log.l_revision_timestamp)
def process_unseen_articles(wikidb, wp10db, project, old_ratings, seen):
    """Reset ratings for previously rated articles that were not seen.

    Every entry of ``old_ratings`` whose key is not in ``seen`` is looked up
    for move data. If move data is found, the move is recorded via
    ``logic_page.update_page_moved``. In either case a ``NOT_A_CLASS`` rating
    is written for the assessment kinds that were previously set, and a log
    entry is added for each of those kinds. Entries whose quality and
    importance were both already unset are skipped.

    Work is committed in batches of ``MAX_ARTICLES_BEFORE_COMMIT`` processed
    articles, with a final commit once the loop finishes.

    Args:
        wikidb: Wiki database handle, passed to ``logic_page.get_move_data``.
        wp10db: Database connection that is written to and committed.
        project: Project whose ``p_project`` and ``timestamp_dt`` are used.
        old_ratings: Mapping of article reference to its previous rating.
            References are decoded as UTF-8 and split as ``<ns>:<title>``.
        seen: Container of references that were already processed elsewhere.
    """
    denom = len(old_ratings)
    ratio = len(seen) / denom if denom != 0 else 'NaN'
    logger.debug('Looking for unseen articles, ratio was: %s', ratio)

    in_seen = 0
    skipped = 0
    processed = 0
    n = 0
    for ref, old_rating in old_ratings.items():
        if ref in seen:
            in_seen += 1
            continue

        # By default, we evaluate both assessment kinds.
        kind = AssessmentKind.BOTH
        if old_rating.r_quality == NOT_A_CLASS or old_rating.r_quality is None:
            # The quality rating is not set, so just evaluate importance.
            kind = AssessmentKind.IMPORTANCE
            if (old_rating.r_importance == NOT_A_CLASS or
                    old_rating.r_importance is None):
                # The importance rating is also not set, so don't do anything.
                skipped += 1
                continue

        # Decode once and reuse the text for both logging and parsing.
        ref_text = ref.decode('utf-8')
        logger.debug('Processing unseen article %s', ref_text)
        processed += 1

        ns, title = ref_text.split(':', 1)
        ns = int(ns.encode('utf-8'))
        title = title.encode('utf-8')

        move_data = logic_page.get_move_data(wp10db, wikidb, ns, title,
                                             project.timestamp_dt)
        if move_data is not None:
            logic_page.update_page_moved(wp10db, project, ns, title,
                                         move_data['dest_ns'],
                                         move_data['dest_title'],
                                         move_data['timestamp_dt'])

        # Mark this article as having NOT_A_CLASS for its quality or
        # importance. This probably means the article was deleted, but could
        # in fact mean that we just failed to find its move data. Either way,
        # the new article would have already been picked up by the assessment
        # updater, assuming it was tagged correctly.
        rating = Rating(r_project=project.p_project,
                        r_namespace=ns,
                        r_article=title,
                        r_score=0)
        # NOTE(review): the assignments below target `quality`/`importance`,
        # while the constructor and timestamp fields use `r_`-prefixed names.
        # Confirm that Rating exposes these attributes as intended.
        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            rating.quality = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_quality_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_quality_timestamp = GLOBAL_TIMESTAMP_WIKI

        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            rating.importance = NOT_A_CLASS.encode('utf-8')
            if move_data:
                rating.set_importance_timestamp_dt(move_data['timestamp_dt'])
            else:
                rating.r_importance_timestamp = GLOBAL_TIMESTAMP_WIKI

        logic_rating.insert_or_update(wp10db, rating, kind)

        if kind in (AssessmentKind.QUALITY, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.QUALITY,
                                            old_rating.r_quality)
        if kind in (AssessmentKind.IMPORTANCE, AssessmentKind.BOTH):
            logic_rating.add_log_for_rating(wp10db, rating,
                                            AssessmentKind.IMPORTANCE,
                                            old_rating.r_importance)

        n += 1
        if n >= MAX_ARTICLES_BEFORE_COMMIT:
            wp10db.ping()
            wp10db.commit()
            # Start a new batch; without this reset every later iteration
            # would ping and commit again.
            n = 0

    logger.info('End, committing db')
    wp10db.ping()
    wp10db.commit()
    logger.debug('SEEN REPORT:\nin seen: %s\nskipped: %s\nprocessed: %s',
                 in_seen, skipped, processed)