def run(dbcache, users, revert_cutoff, revert_radius, no_headers):
    """
    Write one TSV row of first-day / first-week activity stats per user.

    For every user, all revisions saved before one week after registration
    are queried, combined with a synthetic "registration" event, split into
    sessions by `sessions.sessions` and tallied.  The row is written to
    stdout; a progress trace is written to stderr with one character per
    revision:

    * ``a`` -- namespace-0 revision that is archived
    * ``r`` -- namespace-0 revision for which a revert was found
    * ``.`` -- namespace-0 revision that is neither archived nor reverted
    * ``_`` -- revision outside namespace 0

    Args:
        dbcache: object whose ``get_db(wiki)`` returns the database to query
            for that wiki.
        users: iterable of rows exposing ``wiki``, ``bucket``,
            ``first_event``, ``user_id`` and ``user_registration``.
        revert_cutoff: number of seconds after a revision during which a
            revert is looked for.
        revert_radius: forwarded as ``radius`` to
            ``reverts.database.check_row``.
        no_headers: when true the writer is created with ``headers=None``
            instead of ``HEADERS``.
    """
    one_day = 60 * 60 * 24
    one_week = one_day * 7

    headers = None if no_headers else HEADERS
    output = tsv.Writer(sys.stdout, headers=headers)

    for user in users:
        sys.stderr.write("{0}, {1}: ".format(user.wiki, user.user_id))
        db = dbcache.get_db(user.wiki)

        day_revisions = 0
        day_deleted_revisions = 0
        day_main_revisions = 0
        day_deleted_main_revisions = 0
        day_reverted_main_revisions = 0
        day_productive_edits = 0
        week_revisions = 0
        week_deleted_revisions = 0
        week_main_revisions = 0
        week_deleted_main_revisions = 0
        week_reverted_main_revisions = 0
        week_productive_edits = 0
        week_sessions = 0
        week_session_seconds = 0

        registration = Timestamp(user.user_registration)
        end_of_life = Timestamp(int(registration) + one_week)

        user_revisions = db.all_revisions.query(
            user_id=user.user_id,
            direction="newer",
            before=end_of_life,
            include_page=True
        )

        # Events are (user, timestamp, data) triples.  The registration is
        # injected as the first event so that it takes part in sessionization.
        user_events = chain(
            [(user.user_id, registration, ('registration', registration, None))],
            (
                (
                    rev['rev_user'],
                    rev['rev_timestamp'],
                    ('revision', Timestamp(rev['rev_timestamp']), rev)
                )
                for rev in user_revisions
            )
        )

        for _, events in sessions.sessions(user_events):
            for event_type, _timestamp, payload in events:
                if event_type != "revision":
                    continue

                rev = payload
                rev_timestamp = Timestamp(rev['rev_timestamp'])

                # Boolean flag; it is added to the day_* counters as 0 or 1.
                day = rev_timestamp - registration <= one_day

                week_revisions += 1
                day_revisions += day

                # NOTE(review): the *_deleted_* counters are tallied but are
                # not part of the row written below -- confirm against
                # HEADERS whether that is intended.
                week_deleted_revisions += rev['archived']
                day_deleted_revisions += rev['archived'] * day

                if rev['page_namespace'] != 0:
                    sys.stderr.write("_")
                    continue

                week_main_revisions += 1
                day_main_revisions += day

                if rev['archived']:
                    week_deleted_main_revisions += 1
                    day_deleted_main_revisions += day
                    sys.stderr.write("a")
                    continue

                # Only look for reverts that happened before the cutoff.
                cutoff_timestamp = Timestamp(int(rev_timestamp) + revert_cutoff)
                revert = reverts.database.check_row(
                    db, rev,
                    radius=revert_radius,
                    before=cutoff_timestamp
                )

                if revert is not None:  # Reverted edit!
                    week_reverted_main_revisions += 1
                    day_reverted_main_revisions += day
                    sys.stderr.write("r")
                else:
                    day_productive_edits += day
                    week_productive_edits += 1
                    sys.stderr.write(".")

            # Session length is the gap between its first and last event
            # timestamps (index 1 of each event tuple).
            week_sessions += 1
            week_session_seconds += events[-1][1] - events[0][1]

        sys.stderr.write("\n")
        output.write([
            user.wiki,
            user.bucket,
            user.first_event,
            user.user_id,
            user.user_registration,
            day_revisions,
            day_main_revisions,
            day_reverted_main_revisions,
            day_productive_edits,
            week_revisions,
            week_main_revisions,
            week_reverted_main_revisions,
            week_sessions,
            week_session_seconds,
            week_productive_edits
        ])
def run(users, revert_cutoff, revert_radius, dbuser, host, defaults_file):
    """
    Write one TSV row of first-day / first-week activity stats per user.

    Users are processed wiki by wiki: a database connection is opened for
    each run of consecutive users sharing the same ``wiki``.  For every
    user, all revisions saved before one week after registration are
    queried, combined with a synthetic "registration" event, split into
    sessions by `sessions.sessions` and tallied.  The row is written to
    stdout; a progress trace is written to stderr with one character per
    revision:

    * ``a`` -- namespace-0 revision that is archived
    * ``r`` -- namespace-0 revision for which a revert was found
    * ``.`` -- namespace-0 revision that is neither archived nor reverted
    * ``_`` -- revision outside namespace 0

    Args:
        users: iterable of rows exposing ``wiki``, ``user_id`` and
            ``user_registration``.  `itertools.groupby` only groups
            *consecutive* rows, so the input should already be ordered by
            wiki; otherwise a wiki is connected to once per run of rows.
        revert_cutoff: number of seconds after a revision during which a
            revert is looked for.
        revert_radius: forwarded as ``radius`` to ``reverts.database.check``.
        dbuser: forwarded to ``connection``.
        host: forwarded to ``connection``.
        defaults_file: forwarded to ``connection``.
    """
    one_day = 60 * 60 * 24
    one_week = one_day * 7

    output = tsv.Writer(sys.stdout, headers=HEADERS)

    # A distinct loop name avoids shadowing the `users` parameter.
    for wiki, wiki_users in groupby(users, lambda u: u.wiki):
        # NOTE(review): the connection is never explicitly closed here --
        # confirm whether database.DB / connection need explicit cleanup.
        db = database.DB(connection(wiki, host, dbuser, defaults_file))

        for user in wiki_users:
            sys.stderr.write("{0}, {1}: ".format(wiki, user.user_id))

            day_revisions = 0
            day_deleted_revisions = 0
            day_main_revisions = 0
            day_deleted_main_revisions = 0
            day_reverted_main_revisions = 0
            day_productive_edits = 0
            week_revisions = 0
            week_deleted_revisions = 0
            week_main_revisions = 0
            week_deleted_main_revisions = 0
            week_reverted_main_revisions = 0
            week_productive_edits = 0
            week_sessions = 0
            week_session_seconds = 0

            registration = Timestamp(user.user_registration)
            end_of_life = registration + one_week  # One week after reg.

            user_revisions = db.all_revisions.query(
                user_id=user.user_id,
                direction="newer",
                before=end_of_life,
                include_page=True
            )

            # Events are (user, timestamp, data) triples.  The registration
            # is injected as the first event so that it takes part in
            # sessionization.
            user_events = chain(
                [
                    (
                        user.user_id,
                        registration,
                        ('registration', registration, None)
                    )
                ],
                (
                    (
                        rev['rev_user'],
                        rev['rev_timestamp'],
                        ('revision', Timestamp(rev['rev_timestamp']), rev)
                    )
                    for rev in user_revisions
                )
            )

            for _, events in sessions.sessions(user_events):
                for event_type, _timestamp, payload in events:
                    if event_type != "revision":
                        continue

                    rev = payload
                    rev_timestamp = Timestamp(rev['rev_timestamp'])

                    # Boolean flag; added to the day_* counters as 0 or 1.
                    day = rev_timestamp - registration <= one_day

                    week_revisions += 1
                    day_revisions += day

                    week_deleted_revisions += rev['archived']
                    day_deleted_revisions += rev['archived'] * day

                    if rev['page_namespace'] != 0:
                        sys.stderr.write("_")
                        continue

                    week_main_revisions += 1
                    day_main_revisions += day

                    if rev['archived']:
                        week_deleted_main_revisions += 1
                        day_deleted_main_revisions += day
                        sys.stderr.write("a")
                        continue

                    # Only look for reverts that happened before the cutoff.
                    revert = reverts.database.check(
                        db,
                        rev_id=rev['rev_id'],
                        page_id=rev['page_id'],
                        radius=revert_radius,
                        before=int(rev_timestamp) + revert_cutoff
                    )

                    if revert is not None:  # Reverted edit!
                        week_reverted_main_revisions += 1
                        day_reverted_main_revisions += day
                        sys.stderr.write("r")
                    else:
                        day_productive_edits += day
                        week_productive_edits += 1
                        sys.stderr.write(".")

                # Session length is the gap between its first and last event
                # timestamps (index 1 of each event tuple).
                week_sessions += 1
                week_session_seconds += events[-1][1] - events[0][1]

            sys.stderr.write("\n")
            output.write([
                wiki,
                user.user_id,
                day_revisions,
                day_deleted_revisions,
                day_main_revisions,
                day_deleted_main_revisions,
                day_reverted_main_revisions,
                day_productive_edits,
                week_revisions,
                week_deleted_revisions,
                week_main_revisions,
                week_deleted_main_revisions,
                week_reverted_main_revisions,
                week_productive_edits,
                week_sessions,
                week_session_seconds
            ])