コード例 #1
0
def run(dbcache, users, revert_cutoff, revert_radius, no_headers):
    """
    Write one TSV row of first-day / first-week activity counts per user.

    For every user, the revisions saved before one week after registration
    are queried, grouped into sessions together with the registration event,
    and counted.  Progress is echoed to stderr, one character per revision:
    "a" for an archived main-namespace revision, "r" for a reverted one,
    "." for one that is neither, and "_" for a revision outside namespace 0.

    :Parameters:
        dbcache
            object whose `get_db(wiki)` returns the database to query
        users
            iterable of rows exposing `wiki`, `bucket`, `first_event`,
            `user_id` and `user_registration`
        revert_cutoff
            added to a revision's integer timestamp to form the `before`
            bound handed to the revert check
        revert_radius
            passed through as `radius` to `reverts.database.check_row`
        no_headers
            when true the TSV writer is created with `headers=None`,
            otherwise with `HEADERS`
    """
    one_day = 60 * 60 * 24
    one_week = one_day * 7

    output = tsv.Writer(sys.stdout, headers=None if no_headers else HEADERS)

    for user in users:
        sys.stderr.write("{0}, {1}: ".format(user.wiki, user.user_id))

        db = dbcache.get_db(user.wiki)

        # NOTE(review): the four *_deleted_* counters below are accumulated
        # but never written to the output row -- confirm whether that is
        # intended for this variant of the script.
        day_revisions = 0
        day_deleted_revisions = 0
        day_main_revisions = 0
        day_deleted_main_revisions = 0
        day_reverted_main_revisions = 0
        day_productive_edits = 0
        week_revisions = 0
        week_deleted_revisions = 0
        week_main_revisions = 0
        week_deleted_main_revisions = 0
        week_reverted_main_revisions = 0
        week_productive_edits = 0
        week_sessions = 0
        week_session_seconds = 0

        registration = Timestamp(user.user_registration)
        end_of_life = Timestamp(int(registration) + one_week)

        user_revisions = db.all_revisions.query(
            user_id=user.user_id,
            direction="newer",
            before=end_of_life,
            include_page=True
        )

        # Events are (user, timestamp, payload) triples; the payload itself
        # is an (event_type, Timestamp, revision-or-None) triple.  The
        # registration is included so that it can open the first session.
        user_events = chain(
            [(user.user_id, registration, ('registration', registration, None))],
            (
                (
                    rev['rev_user'],
                    rev['rev_timestamp'],
                    ('revision', Timestamp(rev['rev_timestamp']), rev)
                )
                for rev in user_revisions
            )
        )

        for _, events in sessions.sessions(user_events):

            for event_type, timestamp, rev in events:
                if event_type != "revision":
                    continue

                # `timestamp` is the Timestamp already parsed when the event
                # was built, so the raw value need not be parsed again.
                # `day` is a bool: adding it bumps a day_* counter by one
                # only for revisions within the first day.
                day = timestamp - registration <= one_day

                week_revisions += 1
                day_revisions += day

                week_deleted_revisions += rev['archived']
                day_deleted_revisions += rev['archived'] * day

                if rev['page_namespace'] != 0:
                    sys.stderr.write("_")
                    continue

                week_main_revisions += 1
                day_main_revisions += day

                if rev['archived']:
                    week_deleted_main_revisions += 1
                    day_deleted_main_revisions += day
                    sys.stderr.write("a")
                    continue

                # Only non-archived revisions are checked for reverts, so
                # the cutoff is built here rather than for every revision.
                cutoff_timestamp = Timestamp(int(timestamp) + revert_cutoff)
                revert = reverts.database.check_row(
                    db, rev,
                    radius=revert_radius,
                    before=cutoff_timestamp
                )

                if revert is not None:  # Reverted edit!
                    week_reverted_main_revisions += 1
                    day_reverted_main_revisions += day
                    sys.stderr.write("r")
                else:
                    day_productive_edits += day
                    week_productive_edits += 1
                    sys.stderr.write(".")

            # Session length: last event's timestamp minus the first's.
            week_sessions += 1
            week_session_seconds += events[-1][1] - events[0][1]

        sys.stderr.write("\n")
        output.write([
            user.wiki,
            user.bucket,
            user.first_event,
            user.user_id,
            user.user_registration,
            day_revisions,
            day_main_revisions,
            day_reverted_main_revisions,
            day_productive_edits,
            week_revisions,
            week_main_revisions,
            week_reverted_main_revisions,
            week_sessions,
            week_session_seconds,
            week_productive_edits
        ])
コード例 #2
0
def run(users, revert_cutoff, revert_radius, dbuser, host, defaults_file):
    """
    Write one TSV row of first-day / first-week activity counts per user.

    Users are processed wiki by wiki; one database connection is opened per
    run of consecutive users sharing a wiki.  For every user, the revisions
    saved before one week after registration are queried, grouped into
    sessions together with the registration event, and counted.  Progress is
    echoed to stderr, one character per revision: "a" for an archived
    main-namespace revision, "r" for a reverted one, "." for one that is
    neither, and "_" for a revision outside namespace 0.

    :Parameters:
        users
            iterable of rows exposing `wiki`, `user_id` and
            `user_registration`
        revert_cutoff
            added to a revision's integer timestamp to form the `before`
            bound handed to the revert check
        revert_radius
            passed through as `radius` to `reverts.database.check`
        dbuser, host, defaults_file
            passed through to `connection(wiki, host, dbuser, defaults_file)`
    """
    one_day = 60 * 60 * 24
    one_week = one_day * 7

    output = tsv.Writer(sys.stdout, headers=HEADERS)

    # groupby() only merges *adjacent* rows with the same key, so input that
    # is not ordered by wiki opens a new connection each time the wiki
    # changes.  The group is bound to its own name instead of shadowing the
    # `users` parameter.
    for wiki, wiki_users in groupby(users, lambda u: u.wiki):
        db = database.DB(connection(wiki, host, dbuser, defaults_file))

        for user in wiki_users:
            sys.stderr.write("{0}, {1}: ".format(wiki, user.user_id))

            day_revisions = 0
            day_deleted_revisions = 0
            day_main_revisions = 0
            day_deleted_main_revisions = 0
            day_reverted_main_revisions = 0
            day_productive_edits = 0
            week_revisions = 0
            week_deleted_revisions = 0
            week_main_revisions = 0
            week_deleted_main_revisions = 0
            week_reverted_main_revisions = 0
            week_productive_edits = 0
            week_sessions = 0
            week_session_seconds = 0

            registration = Timestamp(user.user_registration)
            end_of_life = registration + one_week  # One week after reg.

            user_revisions = db.all_revisions.query(
                user_id=user.user_id,
                direction="newer",
                before=end_of_life,
                include_page=True
            )

            # Events are (user, timestamp, payload) triples; the payload
            # itself is an (event_type, Timestamp, revision-or-None) triple.
            # The registration is included so it can open the first session.
            user_events = chain(
                [
                    (
                        user.user_id,
                        registration,
                        ('registration', registration, None)
                    )
                ],
                (
                    (
                        rev['rev_user'],
                        rev['rev_timestamp'],
                        ('revision', Timestamp(rev['rev_timestamp']), rev)
                    )
                    for rev in user_revisions
                )
            )

            for _, events in sessions.sessions(user_events):

                for event_type, timestamp, rev in events:
                    if event_type != "revision":
                        continue

                    # `timestamp` is the Timestamp already parsed when the
                    # event was built, so the raw value is not parsed again.
                    # `day` is a bool: adding it bumps a day_* counter by
                    # one only for revisions within the first day.
                    day = timestamp - registration <= one_day

                    week_revisions += 1
                    day_revisions += day

                    week_deleted_revisions += rev['archived']
                    day_deleted_revisions += rev['archived'] * day

                    if rev['page_namespace'] != 0:
                        sys.stderr.write("_")
                        continue

                    week_main_revisions += 1
                    day_main_revisions += day

                    if rev['archived']:
                        week_deleted_main_revisions += 1
                        day_deleted_main_revisions += day
                        sys.stderr.write("a")
                        continue

                    # Only non-archived revisions are checked for reverts.
                    revert = reverts.database.check(
                        db, rev_id=rev['rev_id'],
                        page_id=rev['page_id'],
                        radius=revert_radius,
                        before=int(timestamp) + revert_cutoff
                    )

                    if revert is not None:  # Reverted edit!
                        week_reverted_main_revisions += 1
                        day_reverted_main_revisions += day
                        sys.stderr.write("r")
                    else:
                        day_productive_edits += day
                        week_productive_edits += 1
                        sys.stderr.write(".")

                # Session length: last event's timestamp minus the first's.
                week_sessions += 1
                week_session_seconds += events[-1][1] - events[0][1]

            sys.stderr.write("\n")
            output.write([
                wiki,
                user.user_id,
                day_revisions,
                day_deleted_revisions,
                day_main_revisions,
                day_deleted_main_revisions,
                day_reverted_main_revisions,
                day_productive_edits,
                week_revisions,
                week_deleted_revisions,
                week_main_revisions,
                week_deleted_main_revisions,
                week_reverted_main_revisions,
                week_productive_edits,
                week_sessions,
                week_session_seconds
            ])