# NOTE(review): dayHours, dayCounts and votes are defined before the visible
# part of this file; this fragment may belong inside a function body, so
# confirm its indentation against the full source.

# normalize score by post counts
if votes:  # without votes there is nothing to normalize
    for i, hourRow in enumerate(dayHours):
        for j, score in enumerate(hourRow):
            if score != 0:  # zero entries are left untouched
                hourRow[j] /= float(dayCounts[i][j])

# output CSV style data
for day in dayHours:
    # one comma-separated line per day, no trailing separator
    print(', '.join(str(hourValue) for hourValue in day))
print('')  # add new line

# get a general statistical summary
allYaks = Yak.select()  # no where() specified
summarizeYaks(allYaks, 'All Collected Yaks')

# get statistical summaries by school
stanford = Yak.select().where(Yak.school == 1)
alabama = Yak.select().where(Yak.school == 2)
berkeley = Yak.select().where(Yak.school == 3)
dartmouth = Yak.select().where(Yak.school == 4)
haverford = Yak.select().where(Yak.school == 5)
boulder = Yak.select().where(Yak.school == 6)
oxford = Yak.select().where(Yak.school == 7)

summarizeYaks(stanford, 'Stanford University')
summarizeYaks(alabama, 'University of Alabama')
summarizeYaks(berkeley, 'UC Berkeley')
summarizeYaks(dartmouth, 'Dartmouth College')
def getCursorForBreakdown(school):
    """Return a query selecting the sampled Yaks of one school.

    Reads 'data/samples/schools/<school>.txt', which is expected to hold
    one integer Yak ID per line, and returns ``Yak.select()`` restricted
    to those IDs.

    school -- value placed into the sample file name via str().

    Raises whatever open() raises when the sample file cannot be read, and
    ValueError when a non-blank line is not an integer.
    """
    samplePath = 'data/samples/schools/' + str(school) + '.txt'
    with open(samplePath, 'r') as sampleFile:
        # int() ignores surrounding whitespace; blank lines (for example a
        # trailing empty line) are skipped instead of raising ValueError
        schoolYakIDs = [int(line) for line in sampleFile if line.strip()]
    # peewee's << operator builds an SQL IN clause
    return Yak.select().where(Yak.id << schoolYakIDs)
# File: sampling.py
# Sampling Random Content

from peewee import fn

from models import Yak


def _writeYakIDs(filename, yakIDs):
    """Write every ID in yakIDs to filename, one per line.

    The file is opened in 'w' mode, so existing content is overwritten.
    """
    with open(filename, 'w') as sampleFile:
        sampleFile.writelines(str(yakID) + '\n' for yakID in yakIDs)


# sample school Yaks (school IDs 1 through 7)
for i in range(1, 8):
    yaks = Yak.select().where(Yak.school == i) \
        .order_by(fn.Random()).limit(100)  # 100 random
    # collect the IDs before the file is opened, so a failing query does
    # not leave a truncated sample file behind
    yakIDs = [yak.id for yak in yaks]

    # write sampled yak IDs to a sample text file
    filename = 'data/samples/schools/' + str(i) + '.txt'
    _writeYakIDs(filename, yakIDs)

# top yaks are simply the top 100 yaks ordered by their final like count
topYaks = Yak.select().order_by(Yak.finalLikeCount.desc()).limit(100)
# NOTE(review): -4 is presumably the lowest final like count present in the
# collected data -- confirm before changing
lowYaks = Yak.select().where(Yak.finalLikeCount == -4) \
    .order_by(fn.Random()).limit(100)  # 100 random low yaks

# write top yak IDs to a sample file
filename = 'data/samples/topYaks.txt'
yakIDs = [yak.id for yak in topYaks]
_writeYakIDs(filename, yakIDs)

# write low yak IDs to a sample file