def getCursorForBreakdown(school):
    """Return a Yak select query limited to the IDs stored for *school*.

    The IDs are read from ``data/samples/schools/<school>.txt``, which must
    hold one integer Yak ID per line.

    school -- value whose ``str()`` names the sample file
              (presumably the numeric school ID -- confirm against callers)

    Returns the peewee query ``Yak.select().where(Yak.id << ids)``.
    Raises whatever ``open()`` raises when the sample file is missing, and
    ValueError when a line (including a blank one) is not an integer.
    """
    samplePath = 'data/samples/schools/' + str(school) + '.txt'

    # collect one integer ID per line of the sample file
    sampledIDs = []
    with open(samplePath, 'r') as sampleFile:
        for rawLine in sampleFile:
            sampledIDs.append(int(rawLine.rstrip('\n')))

    # peewee's << operator builds an IN (...) filter over the collected IDs
    return Yak.select().where(Yak.id << sampledIDs)
# File: sampling.py
# Sampling Random Content

from peewee import fn
from models import Yak


def _writeYakIDs(path, ids):
    """Overwrite the file at *path* with the given IDs, one per line."""
    with open(path, 'w') as out:
        out.writelines('%s\n' % str(yakID) for yakID in ids)


# sample school Yaks: at most 100 randomly ordered rows for schools 1..7,
# each written to its own per-school sample file
for i in range(1, 8):
    yaks = (Yak.select()
            .where(Yak.school == i)
            .order_by(fn.Random())
            .limit(100))
    _writeYakIDs('data/samples/schools/' + str(i) + '.txt',
                 [yak.id for yak in yaks])

# top yaks are simply the top 100 yaks ordered by their final like count
topYaks = Yak.select().order_by(Yak.finalLikeCount.desc()).limit(100)

# low yaks: at most 100 randomly ordered yaks whose final like count is -4
lowYaks = (Yak.select()
           .where(Yak.finalLikeCount == -4)
           .order_by(fn.Random())
           .limit(100))

# write top yak IDs to a sample file
_writeYakIDs('data/samples/topYaks.txt', [yak.id for yak in topYaks])

# write low yak IDs to a sample file
# normalize score by post counts for i in range(len(dayHours)): for j in range(len(dayHours[i])): if dayHours[i][j] == 0 or not votes: continue dayHours[i][j] /= float(dayCounts[i][j]) # output CSV style data for day in dayHours: dayStr = '' # build up for hourValue in day: dayStr += str(hourValue) + ', ' print(dayStr[:-2]) print('') # add new line # get a general statistical summary allYaks = Yak.select() # no where() specified summarizeYaks(allYaks, 'All Collected Yaks') # get statistical summaries by school stanford = Yak.select().where(Yak.school == 1) alabama = Yak.select().where(Yak.school == 2) berkeley = Yak.select().where(Yak.school == 3) dartmouth = Yak.select().where(Yak.school == 4) haverford = Yak.select().where(Yak.school == 5) boulder = Yak.select().where(Yak.school == 6) oxford = Yak.select().where(Yak.school == 7) summarizeYaks(stanford, 'Stanford University') summarizeYaks(alabama, 'University of Alabama') summarizeYaks(berkeley, 'UC Berkeley') summarizeYaks(dartmouth, 'Dartmouth College')
# NOTE(review): the entries below close a list literal that is opened above
# this view; the loop further down reads it as `files`, so it is presumably
# assigned to that name -- confirm.
'data/samples/schools/5.txt',
'data/samples/schools/6.txt',
'data/samples/schools/7.txt',
'data/samples/topYaks.txt',
'data/samples/lowYaks.txt']

# iterate files and IDs
# Every listed file is expected to hold one integer ID per line; the IDs of
# all files are concatenated in list order (duplicates are not removed).
yakIDs = []  # read in
for filename in files:
    with open(filename, 'r') as file:
        yakIDs += [int(line.rstrip('\n')) for line in file]

# start the coding
pos = 0  # allow seek?
while pos < len(yakIDs):
    # fetch the Yak row for the ID at the current position
    yak = Yak.get(Yak.id == yakIDs[pos])

    # quick skip: yaks whose isCoded flag is set are passed over
    if yak.isCoded:
        pos += 1
        continue

    # `system` is not defined in this view -- presumably os.system, used
    # here to clear the terminal before showing the next yak; confirm.
    system('clear')
    print('Progress: ' + str(pos) + '/' + str(len(yakIDs)))
    # NOTE(review): concatenating str with the result of .encode('utf-8')
    # only works on Python 2 -- confirm the target interpreter.
    print('#' + str(yak.id) + ': ' + yak.content.encode('utf-8'))
    print('\nPost Upvotes: ' + str(yak.finalLikeCount))
    print('Posting Time: ' + str(yak.time))
    # `location` is defined outside this view; it is indexed with the
    # school's id minus one.
    print('Posting Location: ' + location[yak.school.id - 1])

    # code voice
    # (the prompt handling that follows this header lies beyond this view)
    print('\n\tVoice:')