Ejemplo n.º 1
0
def getCursorForBreakdown(school):
  """Build a Yak query limited to the sampled IDs stored for one school.

  The sample file 'data/samples/schools/<school>.txt' is read line by line,
  and each line is parsed as one integer Yak ID.

  Args:
    school: value converted with str() to form the sample file name.

  Returns:
    The result of Yak.select().where(Yak.id << ids), where ids is the list
    of integers read from the file (`<<` is presumably peewee's IN operator).

  Raises:
    IOError/OSError: if the sample file cannot be opened.
    ValueError: if a line of the file is not a valid integer.
  """
  samplePath = 'data/samples/schools/' + str(school) + '.txt'

  sampledIDs = []
  with open(samplePath, 'r') as sampleFile:
    for rawLine in sampleFile:
      # each line holds one ID followed by a newline
      sampledIDs.append(int(rawLine.rstrip('\n')))

  query = Yak.select()
  return query.where(Yak.id << sampledIDs)
Ejemplo n.º 2
0
# File: sampling.py
# Sampling Random Content

from peewee import fn
from models import Yak

# draw a random sample of up to 100 Yaks for each school ID in 1..7
for i in range(1, 8):
  schoolQuery = Yak.select().where(Yak.school == i)
  yaks = schoolQuery.order_by(fn.Random()).limit(100) # random order, capped at 100

  # collect the IDs before touching the output file
  yakIDs = []
  for yak in yaks:
    yakIDs.append(yak.id)

  # persist the sampled IDs, one per line, in the school's sample file
  filename = 'data/samples/schools/' + str(i) + '.txt'
  with open(filename, 'w') as file:
    file.writelines('%s\n' % str(yakID) for yakID in yakIDs)

# top yaks: the 100 yaks with the highest final like count
byLikesDescending = Yak.finalLikeCount.desc()
topYaks = Yak.select().order_by(byLikesDescending).limit(100)

# low yaks: up to 100 random yaks whose final like count is exactly -4
minusFourYaks = Yak.select().where(Yak.finalLikeCount == -4)
lowYaks = minusFourYaks.order_by(fn.Random()).limit(100)

# store the top yak IDs, one per line, in their sample file
filename = 'data/samples/topYaks.txt'
yakIDs = []
for yak in topYaks:
  yakIDs.append(yak.id)
with open(filename, 'w') as file:
  file.writelines('%s\n' % str(yakID) for yakID in yakIDs)

# write low yak IDs to a sample file
Ejemplo n.º 3
0
  # normalize score by post counts
  for i in range(len(dayHours)):
    for j in range(len(dayHours[i])):
      if dayHours[i][j] == 0 or not votes: continue
      dayHours[i][j] /= float(dayCounts[i][j])

  # output CSV style data
  for day in dayHours:
    dayStr = '' # build up
    for hourValue in day:
      dayStr += str(hourValue) + ', '
    print(dayStr[:-2])
  print('') # add new line

# overall statistical summary across every collected Yak
allYaks = Yak.select() # unfiltered query
summarizeYaks(allYaks, 'All Collected Yaks')

# one query per school, selected by numeric school ID (1 through 7, in order)
stanford, alabama, berkeley, dartmouth, haverford, boulder, oxford = [
  Yak.select().where(Yak.school == schoolID) for schoolID in range(1, 8)
]

# per-school statistical summaries, printed in school ID order
for schoolYaks, schoolTitle in [
  (stanford, 'Stanford University'),
  (alabama, 'University of Alabama'),
  (berkeley, 'UC Berkeley'),
  (dartmouth, 'Dartmouth College'),
]:
  summarizeYaks(schoolYaks, schoolTitle)
Ejemplo n.º 4
0
    		 'data/samples/schools/5.txt',
    		 'data/samples/schools/6.txt',
    		 'data/samples/schools/7.txt',
    		 'data/samples/topYaks.txt',
    		 'data/samples/lowYaks.txt']

# gather the yak IDs from every listed sample file, preserving file order
yakIDs = []
for filename in files:
  with open(filename, 'r') as file:
    idsInFile = []
    for line in file:
      # each line holds one integer ID followed by a newline
      idsInFile.append(int(line.rstrip('\n')))
  yakIDs.extend(idsInFile)

# start the coding
pos = 0 # allow seek?
while pos < len(yakIDs):
  yak = Yak.get(Yak.id == yakIDs[pos])

  # quick skip
  if yak.isCoded:
    pos += 1
    continue

  system('clear')
  print('Progress: ' + str(pos) + '/' + str(len(yakIDs)))
  print('#' + str(yak.id) + ': ' + yak.content.encode('utf-8'))
  print('\nPost Upvotes: ' + str(yak.finalLikeCount))
  print('Posting Time: ' + str(yak.time))
  print('Posting Location: ' + location[yak.school.id - 1])

  # code voice
  print('\n\tVoice:')