Example #1
0
import fileinput
import sys
import time

from liwc import countcat
from web import isurl, isquestion, get_files

# Emit one tab-separated feature row per query-log record on stdout:
# id, epoch timestamp, LIWC counts (pos, neg, word), URL flag, question flag
# and click flag.
# The single-argument parenthesised print produces identical output under the
# Python 2 print statement and the Python 3 print function.
print('id\tts\tpos\tneg\tword\tis_url\tquestion\tclicked')
for line in get_files():
  # Records are unpacked as 4 or 5 fields; a 4-field record is flagged as
  # clicked, anything else must have exactly 5 fields and is flagged as not
  # clicked (any other length raises ValueError on unpacking).
  if len(line) == 4:
    anon_id, query, ts, _ = line
    clicked = 1
  else:
    anon_id, query, ts, _, _ = line
    clicked = 0
  # Skip header rows, recognised by the literal column name in the id field.
  if anon_id == 'AnonID':
    continue
  # emotion: the first three entries of countcat's result are reported as
  # the pos, neg and word columns.
  counts = countcat(query)
  pos, neg, word = counts[0], counts[1], counts[2]
  # internet competence
  is_url = int(isurl(query))
  # Previously hard-coded to 1, which made the 'question' column constant
  # while the imported isquestion helper went unused.
  # NOTE(review): assumes isquestion takes the raw query string like isurl
  # does -- confirm against web.isquestion.
  is_question = int(isquestion(query))
  # time.mktime interprets the parsed struct_time as local time.
  ts = int(time.mktime(time.strptime(ts, '%Y-%m-%d %H:%M:%S')))
  print('\t'.join([str(x) for x in [anon_id, ts, pos, neg, word, is_url, is_question, clicked]]))

Example #2
0
#!/usr/bin/env python
from liwc import countcat, header
import csv
"""
Combine project data and output from liwc (i.e. output of 
wordcount_liwc.py)
"""
# Zero-based column positions used to index rows read from essays.csv.
_projectid = 0
_teacher_acctid = 1
title = 2
short_description = 3
need_statement = 4
essay = 5
paragraph1 = 6
paragraph2 = 7
paragraph3 = 8
paragraph4 = 9

# Write one tab-separated row per essays.csv record to ../data/liwc_out:
# the project id followed by the countcat() results for that record's text.
# Both files are managed by `with` so they are closed (and the output
# flushed) even if processing a row raises.
with open('../data/essays.csv') as essays, open('../data/liwc_out', 'w') as out:
  headers = ['_projectid'] + header()
  out.write('\t'.join(headers) + '\n')
  rows = csv.reader(essays)
  # Skip the CSV header through the reader itself so a quoted multi-line
  # header is consumed whole; the default keeps an empty file from raising.
  next(rows, None)
  for line in rows:
    # All columns from short_description onward are joined into one string
    # and scored together.
    lst = countcat(' '.join(line[short_description:]))
    out.write('\t'.join(str(x) for x in [line[_projectid]] + lst) + '\n')