コード例 #1
0
def main():
	"""Load the control and cohort tweet rows and pass each set to ``make_cdfs``.

	Both queries are executed on a single cursor obtained from the connection
	that ``dbUtils.setup_mysql_cxn()`` yields. ``make_cdfs`` is only called
	once the ``with`` block has been exited.
	"""
	# Both slots start as None, so make_cdfs still receives a value for each
	# label even if the context manager swallows an error raised inside it.
	rows_by_label = {"control": None, "cohort": None}
	with dbUtils.setup_mysql_cxn() as cxn:
		cursor = cxn.cursor()
		cursor.execute(SELECT_CONTROL_TWEETS)
		rows_by_label["control"] = dbUtils.get_named_rows(cursor)
		cursor.execute(SELECT_COHORT_TWEETS)
		rows_by_label["cohort"] = dbUtils.get_named_rows(cursor)
	# Explicit tuple keeps the call order fixed: control first, then cohort.
	for label in ("control", "cohort"):
		make_cdfs(label, rows_by_label[label])
コード例 #2
0
def write_csv(fname, results):
  """Write a sequence of dict-like rows to ``fname`` as tab-separated text.

  The header row is the sorted union of the keys of every row, so a key that
  first appears in a later row still gets its own column. A row that lacks a
  column's key is written with ``0`` in that cell.

  Args:
    fname: Path of the output file; it is created or truncated.
    results: Sequence of mappings, one per output row. May be empty, in
      which case an empty file is written.
  """
  # Collect columns from all rows, not only the first one: rows do not
  # necessarily share the same key set (hence the 0 fill below).
  fields = sorted(set().union(*results))
  # newline="" lets the csv writer own the line endings; without it,
  # platforms that translate "\n" would emit "\r\r\n".
  with open(fname, "w", newline="") as f_out:
    if not results:
      # Nothing to write: leave an empty file instead of failing.
      return
    writer = csv.writer(f_out, delimiter="\t")
    writer.writerow(fields)
    for row in results:
      writer.writerow([row[k] if k in row else 0 for k in fields])


if __name__ == "__main__":
  processor = TweetProcessor()
  results = []
  with dbUtils.setup_mysql_cxn() as cxn:
    curs = cxn.cursor();
    curs.execute(dbUtils.GET_USERS_W_COHORT)
    users = dbUtils.get_named_rows(curs)
    for user in users:
      
      curs.execute(dbUtils.GET_TWEETS_TEMPL % user['twitter_user_id'])
      tweets = dbUtils.get_named_rows(curs)
      result = defaultdict(lambda: 0)
      for tweet in tweets:
        result = processor.process_tweet(tweet['tweet_text'], result)
      ntweets = len(tweets)
      if ntweets > 0:
        for k, v in result.items():
          result[k] = float(v) / float(ntweets)
      result = add_time_model(tweets, result)