def output_html(lr=None):
    # get_target_file(), render() and args (a namespace with threshold /
    # upper_limit) are assumed to be defined elsewhere in this script.
    target_file = get_target_file()
    print "processing:", target_file
    from lr import learn, make_feature_matrix
    if lr is None:  # allow callers (see main) to pass in an already-trained model
        lr = learn()
    fi = open(target_file)
    lines = fi.readlines()
    fi.close()
    # Score every crawled line with the probability of the positive class.
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]
    data = []
    for line, p in zip(lines, ps):
        if line.startswith("RT "):  # skip retweets
            continue
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))
    print len(data)
    render(data, target_file)
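# The lr module imported above is not shown here. A minimal sketch of what its
# learn() / make_feature_matrix() pair could look like, assuming scikit-learn
# and the positive.txt / negative.txt files that add_train_data() below
# appends to -- an illustration of the assumed interface, not the real module:
#
#   from sklearn.feature_extraction.text import CountVectorizer
#   from sklearn.linear_model import LogisticRegression
#
#   vectorizer = CountVectorizer()
#
#   def make_feature_matrix(lines):
#       # bag-of-words features over the tweet text (first tab-separated field)
#       return vectorizer.transform(line.split("\t")[0] for line in lines)
#
#   def learn():
#       positives = open("positive.txt").readlines()
#       negatives = open("negative.txt").readlines()
#       vectorizer.fit(line.split("\t")[0] for line in positives + negatives)
#       X = make_feature_matrix(positives + negatives)
#       y = [1] * len(positives) + [0] * len(negatives)
#       return LogisticRegression().fit(X, y)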
def add_train_data():
    target_file = get_target_file()
    print "processing:", target_file
    from lr import learn, make_feature_matrix
    lr = learn()
    fi = open(target_file)
    lines = fi.readlines()
    fi.close()
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]
    for line, p in zip(lines, ps):
        if line.startswith("RT "):  # skip retweets
            continue
        # Only ask about lines the current model scores between the two bounds.
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue
        print line
        print p
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        print url
        # Append the raw line to the matching training file; neutral (x)
        # and any other answer are simply skipped.
        ret = raw_input("negative(z), neutral(x), positive(c)>")
        if ret == "c":
            fo = open("positive.txt", "a")
            fo.write(line)
            fo.close()
        elif ret == "z":
            fo = open("negative.txt", "a")
            fo.write(line)
            fo.close()
        print
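# Assumed input format (inferred from items[0] and the format() call above,
# not stated anywhere in this file): each crawled line is tab-separated as
# text, screen name, status id. For example:
#
#   line = "some tweet text\tjack\t20\n"
#   items = line.split("\t")  # ["some tweet text", "jack", "20\n"]
#   "https://twitter.com/{1}/status/{2}".format(*items)
#   # -> "https://twitter.com/jack/status/20\n" (trailing newline included)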
def main():
    from lr import learn
    lr = learn()
    latest_id = crawl()
    output_html(lr)
    # Poll for new tweets once a minute; the finally block makes sure the
    # latest id is logged and the page re-rendered even if crawl() raises
    # (the exception still propagates and ends the loop).
    while True:
        time.sleep(60)
        try:
            latest_id = crawl(previous_latest=latest_id)
        finally:
            print 'latest:', latest_id
            print 'rendering'
            output_html(lr)
            print 'rendered'
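# If the intent is to keep polling after a failed crawl (an assumption; the
# code above lets the exception escape the loop), the try block could catch
# and report the error instead:
#
#   try:
#       latest_id = crawl(previous_latest=latest_id)
#   except Exception as e:
#       print 'crawl failed:', e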