Esempio n. 1
0
def output_html():
    target_file = get_target_file()
    print "processing:", target_file
    fi = file(target_file)
    from lr import learn, make_feature_matrix

    lr = learn()

    lines = fi.readlines()
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]

    data = []
    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        data.append(dict(url=url, score=p, text=items[0]))
    print len(data)
    render(data, target_file)
Esempio n. 2
0
def add_train_data():
    target_file = get_target_file()
    print "processing:", target_file
    fi = file(target_file)
    from lr import learn, make_feature_matrix

    lr = learn()

    lines = fi.readlines()
    X = make_feature_matrix(lines)
    ps = lr.predict_proba(X)[:, 1]

    data = []
    for line, p in zip(lines, ps):
        if line.startswith("RT "):
            continue
        if p < args.threshold:
            continue
        if p > args.upper_limit:
            continue
        print line
        print p
        items = line.split("\t")
        url = "https://twitter.com/{1}/status/{2}".format(*items)
        print url
        ret = raw_input("negative(z), neutral(x), positive(c)>")
        if ret == "c":
            fo = file("positive.txt", "a")
            fo.write(line)
            fo.close()
        elif ret == "z":
            fo = file("negative.txt", "a")
            fo.write(line)
            fo.close()
        print
Esempio n. 3
0
def main():
    """Crawl-and-render loop: fetch new tweets once per minute and
    re-render the HTML output after every crawl attempt."""
    # NOTE(review): lr is passed to output_html below, but the
    # output_html defined earlier in this file takes no arguments --
    # confirm which signature is the current one.
    lr = learn()
    latest_id = crawl()
    output_html(lr)
    while True:
        time.sleep(60)
        try:
            # Incremental crawl: only fetch tweets newer than latest_id.
            latest_id = crawl(previous_latest=latest_id)
        finally:
            # Render even when crawl() fails. There is no 'except', so
            # an exception still propagates after the render and ends
            # the loop -- presumably intentional fail-fast; TODO confirm.
            print 'latest:', latest_id
            print 'rendering'
            output_html(lr)
            print 'rendered'