def test():
    """Print the opinion classification and text of recent tweets.

    Fetches up to 10000 rows from the ``tweet_data`` table (highest ``no``
    first), sends each row's ``tweet`` column to the remote ``opn_classify``
    JSON-RPC method, and prints the returned value followed by the tweet.
    """
    # NOTE(review): database credentials are hard-coded in this call; they
    # should come from configuration rather than live in source control.
    con = MySQLdb.connect(db='twit_manager', host="61.43.139.70",user='******', passwd='1rmdwjd', charset="UTF8")
    try:
        cur = con.cursor(cursorclass=MySQLdb.cursors.DictCursor)
        try:
            cur.execute("select * from tweet_data order by no desc limit 10000")
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # Release the connection before the (potentially slow) RPC loop so
        # it is not held open or leaked if classification fails.
        con.close()

    qq2 = JSONRPCProxy("http://61.43.139.70:11001")
    for tt in rows:
        # Single-argument print(...) behaves identically under Python 2.
        print(qq2.opn_classify(tt['tweet']))
        print(tt['tweet'])
# Example #2
# 0
def _classify_with_retry(server, text, attempts=3):
    """Return ``server.opn_classify(text)``, retrying when the call raises.

    Every failure prints its traceback. If all ``attempts`` calls raise,
    None is returned, which the caller treats the same as an unclassified
    line (it is skipped).
    """
    for _ in range(attempts):
        try:
            return server.opn_classify(text)
        except Exception:
            traceback.print_exc()
    return None


def make_polar_train_data():
    """Split each category file into positive and negative training lines.

    For every category name in ``eng`` the file ``/home/xorox90/tp/<name>``
    is read and each line is sent to the remote ``opn_classify`` method.
    Lines are then appended, prefixed with ``"<polarity> "``, to:

    * ``/home/xorox90/tp/polar/p_<name>`` when the polarity is 4 or higher;
    * ``/home/xorox90/tp/polar/c_<name>`` when the polarity is 2 or lower.

    Lines whose polarity is None or strictly between 2 and 4, and lines for
    which classification raised three times in a row, are skipped.
    """
    server = JSONRPCProxy("http://office2.buzzni.com:10100")

    eng = ["animation", "baseball", "basketball", "book", "car", "childcare", "commercial", "economy", "entertainment",
           "fashion", "food", "game", "health", "infotech", "love", "movie", "pet", "science", "politics", "soccer", "travel", "volley", "world"]

    for category in eng:
        pros = []
        cons = []

        # All three files are opened up front (so a missing path fails before
        # any RPC work) and are closed even if classification raises.
        with open("/home/xorox90/tp/" + category, "r") as fp, \
                open("/home/xorox90/tp/polar/p_" + category, "a") as fp2, \
                open("/home/xorox90/tp/polar/c_" + category, "a") as fp3:
            for line in fp.read().split("\n"):
                polarity = _classify_with_retry(server, line)

                if polarity is None or 2 < polarity < 4:
                    continue
                if polarity <= 2:
                    cons.append(str(polarity) + " " + line)
                else:
                    pros.append(str(polarity) + " " + line)

            print(category)
            # Results are written only after the whole category has been
            # classified, one entry per line.
            fp2.writelines(entry + "\n" for entry in pros)
            fp3.writelines(entry + "\n" for entry in cons)
# Module-level evaluation script: builds feature vectors and reference labels
# for tweets, then classifies the vectors with `svm`.
# NOTE(review): `x`, `gold`, `index`, `svm`, `load_dic` and `apply_dic` are
# not defined in this part of the file; they must exist before this runs.

# JSON-RPC proxy whose `opn_classify` method supplies the reference polarity.
qq2 = JSONRPCProxy("http://office2.buzzni.com:10100")

# NOTE(review): database credentials are hard-coded in this call.
con = MySQLdb.connect(db='twit_manager', host="61.43.139.70",user='******', passwd='1rmdwjd', charset="UTF8")
# DictCursor returns each row as a dict keyed by column name.
cur = con.cursor(cursorclass=MySQLdb.cursors.DictCursor)
# Up to 10000 rows of tweet_data, highest `no` first.
cur.execute("select * from tweet_data order by no desc limit 10000")

# Keep only the `tweet` column of every fetched row.
arr = []
for tt in cur.fetchall():
    arr.append(tt['tweet'])

print len(arr)

dic = load_dic()
for line in arr:
    # One feature entry per tweet, produced by apply_dic from the dictionary.
    x.append(apply_dic(dic, line))
    polarity = qq2.opn_classify(line)
    # Map the remote polarity onto a label stored in `gold`:
    #   None or strictly between 2 and 4 -> 0
    #   2 or lower                       -> 2
    #   anything else (4 or higher)      -> 1
    # presumably 0 = neutral, 2 = negative, 1 = positive -- TODO confirm
    if polarity==None or (polarity>2 and polarity <4):
        gold.append(0)
    elif(polarity<=2):
        gold.append(2)
    else:
        gold.append(1)

    index+=1


# Classify all collected feature entries in a single call.
ans = svm.classify(vectors=x)

# Counters initialised ahead of the loop over `ans` that follows; how they
# are updated is outside this view.
match=0
total=0
for i in range(0,len(ans)):