コード例 #1
0
from UserAnalyser import *
from TimeAnalyser import *
import math
import sys
import pickle
#Frequency over the common

if __name__ == "__main__":
    # Builds a {tweet id: author's user score} mapping for every tweet in the
    # user stream and pickles it.
    #
    # Command-line arguments, as used below:
    #   argv[1]  file handed to the topic Tweetstream
    #   argv[2]  keywords file; only its first line is read, as a
    #            comma-separated list
    #   argv[3]  file handed to the user Tweetstream
    #   argv[4]  first argument of UserAnalyser, also the prefix of the
    #            output pickle path
    with open(sys.argv[2], 'r') as keywords_file:
        keywords = keywords_file.readline().strip("\n").split(",")

    userstream = Tweetstream(jsonfilee=sys.argv[3],
                             jsonformat=False,
                             keywords=keywords)
    # NOTE(review): topicstream is never read in this script. It is kept
    # because constructing a Tweetstream may have side effects that cannot be
    # seen from here -- confirm before removing.
    topicstream = Tweetstream(jsonfilee=sys.argv[1],
                              jsonformat=False,
                              keywords=keywords)

    ua = UserAnalyser(sys.argv[4], keywords=keywords)
    ua.load_usersVectors()
    ua.load_idf()
    ua.load_usersScore()

    # Original note (translated from Portuguese): "normalize by the number of
    # keywords in the topic vector". No normalization is applied here, which
    # matches the "noNorm" suffix of the output file name.
    rank = dict()
    for t in userstream:
        user_id = t['user_id']
        # Tweets whose author has no stored score are ranked 0.
        rank[t['id']] = ua.usersScore[user_id] if user_id in ua.usersScore else 0

    # HIGHEST_PROTOCOL is a binary pickle protocol, so the file must be opened
    # in binary mode; the with-block also guarantees the data is flushed and
    # the handle closed.
    with open(sys.argv[4] + "_rank_USER_noNorm.pick", 'wb') as rank_file:
        pickle.dump(rank, rank_file, pickle.HIGHEST_PROTOCOL)
コード例 #2
0
from UserAnalyser import *
from Tweetstream import *
import sys

# Hard-coded numeric IDs. Presumably user ids: the only reference to this list
# is the disabled debug loop in this script, which uses each entry as a key
# into ua.usersVectors / ua.usersScore -- TODO confirm.
l = [
    119372344, 44175574, 126477271, 252422812, 26299276,
    560049049, 388473707, 1217370608, 842373594, 139960190,
    45584847, 63573676, 274629513, 94176291, 68545824,
    98997286, 139177492, 14201313, 118852899, 269567535,
]

if __name__ == "__main__":
    # Computes the user vectors, the idf and the user scores through
    # UserAnalyser, fed by a keyword-filtered Tweetstream.
    #
    # Command-line arguments, as used below:
    #   argv[1]  first argument of UserAnalyser
    #   argv[2]  file handed to the Tweetstream
    #   argv[3]  keywords file; only its first line is read, as a
    #            comma-separated list
    with open(sys.argv[3], 'r') as keywords_file:
        keywords = keywords_file.readline().strip("\n").split(",")

    ts = Tweetstream(jsonfilee=sys.argv[2],
                     jsonformat=False,
                     keywords=keywords)
    ua = UserAnalyser(sys.argv[1], tweetstream=ts)

    # The three compute_* calls are kept in their original order.
    # NOTE(review): whether each step depends on the previous one is not
    # visible from this script -- confirm before reordering.
    ua.compute_usersVectors()
    ua.compute_idf()
    ua.compute_usersScore()

    # Disabled debug aid (Python 2 print syntax): dump the vector and score
    # of every ID in the module-level list `l`.
    # for u in l:
    #     print ua.usersVectors[u]
    #     print ua.usersScore[u]