# Assigns every tweet of the user stream the precomputed score of its author
# and pickles the resulting {tweet id: score} dictionary.
#
# Command-line arguments, as used below:
#   sys.argv[1]  file handed to Tweetstream for the topic stream
#   sys.argv[2]  text file whose first line is a comma-separated keyword list
#   sys.argv[3]  file handed to Tweetstream for the user stream
#   sys.argv[4]  first argument of UserAnalyser; also the output file prefix
from UserAnalyser import *
from TimeAnalyser import *
import math
import sys
import pickle

# Frequency over the common

if __name__ == "__main__":
    # Only the first line of the keywords file is read; close the handle
    # as soon as it has been consumed.
    with open(sys.argv[2], 'r') as keywords_file:
        keywords = keywords_file.readline().strip("\n").split(",")

    # Tweetstream is presumably provided by one of the star imports above.
    userstream = Tweetstream(jsonfilee=sys.argv[3], jsonformat=False, keywords=keywords)
    # NOTE(review): topicstream is never read afterwards; it is still built
    # in case constructing it has side effects -- confirm and drop if not.
    topicstream = Tweetstream(jsonfilee=sys.argv[1], jsonformat=False, keywords=keywords)

    ua = UserAnalyser(sys.argv[4], keywords=keywords)
    ua.load_usersVectors()
    ua.load_idf()
    ua.load_usersScore()

    # Original note: normalize by the number of kw in the topic vector.
    # No normalization is applied here; scores are stored as loaded.
    rank = dict()
    for t in userstream:
        # Tweets whose author has no entry in ua.usersScore get rank 0.
        rank[t['id']] = ua.usersScore.get(t['user_id'], 0)

    # HIGHEST_PROTOCOL is a binary pickle format, so the file must be opened
    # in binary mode; the with-block also guarantees the data is flushed.
    with open(sys.argv[4] + "_rank_USER_noNorm.pick", 'wb') as rank_file:
        pickle.dump(rank, rank_file, pickle.HIGHEST_PROTOCOL)
# Computes the users' vectors, the idf and the users' scores from a tweet
# stream by driving a UserAnalyser.
#
# Command-line arguments, as used below:
#   sys.argv[1]  first argument of UserAnalyser
#   sys.argv[2]  file handed to Tweetstream
#   sys.argv[3]  text file whose first line is a comma-separated keyword list
from UserAnalyser import *
from Tweetstream import *
import sys

# Ids used only by the optional debug dump at the end of the script, where
# they index ua.usersVectors and ua.usersScore (presumably user ids -- verify).
l = [
    119372344,
    44175574,
    126477271,
    252422812,
    26299276,
    560049049,
    388473707,
    1217370608,
    842373594,
    139960190,
    45584847,
    63573676,
    274629513,
    94176291,
    68545824,
    98997286,
    139177492,
    14201313,
    118852899,
    269567535,
]

if __name__ == "__main__":
    # Only the first line of the keywords file is read; close the handle
    # as soon as it has been consumed instead of leaking it.
    with open(sys.argv[3], 'r') as keywords_file:
        keywords = keywords_file.readline().strip("\n").split(",")

    ts = Tweetstream(jsonfilee=sys.argv[2], jsonformat=False, keywords=keywords)
    ua = UserAnalyser(sys.argv[1], tweetstream=ts)

    # Call order kept exactly as in the original script.
    ua.compute_usersVectors()
    ua.compute_idf()
    ua.compute_usersScore()

    # Optional debug dump for the ids listed in `l` (disabled):
    # for u in l:
    #     print ua.usersVectors[u]
    #     print ua.usersScore[u]