def processReplyRetweetHashtag(self, topicDict=None):
    """Build the user-reference graph and tag users with the hashtags they used.

    Reads ``self.infile`` record by record. A record starts at a line whose
    first character is 'T'; the two lines that follow it are read as the rest
    of the record and all three are handed to ``lineParser``. Lines that do
    not start with 'T' are skipped.

    For every record:
      * ``lp.user`` and every entry of ``lp.refuser`` are added as nodes
        of ``self.G``;
      * the edge ``lp.user -> refusr`` is created with ``weight=1`` or, if it
        already exists, its ``'weight'`` is incremented;
      * for every hashtag in ``lp.hashtag`` that passes the topic filter, a
        node attribute named after the hashtag is set to ``True`` for
        ``lp.user``.

    Args:
        topicDict: optional container of hashtags to keep (membership is
            tested with ``in``). When it is ``None`` or empty, no filtering
            is applied and every hashtag is recorded.
    """
    if not topicDict:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("processReplyRetweetHashtag: topic dict is none. all topics are included.")

    with open(self.infile, "r") as f:
        # iter(callable, sentinel) keeps calling f.readline() until it returns
        # the empty string (EOF). Using readline() throughout, rather than
        # `for line in f`, keeps the two extra readline() calls below in sync
        # with the file position.
        for tline in iter(f.readline, ""):
            # Compare by value, not identity: `is 'T'` only worked thanks to
            # CPython's caching of one-character strings.
            if not tline.startswith("T"):
                continue

            # The two lines after the 'T' line complete the record.
            uline = f.readline()
            wline = f.readline()
            lp = lineParser(tline, uline, wline)

            self.G.add_node(lp.user)
            self.G.add_nodes_from(lp.refuser)
            for refusr in lp.refuser:
                if self.G.has_edge(lp.user, refusr):
                    # Repeated reference: bump the existing edge weight.
                    self.G[lp.user][refusr]['weight'] += 1
                else:
                    self.G.add_edge(lp.user, refusr, weight=1)

            for ht in lp.hashtag:
                # An empty/None topicDict disables the filter entirely.
                if topicDict and ht not in topicDict:
                    continue
                # NOTE(review): argument order (G, name, values) is the
                # networkx 1.x signature; networkx 2.0+ expects
                # (G, values, name). Confirm against the installed version.
                nx.set_node_attributes(self.G, ht, {lp.user: True})
def todayTopTenHashtag(self, topN=30):
    """Count hashtag occurrences in ``self.infile`` and keep the most common.

    Reads ``self.infile`` record by record. A record starts at a line whose
    first character is 'T'; the two lines that follow it are read as the rest
    of the record and all three are handed to ``lineParser``. Every entry of
    the parsed ``lp.hashtag`` adds one to that hashtag's count.

    The result is stored in ``self.todayTopic`` as a plain ``dict`` mapping
    hashtag -> count; nothing is returned.

    Args:
        topN: how many of the most frequent hashtags to keep. Defaults to 30,
            the value this method has always used despite its "TopTen" name.
    """
    hashtags = collections.Counter()

    with open(self.infile, "r") as f:
        # iter(callable, sentinel) keeps calling f.readline() until it returns
        # the empty string (EOF). Using readline() throughout, rather than
        # `for line in f`, keeps the two extra readline() calls below in sync
        # with the file position.
        for tline in iter(f.readline, ""):
            # Compare by value, not identity: `is 'T'` only worked thanks to
            # CPython's caching of one-character strings.
            if not tline.startswith("T"):
                continue

            # The two lines after the 'T' line complete the record.
            uline = f.readline()
            wline = f.readline()
            lp = lineParser(tline, uline, wline)

            # An empty lp.hashtag simply contributes nothing.
            for ht in lp.hashtag:
                hashtags[ht] += 1

    self.todayTopic = dict(hashtags.most_common(topN))