def report(self):
    """Collapse rare tags, rebuild the tag links, and return the base report.

    When ``self.threshold`` is positive, every tag in ``self.counts`` whose
    count is below the threshold is removed (its count is folded into the
    '-OTHER' entry when ``self.keepother`` is true) and every other tag is
    recorded in ``self.keepers``.  The saved tweets are then re-processed so
    that links and per-tag tweet counts only involve surviving tags.

    Returns:
        Whatever ``LinkNodesProfiler.report(self)`` returns.
    """
    # Replacing rare tags with '-OTHER' is unnecessary when threshold is 0.
    if self.threshold > 0:
        # Iterate over a snapshot of the keys: the loop deletes entries (and
        # may insert '-OTHER'), and mutating a dict while iterating its live
        # key view raises RuntimeError on Python 3.
        for countkey in list(self.counts):
            if self.counts[countkey] < self.threshold:
                # Below the threshold: transfer the count to '-OTHER' (if
                # that bucket is kept) and drop the tag.
                # NOTE(review): the += relies on self.counts returning 0 for
                # a missing key (Counter-like) -- confirm against __init__.
                if self.keepother:
                    self.counts['-OTHER'] += self.counts[countkey]
                del self.counts[countkey]
            else:
                # Otherwise the tag survives.
                self.keepers.add(countkey)
        if self.keepother:
            self.keepers.add('-OTHER')
        # keepers now holds the complete set of surviving tags.

    # Process the saved tweets again, replacing any tag that is not a keeper
    # with '-OTHER'.  The counts are reset before this second pass.
    self.counts = Counter()
    for savetweet in self.savetweets:
        # A set, so each tag is considered at most once per tweet.
        cleantags = set()
        for tag in savetweet:
            if self.threshold == 0 or tag in self.keepers:
                cleantags.add(tag)
            elif self.keepother:
                cleantags.add('-OTHER')

        # Remove excluded tags and sort, so pairs come out in sorted order.
        cleantags = sorted(cleantags.difference(self.exclude))

        # Generate all pairs of tags that occur together in this tweet.
        for first, second in itertools.combinations(cleantags, 2):
            self.addlink(first, second)
            if self.reciprocal:
                self.addlink(second, first)

        # A tag that is the only one kept from its tweet yields no pairs, so
        # (per the original author's note) addlink will not have put it in
        # self.nodes; register it via addsingle.  Otherwise bump its tweet
        # count.
        for tag in cleantags:
            if tag in self.nodes:
                self.nodes[tag]["tweetcount"] += 1
            else:
                self.addsingle(tag)

    return LinkNodesProfiler.report(self)
def report(self):
    """Prune tags below the threshold, relink saved tweets, return the report.

    If ``self.threshold`` is greater than zero, tags in ``self.counts`` with
    a count under the threshold are deleted (their counts are added to the
    "-OTHER" entry when ``self.keepother`` is true); all remaining tags are
    added to ``self.keepers``.  Each saved tweet is then walked again to
    create links between the surviving tags and to update per-tag tweet
    counts.

    Returns:
        The result of ``LinkNodesProfiler.report(self)``.
    """
    # For tags below the threshold, replace with "-OTHER"; this is not
    # necessary if the threshold is 0.
    if self.threshold > 0:
        # Take a snapshot of the keys before looping.  The body deletes
        # entries and can add "-OTHER"; doing that while iterating the live
        # keys() view raises RuntimeError on Python 3.
        countkeys = list(self.counts)
        for countkey in countkeys:
            if self.counts[countkey] < self.threshold:
                # Transfer the rare tag's count to "-OTHER" and delete it.
                # NOTE(review): assumes self.counts yields 0 for a missing
                # "-OTHER" key (Counter-like) -- confirm where it is created.
                if self.keepother:
                    self.counts["-OTHER"] += self.counts[countkey]
                del self.counts[countkey]
            else:
                # Otherwise add it to the set of keepers.
                self.keepers.add(countkey)
        if self.keepother:
            self.keepers.add("-OTHER")
        # keepers now has the complete set of surviving tags.

    # Second pass over the saved tweets: any tag not in keepers becomes
    # "-OTHER".  Counts start again from empty.
    self.counts = Counter()
    for savetweet in self.savetweets:
        # The set keeps each tag once per tweet.
        cleantags = set()
        for tag in savetweet:
            if self.threshold == 0 or tag in self.keepers:
                cleantags.add(tag)
            elif self.keepother:
                cleantags.add("-OTHER")

        # Drop excluded tags, then sort so pairs are produced in sorted order.
        cleantags = sorted(cleantags.difference(self.exclude))

        # Generate all pairs.
        for left, right in itertools.combinations(cleantags, 2):
            self.addlink(left, right)
            if self.reciprocal:
                self.addlink(right, left)

        # If a tag is the only one included from this tweet there are no
        # combinations, so (per the original author's note) addlink will not
        # have added it to self.nodes: add it here.  Otherwise add to the
        # tweet count for the tag.
        for tag in cleantags:
            if tag in self.nodes:
                self.nodes[tag]["tweetcount"] += 1
            else:
                self.addsingle(tag)

    data = LinkNodesProfiler.report(self)
    return data
def report(self):
    """Return the report built by ``LinkNodesProfiler.report`` for this object.

    No extra processing happens here; the call is delegated unchanged.
    """
    data = LinkNodesProfiler.report(self)
    return data