コード例 #1
0
ファイル: d3cotags.py プロジェクト: wenzi/twarc-report
    def report(self):
        """Build co-occurrence links and per-tag tweet counts, then report.

        When ``self.threshold`` is greater than 0, tags in ``self.counts``
        whose count is below the threshold are removed; if ``self.keepother``
        is set their counts are accumulated under the tag ``'-OTHER'``. The
        surviving tags are collected in ``self.keepers``.

        Each entry of ``self.savetweets`` (an iterable of tags) is then
        re-processed: tags not in ``self.keepers`` are replaced by
        ``'-OTHER'`` (or dropped when ``self.keepother`` is false), tags in
        ``self.exclude`` are removed, and every pair of remaining tags is
        passed to ``self.addlink``.

        Returns:
            Whatever ``LinkNodesProfiler.report(self)`` returns.
        """
        # Tags below the threshold are folded into "-OTHER"; nothing needs
        # folding when the threshold is 0.
        if self.threshold > 0:
            # Iterate over a snapshot of the keys: the loop deletes entries
            # (and may insert "-OTHER"), which raises RuntimeError on
            # Python 3 when the live keys view is being iterated.
            for countkey in list(self.counts):
                if self.counts[countkey] < self.threshold:
                    # transfer the count of a below-threshold tag to
                    # "-OTHER" (if requested) and delete the tag
                    if self.keepother:
                        self.counts['-OTHER'] += self.counts[countkey]
                    del self.counts[countkey]
                else:
                    # otherwise the tag survives
                    self.keepers.add(countkey)
            if self.keepother:
                self.keepers.add('-OTHER')
            # keepers now has a complete set of surviving tags

        # now process hashtags in tweets again, replacing any tag not in
        # keepers with -OTHER
        self.counts = Counter()
        for savetweet in self.savetweets:

            # cleantags gathers the unique tags kept for this tweet
            # (tags are presumably already lower-cased by the caller)
            cleantags = set()

            for tag in savetweet:
                if self.threshold == 0 or tag in self.keepers:
                    cleantags.add(tag)
                elif self.keepother:
                    cleantags.add('-OTHER')

            # sort tags and remove tags that are in the exclude set
            cleantags = sorted(cleantags.difference(self.exclude))

            # generate all pairs
            for first, second in itertools.combinations(cleantags, 2):
                self.addlink(first, second)
                if self.reciprocal:
                    self.addlink(second, first)

            # Add to the tweet count for each tag. A tag that is the only
            # one kept from this tweet produced no combinations, so it may
            # not be in self.nodes yet: register it with addsingle instead.
            for tag in cleantags:
                if tag in self.nodes:
                    self.nodes[tag]["tweetcount"] += 1
                else:
                    self.addsingle(tag)

        return LinkNodesProfiler.report(self)
コード例 #2
0
ファイル: d3cotags.py プロジェクト: jeffreymoro/twarc-report
    def report(self):
        """Compute tag co-occurrence links for the saved tweets and report.

        With a positive ``self.threshold``, every tag in ``self.counts``
        counted fewer than ``self.threshold`` times is deleted (its count is
        added to ``"-OTHER"`` when ``self.keepother`` is true) and all other
        tags are recorded in ``self.keepers``.

        The tag collections in ``self.savetweets`` are then walked again:
        tags outside ``self.keepers`` become ``"-OTHER"`` (or are dropped
        when ``self.keepother`` is false), tags in ``self.exclude`` are
        removed, and each pair of remaining tags is handed to
        ``self.addlink`` (in both directions when ``self.reciprocal``).

        Returns:
            The value returned by ``LinkNodesProfiler.report(self)``.
        """
        # for tags below the threshold, replace with "-OTHER"
        # which is not necessary if threshold is 0
        if self.threshold > 0:
            # Take a copy of the keys first. Entries are deleted and
            # "-OTHER" may be inserted inside the loop; iterating the live
            # keys view while doing so raises RuntimeError on Python 3.
            countkeys = list(self.counts)
            for countkey in countkeys:
                if self.counts[countkey] < self.threshold:
                    # for a tag whose count is below the threshold, transfer
                    # its count to tag "-OTHER" and delete it
                    if self.keepother:
                        self.counts["-OTHER"] += self.counts[countkey]
                    del self.counts[countkey]
                else:
                    # otherwise add it to the set of keepers
                    self.keepers.add(countkey)
            if self.keepother:
                self.keepers.add("-OTHER")
            # keepers now has a complete set of surviving tags

        # now process hashtags in tweets again, replacing any tag not in
        # keepers with -OTHER
        self.counts = Counter()
        for savetweet in self.savetweets:

            # cleantags gathers the unique tags retained for this tweet
            # (presumably lower-cased before being saved; not done here)
            cleantags = set()

            for tag in savetweet:
                if self.threshold == 0 or tag in self.keepers:
                    cleantags.add(tag)
                elif self.keepother:
                    cleantags.add("-OTHER")

            # sort tags and remove tags that are in the exclude set
            cleantags = sorted(cleantags.difference(self.exclude))

            # generate all pairs
            for left, right in itertools.combinations(cleantags, 2):
                self.addlink(left, right)
                if self.reciprocal:
                    self.addlink(right, left)

            # add to tweet count for each tag; a tag that was the only one
            # kept from this tweet yields no pairs above, so it may be
            # missing from self.nodes and is registered via addsingle
            for tag in cleantags:
                if tag in self.nodes:
                    self.nodes[tag]["tweetcount"] += 1
                else:
                    self.addsingle(tag)

        data = LinkNodesProfiler.report(self)
        return data
コード例 #3
0
ファイル: d3cotags.py プロジェクト: wenzi/twarc-report
 def __init__(self, opts):
     """Run the base profiler's initialiser, then create empty tag state.

     ``opts`` is forwarded unchanged to ``LinkNodesProfiler.__init__``.
     """
     LinkNodesProfiler.__init__(self, opts)
     # tags that survive threshold filtering (starts empty)
     self.keepers = set()
     # per-tag occurrence counts (starts empty)
     self.counts = Counter()
     # per-tweet tag collections kept for later re-processing (starts empty)
     self.savetweets = []
コード例 #4
0
ファイル: d3graph.py プロジェクト: ruebot/twarc-report
 def report(self):
     """Return the report produced by ``LinkNodesProfiler.report``."""
     data = LinkNodesProfiler.report(self)
     return data
コード例 #5
0
ファイル: d3graph.py プロジェクト: ruebot/twarc-report
 def __init__(self, opts):
     """Initialise the instance by delegating to ``LinkNodesProfiler``.

     ``opts`` is passed through unchanged; no additional state is set here.
     """
     LinkNodesProfiler.__init__(self, opts)
コード例 #6
0
ファイル: d3graph.py プロジェクト: jeffreymoro/twarc-report
 def report(self):
     """Delegate to ``LinkNodesProfiler.report`` and return its result."""
     result = LinkNodesProfiler.report(self)
     return result
コード例 #7
0
ファイル: d3graph.py プロジェクト: jeffreymoro/twarc-report
 def __init__(self, opts):
     """Set up the instance via ``LinkNodesProfiler.__init__``.

     ``opts`` is forwarded as-is; this subclass adds no attributes of its own.
     """
     LinkNodesProfiler.__init__(self, opts)
コード例 #8
0
ファイル: d3cotags.py プロジェクト: jeffreymoro/twarc-report
 def __init__(self, opts):
     """Initialise the base profiler and start with empty tag bookkeeping.

     ``opts`` is handed to ``LinkNodesProfiler.__init__`` untouched.
     """
     LinkNodesProfiler.__init__(self, opts)
     # set of tags retained after threshold filtering; initially empty
     self.keepers = set()
     # occurrence counter keyed by tag; initially empty
     self.counts = Counter()
     # saved per-tweet tag collections; initially empty
     self.savetweets = []