Example #1
0
 def initialize(self, conf, ctx):
     """Prepare the state used for top-k tracking.

     Sets three attributes: ``self.k`` (size of the top-k list),
     ``self.r`` (Redis client) and ``self.sketch`` (count-min sketch).
     ``conf`` and ``ctx`` are accepted but not used here.
     """
     top_k = 50  # how many entries the top-k list holds

     # The sketch gets the fixed 10**-7 / 0.005 parameters (the delta and
     # epsilon defaults, per the original author's note) plus the top-k size.
     self.sketch = Sketch(10**-7, 0.005, top_k)
     self.k = top_k

     # Redis client for the server on localhost, default port, database 0.
     self.r = redis.Redis(host='localhost', port=6379, db=0)
Example #2
0
class TopK(Bolt):
    """
    Bolt that maintains the top-k most frequent words seen so far, to find
    trending topics at a given time.

    Two data structures cooperate:
    - a count-min sketch keeps an approximate frequency for every word;
    - the Redis sorted set "top-k" stores the current top-k words, scored
      by their estimated frequency.
    """

    def initialize(self, conf, ctx):
        """Create the Redis client and the count-min sketch.

        ``conf`` and ``ctx`` are accepted but not used here.
        """
        # Initialize redis client
        self.r = redis.Redis(host='localhost', port=6379, db=0)
        self.k = 50  # k value for topK

        # Initialize count-min sketch with default delta and epsilon values
        self.sketch = Sketch(10**-7, 0.005, self.k)

    def process(self, tup):
        """Count one occurrence of the incoming word and update the top-k set.

        The word is read from ``tup.values[0]``. After updating the sketch:
        - a word that is already in "top-k" just gets its score refreshed;
        - while "top-k" holds fewer than ``self.k`` words, a new word is
          added directly;
        - otherwise a new word replaces the lowest-scored member, but only
          if its estimated frequency is strictly higher.
        """
        word = tup.values[0]

        # Add the word to the count-min sketch, then read its estimate once
        # so every Redis write below uses the same value.
        self.sketch.update(word, 1)
        estimate = self.sketch.get(word)

        # NOTE(review): the positional zadd(name, member, score) form below is
        # kept exactly as the original wrote it; newer redis-py releases
        # expect a {member: score} mapping -- confirm the installed version.

        # Word already tracked: only refresh its score. Evicting the minimum
        # here would shrink the set below k and let the next arbitrary word
        # in without any frequency check.
        if self.r.zscore("top-k", word) is not None:
            self.r.zadd("top-k", word, estimate)
            return

        # Fewer than k words stored so far: add the word directly.
        if self.r.zcount("top-k", "-inf", "+inf") < self.k:
            self.r.zadd("top-k", word, estimate)
            return

        # Set is full: fetch the member with the lowest score.
        w, freq = self.r.zrange("top-k", 0, 0, withscores=True)[0]

        # If the word's frequency is higher than the least frequency in the
        # top-k list, replace that least-frequent word with the current one.
        if freq and freq < estimate:
            self.r.zrem("top-k", w)
            self.r.zadd("top-k", word, estimate)