def nextTuple(self):

        race_time = self.myindex  # one record per second

        # Sleep to simulate the live event; 0.35s per record is about 3x faster than the real race.
        # Anomaly detection needs at least 80 seconds of data, so skip the sleep for the first 80 seconds.
        if race_time > 80:
            time.sleep(0.35)

        row_data = self.data.loc[self.myindex].values

        # We can only emit a flat list; nested lists are not allowed.
        # Example emit_data: ['word', 9, 74.08, 75.02, 73.76, 77.67, 81.24, 74.63, 76.59, 74.61, 72.88, 71.91]
        # The text 'word' itself does not matter. With multiple bolt instances, tuples are split
        # evenly across bolts based on the text, e.g. 'apple' to bolt_instance1, 'banana' to bolt_instance2.
        # Emitting a constant text routes everything to a single bolt, so we don't have to worry about data order.
        emit_data = ["word"]
        emit_data.append(race_time)
        emit_data = emit_data + row_data.tolist()

        self.myindex += 1

        # start from the beginning if race ends
        if self.myindex == len(self.data):
            self.myindex = 0

        storm.logInfo("Emiting %s" % str(emit_data))
        storm.emit(emit_data)
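
A note on the consuming side: because the spout flattens everything into a single list, the
downstream bolt rebuilds race_time and the ten speed columns by position. A minimal sketch of
that unpacking, mirroring the inference bolt that appears in Example #22:

    def process(self, tup):
        # tup.values == ['word', race_time, speed_1, ..., speed_10]
        race_time = tup.values[1]
        row_data = [tup.values[i + 2] for i in range(10)]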
Example #2
    def process(self, tup):
        '''
        TODO:
        Task: keep track of the top N words
        Hint: implement an efficient algorithm so the bolt isn't shut down before the task finishes;
        the algorithm used to develop the auto-grader maintains an N-size min-heap
        '''
        word = tup.values[0]
        count = float(tup.values[1])

        new_word_count = WordCount(word, count)

        if word in self._top_N_map:
            if count > self._top_N_map[word].count:
                self._top_N_map[word].count = count
                heapq.heapify(self._top_N_heap)

        # add a new element while the heap holds fewer than N entries
        elif len(self._top_N_heap) < self._N:
            self._top_N_map[word] = new_word_count
            heapq.heappush(self._top_N_heap, new_word_count)

        # otherwise replace the smallest word if the new count is larger
        else:
            smallest_word_count = self._top_N_heap[0]

            if count > smallest_word_count.count:
                del (self._top_N_map[smallest_word_count.word])
                self._top_N_map[word] = new_word_count
                heapq.heapreplace(self._top_N_heap, new_word_count)
                storm.logInfo("Add word: %s, count: %d" % (word, count))

        storm.emit(["top-N", self.printvalues()])
        pass
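
heapq orders heap entries with their comparison operators, so the WordCount objects pushed
above (and the WordCountTuple objects in Example #5 below) must compare by count. Neither
snippet defines the class, so this is a hypothetical sketch of the minimal shape it needs:

    class WordCount(object):
        """Hypothetical heap entry; heapq keeps the entry with the smallest count at index 0."""
        def __init__(self, word, count):
            self.word = word
            self.count = count

        def __lt__(self, other):
            return self.count < other.count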
Example #3
    def process(self, tup):
        # Load data from tuple
        data = tup.values[0]

        # Analyze data: exponentially weighted update of the running sentiment.
        # Twitter readings are smoothed more heavily (weight .995) than reddit or news (.85).
        weights = {'twitter': .995, 'reddit': .85, 'news': .85}
        if data['source'] in weights:
            weight = weights[data['source']]
            sentiment = data['data']['sentiment']
            overall_sentiment[0] = overall_sentiment[0] * weight + sentiment * (1 - weight)

        # Get today's date
        today = date.today()

        # Store analyzed results in DynamoDB
        table = dynamodb.Table(config['dynamodb']['analysis'])
        table.put_item(
            Item={
                'date': str(today),
                'timestamp': str(time.time()),
                'sentiment': Decimal(str(overall_sentiment[0]))
            })
        # Emit for downstream bolts
        storm.emit([data])
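
The update above is an exponential moving average: each reading moves the running score by
(1 - weight) of the gap, so a single tweet (weight .995) shifts it by only 0.5% while a
reddit or news item (weight .85) shifts it by 15%. As a standalone helper (name and framing
hypothetical, not part of the original bolt):

    def ewma_update(current, reading, weight):
        """Blend one reading into a running average; a higher weight means slower drift."""
        return current * weight + reading * (1 - weight)

    # e.g. starting from 0.0, one fully positive tweet nudges the score to just 0.005:
    # ewma_update(0.0, 1.0, .995) == 0.005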
Example #4
 def nextTuple(self):
     time.sleep(0.2)
     # TODO
     # Task: randomly generate sentence from sentences string array
     sentence = random.choice(SENTENCE)
     storm.logInfo("Emitting %s" % sentence)
     storm.emit([sentence])
Example #5
    def process(self, tup):
        # TODO:
        # Task: keep track of the top N words

        word = tup.values[0]
        count = int(tup.values[1])

        new_word_count = WordCountTuple(word, count)

        if word in self._top_N_map:
            if count > self._top_N_map[word].count:
                self._top_N_map[word].count = count
                heapq.heapify(self._top_N_heap)
                storm.logInfo("Update word: %s, count: %d" % (word, count))
        elif len(self._top_N_heap) < self._N:
            self._top_N_map[word] = new_word_count
            heapq.heappush(self._top_N_heap, new_word_count)
            storm.logInfo("Add word: %s, count: %d" % (word, count))
        else:
            smallest_word_count = self._top_N_heap[0]
            storm.logInfo(
                "Current smallest word: %s, count: %d" %
                (smallest_word_count.word, smallest_word_count.count))

            if count > smallest_word_count.count:
                del (self._top_N_map[smallest_word_count.word])
                self._top_N_map[word] = new_word_count
                heapq.heapreplace(self._top_N_heap, new_word_count)
                storm.logInfo("Add word: %s, count: %d" % (word, count))

        storm.logInfo("Top N: %s" % self.report())
        storm.emit(["top-N", self.report()])
Example #6
    def process(self, tup):
        # Load data from tuple
        data = tup.values[0]
        data = json.loads(data)

        # Get today's date
        today = date.today()

        # Analyze data
        sentiment = get_sentiment_score(data['title'], data['description'])

        # Store analyzed results in DynamoDB
        table = dynamodb.Table(config['dynamodb']['news'])
        parsed_data = {
            'date': str(today),
            'timestamp': str(data['publishedAt']),
            'title': data['title'],
            'description':
            data['description'] if data['description'] != '' else ' ',
            'sentiment': Decimal(str(sentiment))
        }
        table.put_item(Item=parsed_data)

        # Emit for downstream bolts
        storm.emit([{'source': 'news', 'data': parsed_data}])
Example #7
 def nextTuple(self):
     docs = self.get_docs()
     uuid = docs[0]['uuid']
     tup = [docs[0]['href'][18:]]
     self.buffer[uuid] = (tup, 0)
     emit(tup, id=uuid)
     sleep(1.0)
Example #8
 def process(self, tup):
     data = tup.values[0]
     tweet = ast.literal_eval(data)
     output = real_time_batch_processing(tweet)
     table = conn_db.Table("realtime_db")
     table.put_item(Item=output)
     storm.emit([output])
 def process(self, tup):
     if tup.values:
         words = tup.values[0]
         if words:
             storm.emit([words])
 def process(self, tup):
     tweet = tup.values[0]
     tweet['tweettext'] = tweet['text']
     del tweet['text']
     tweet['sentimentscore'] = str(
         self.checkForHateWords(tweet['tweettext']))
     storm.emit([tweet])
Example #11
 def process(self, tup):
   text = tup.values[1]
   #language = langid.classify(text)[0]
   #l = LangID()
   #l.train()
   language = self.l.classify(text)
   storm.emit([tup.values[0], language])
Example #12
    def process(self, tup):
        segments = tup.values[1].rstrip('/').rsplit('/', 1)
        path = segments[0] if self.match(segments[-1]) else '/'.join(segments)

        event = dict(
            timestamp=tup.values[0],
            path=path
            )

        kwargs = dict(
            id=tup.values[2]
            )

        try:
            # TODO: Retrieve users from all indices.
            events = self.es.get(self.index, kwargs['id'], 'user',
                                 preference='_primary')
            kwargs['version'] = events['_version']
            body = {'events': events['_source']['events'] + [event]}
        except NotFoundError:
            kwargs['op_type'] = 'create'
            body = {'events': [event]}

        try:
            body['rank'] = math.log10(len(body['events'])) / 2
            self.es.index(self.index, 'user', body, **kwargs)
            paths = list(event['path'] for event in body['events'])
            emit([kwargs['id'], paths])
            ack(tup)
        except TransportError:
            fail(tup)
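
The version passed through kwargs makes the write above an optimistic-concurrency update: if
another writer changed the user document between the get() and the index(), Elasticsearch
rejects the write with a version conflict (HTTP 409). A hypothetical standalone sketch of the
same read-modify-write with conflict retries, reusing the call shapes from the bolt (the
helper name and retry policy are assumptions):

    from elasticsearch.exceptions import ConflictError, NotFoundError

    def append_event(es, index, doc_id, event, attempts=3):
        for _ in range(attempts):
            kwargs = {'id': doc_id}
            try:
                doc = es.get(index, doc_id, 'user', preference='_primary')
                kwargs['version'] = doc['_version']
                events = doc['_source']['events'] + [event]
            except NotFoundError:
                kwargs['op_type'] = 'create'
                events = [event]
            try:
                return es.index(index, 'user', {'events': events}, **kwargs)
            except ConflictError:
                continue  # another writer bumped _version; re-read and retry
        raise RuntimeError('could not append event after %d attempts' % attempts)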
Example #13
    def process(self, tup):

        # NOTE: this per-tuple setup (stopword list, MeCab tagger, redis client,
        # logging config) would normally be done once in initialize()
        f = open("/root/Japanese.txt")
        self.stopwords = f.read().split('\n')
        f.close()
        self.m = MeCab.Tagger("-Ochasen")
        r = redis.Redis(host='localhost', port=6379, db=0)

        pid = os.getpid()
        base_path = '/var/log/takatoshi/'
        logging.basicConfig(filename=base_path + __file__ + '.log',
                            level=logging.DEBUG)
        logging.debug(datetime.now())
        logging.debug("abs path of py file: " + os.path.abspath(__file__))

        sentence = tup.values[0]
        ts = tup.values[1]

        res = self.m.parse(sentence).splitlines()[:-1]

        output = set()

        for line in res:
            fa = line.split('\t')
            # keep nouns ("名詞" = noun) that are not stopwords
            if u"名詞" in fa[3] and fa[0] not in self.stopwords:
                output.add(fa[0])

        if len(output):
            r.incr('total_' + str(ts))
            for noun in output:
                logging.debug("getting noun : " + noun)
                storm.emit([noun, ts])
 def __do_emit_updated(self, is_updated=True):
     #if not self.last_output or is_updated:
     #    self.last_output = ', '.join(list(self.the_top.keys()))
     #storm.emit(['top-N', self.last_output])
     #output = ', '.join(list(self.the_top.keys()))
     output = ', '.join([cand.word for cand in self.top_cands])
     storm.emit(['top-N', output])
Example #15
    def process(self, tup):
        sentence = tup.values[0]
        sentence = re.sub(r"[,.;!\?]", "", sentence) # strip common punctuation
        words = jieba.cut(sentence, cut_all=True)

        for word in words:
            storm.emit([word])
    def process(self, tup):
        # IDEA: do language classification here already?
        # IDEA: keep classifiers for several languages in a map
        # IDEA: batch N classifications (of the same language) for speed?

        url, meta, text, content, outlinks = tup.values
        metadata = Metadata(meta)
        out = [url, metadata, text, content, outlinks]

        try:
            lang = metadata['n52.language'][0]
        except KeyError:
            lang = 'unknown'

        if lang != 'en':
            msg = 'ignoring tuple, as document language {} is not supported'.format(
                lang)
            logging.debug(msg)
            storm.logDebug(msg)
        else:
            confidence, clazz = self.classify([text])[0]
            metadata["n52.classify.class"] = clazz
            metadata["n52.classify.confidence"] = confidence
            logging.debug([confidence, clazz, text[100:200], url])

        storm.emit(out, anchors=[tup])
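
One way the "batch N classifications" idea from the comments above could look: buffer the
tuples and classify their texts in a single call, since classify() already accepts a list.
The _pending buffer, BATCH_SIZE, and the flush policy below are all hypothetical, and
delaying emission like this is only safe if the topology's tuple timeouts allow it:

    BATCH_SIZE = 32  # hypothetical tuning knob

    def _flush_batch(self):
        # one classify() call for every buffered text, then emit tuple by tuple
        results = self.classify([text for _, _, text in self._pending])
        for (tup, out, _), (confidence, clazz) in zip(self._pending, results):
            out[1]["n52.classify.class"] = clazz
            out[1]["n52.classify.confidence"] = confidence
            storm.emit(out, anchors=[tup])
        self._pending = []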
Example #17
 def process(self, tup):
     words = tup.values[0].split('.')
     cluster, user, job, task, pid, cpu, mem = tuple(words)
     storm.emit(words)
     logger.info(
         'cluster:%s,username:%s,jobname:%s,taskname:%s,pid:%s,cpu:%s,mem:%s',
         cluster, user, job, task, pid, cpu, mem)
Example #18
    def nextTuple(self):
        try: 
            html = urllib2.urlopen(self.url_animals_tl).read()
            soup = BeautifulSoup(html)
            scripts = soup.find_all('script')
            code = scripts[len(scripts) - 1]

            content = code.contents[0].strip()
            prefix = 'P.start.start('
            i = content.find(prefix)
            if i != -1:
                json_str = content[i + len(prefix) : len(content) - 2]
                json_obj = json.loads(json_str)
                pins = json_obj['tree']['children'][3]['children'][0]['children'][0]['children']
                
                for pin in pins:
                    pin_id = pin['options']['pin_id']
                    if 'module' in pin['children'][1]['options']:
                        module = pin['children'][1]['options']['module']
                    orig_link = pin['data']['link']
                    orig_host = urlparse(pin['data']['link']).hostname
                    images = pin['data']['images']
                    if 'orig' in images:
                        pass
                    storm.emit([pin_id, orig_link, orig_host, json.dumps(pin, indent=4, sort_keys=True), self.category])
                
            time.sleep(2)
        except StopIteration:
            pass
        except urllib2.HTTPError, err:
            if err.code == 404:
                pass
 def process(self, tup):
     # Split the inbound sentence at spaces
     words = tup.values[0].split(" ")
     # Loop over words and emit
     for word in words:
       storm.logInfo("Emitting %s" % word)
       storm.emit([word])
Example #20
 def process(self,tup):
     # storm.log("Dispel4Py ------> %s: Received block." % (self.script.id, ))
     try:
         inputname = self.inputmapping[tup.component][tup.stream]
         storm.log("Dispel4Py ------> %s: Received block at input '%s'" % (self.script.id, inputname, ))
         # inputs = tup.values
         inputs = decode_types(tup.values)
         outputs = self.script.process( { inputname : inputs })
         # storm.log("Dispel4Py ------> %s: Processing complete." % self.scriptname)
         
         if outputs is None:
             return
         for streamname, output in outputs.iteritems():
             result = output if isinstance(output, list) else [output]
             try:
                 storm.emit(result, stream=streamname)
                 storm.log("Dispel4Py ------> %s: Emitted to stream %s: %s" % (self.script.id, streamname, str(result)[:200]))
             except TypeError:
                 # encode manually
                 encoded = encode_types(result)
                 storm.emit(encoded, stream=streamname)
                 storm.log("Dispel4Py ------> %s: Emitted to stream %s" % (self.script.id, streamname))
             # except:
             #     storm.log("%s: %s" % (self.script.id, traceback.format_exc()))
     except:
         storm.log("Dispel4Py ------> %s: %s" % (self.script.id, traceback.format_exc(), ))
Example #21
 def process(self, tup):
     word = tup.values[0]
     if random() < 0.75:
         storm.emit([word + 'lalala'], anchors=[tup])
         storm.ack(tup)
     else:
         storm.log(word + ' randomly skipped!')
    def process(self, tup):
        race_time = tup.values[1]
        row_data = [tup.values[i + 2] for i in range(10)]
        storm.logInfo("Inference Bolt data: race_time: %s, data: %s" %
                      (str(race_time), str(row_data)))

        # restart the event
        if race_time == 0:
            self.speed_data = []
            self.next_step_data = row_data

        # we need at least 80 seconds of data before detection can start
        elif race_time <= self.time_step:
            self.speed_data.append(self.next_step_data)
            self.next_step_data = row_data

        # normal case: slide the window one step forward
        else:
            self.speed_data.append(self.next_step_data)
            self.speed_data.pop(0)  # drop the oldest record so the window length stays fixed
            self.next_step_data = row_data
            input_data = np.expand_dims(
                self.scaler.transform(np.array(self.speed_data).T), 2)
            prediction = self.scaler.inverse_transform(
                self.model.predict(input_data))
            anomaly_score = np.abs(prediction[:, 0] -
                                   np.array(self.next_step_data)).tolist()

            #storm.logInfo("race_time:%s speed:%s anomaly_score %s" % (str(race_time), str(self.next_step_data), str(anomaly_score)))
            emit_data = ["word"]
            emit_data.append(race_time)
            emit_data = emit_data + self.next_step_data + anomaly_score

            storm.logInfo("Inference Bolt emiting: %s" % str(emit_data))
            storm.emit(emit_data)
Example #23
 def nextTuple(self):
     docs = self.get_docs()
     uuid = docs[0]['uuid']
     tup = [docs[0]['href'][18:]]
     self.buffer[uuid] = (tup, 0)
     emit(tup, id=uuid)
     sleep(1.0)
Example #24
    def process(self, tup):
        # Load data from tuple
        data = tup.values[0]
        data = json.loads(data)

        # Initialize if deque is empty
        if len(past_prices) == 0:
            past_prices.extend([float(data['price'].replace(',', ''))] *
                               LOOK_BACK)

        # Otherwise, append to the rolling deque
        else:
            past_prices.append(float(data['price'].replace(',', '')))

        # Check if enough elements exist
        if len(past_prices) == LOOK_BACK:

            prediction = predict(MODEL,
                                 np.array(past_prices).reshape(1, LOOK_BACK))

            # Store predicted results in DynamoDB
            table = dynamodb.Table(config['dynamodb']['prediction'])
            table.put_item(
                Item={
                    'timestamp': shift_future(data['timestamp'],
                                              FUTURE_OFFSET),
                    'price': Decimal(str(prediction)),
                })
        # Emit for downstream bolts
        storm.emit([data])
Example #25
 def process(self, tup):
     dict_data = tup.values[0]
     sensor_type = tup.values[1]
     ordered_dict = self.order_dict(dict_data)
     # convert to lists
     values_list = list(ordered_dict.values())
     time_list = list(ordered_dict.keys())
     # training length
     train_len = int(len(values_list) * 0.85)
     # prediction length
     predict_len = int(len(values_list) * 0.15)
     # training data
     train_values = values_list[:train_len]
     # training time series
     train_time = time_list[:train_len]
     # prediction time series
     predict_time = time_list[-predict_len:]
     # convert lists to ndarrays
     nd_values = np.array(train_values, dtype=np.float32)
     nd_time = np.array(train_time, dtype=np.float32)
     # train and predict
     predict = self.train_predict(nd_time, nd_values, predict_len)
     # convert to dict (for JSON)
     predict_dict = dict(
         zip(predict_time, map(lambda x: round(x, 4), predict[0])))
     storm.emit([dict_data, predict_dict, sensor_type])
Example #26
 def process(self, tup):
     word = tup.values[0]
     if random() < 0.75:
         storm.emit([word + 'lalala'], anchors=[tup])
         storm.ack(tup)
     else:
         storm.log(word + ' randomly skipped!')
Example #27
 def nextTuple(self):
     try:
         input_tuple = None
         try:
             if self.counter >= self.script._num_iterations:
                 return
         except AttributeError:
             # no iteration limit set; fall back to consuming the static input
             try:
                 input_tuple = self.script._static_input.pop(0)
             except AttributeError:
                 # there is no static input
                 pass
             except IndexError:
                 # static input is empty - no more processing
                 return
         outputs = self.script.process(input_tuple)
         if outputs is None:
             return
         for streamname, output in outputs.iteritems():
             result = output if isinstance(output, list) else [output]
             storm.emit(result, stream=streamname, id=self.counter)
             storm.log("Dispel4Py ------> %s: emitted tuple %s to stream %s" % (self.script.id, result, streamname))
             self.counter += 1
     except:
         # logging the error but it should be passed to client somehow
         storm.log("Dispel4Py ------> %s: %s" % (self.scriptname, traceback.format_exc(), ))
Example #28
    def process(self, tuple):
        '''
        Must fulfil the following contract expressed in the Java wrapper:

        declarer.declare(new Fields(TopologyFields.AUTHOR_SCREEN_NAME, TopologyFields.CREATED_AT,
                TopologyFields.FAV_COUNT, TopologyFields.HASHTAGS_TEXTS, TopologyFields.IN_REPLY_TO_SCREEN_NAME, 
                TopologyFields.LANG, TopologyFields.RETWEET_COUNT, TopologyFields.RETWEETED, 
                TopologyFields.SOURCE, TopologyFields.PLACE, TopologyFields.POSSIBLY_SENSITIVE,
                TopologyFields.TEXT, TopologyFields.TOPIC_NAME));
        '''
        place, topic_name, query = tuple.values
        try: 
            tweets = list(get_tweets.get_tweets_for_trends(self._twitter_api, [{"query" : query}], popular = True, tweet_processor = self._storm_tweet_processor))[0]["tweets"]
        except tweepy.TweepError as te:
            # We have hit the REST API Rate limit for Twitter https://dev.twitter.com/docs/rate-limiting/1.1, no more tweets for some time
            log_tweeter_error(te, sleep_time=self._rate_limit_sleep_time)
            return 

        for pt in tweets:
            # Here we add the trending topic name, and take the place name used
            # internally by get_tweets instead of the place names returned by twitter
            tup = [pt['author_screen_name'], pt['created_at'], 
                   pt['favorite_count'], pt['hashtags_texts'], pt['in_reply_to_screen_name'],
                   pt['lang'], pt['retweet_count'], pt['retweeted'],
                   pt['source'], place, pt['possibly_sensitive'],
                   pt['text'], topic_name]
            storm.emit(tup)
Example #29
 def nextTuple(self):
     # pause for a while (set a status flag)
     time.sleep(15)
     batch = 10
     bases = ts.get_stock_basics()
     code_list = bases.index
     total = len(code_list)
     batch_size = total // batch
     pool = multiprocessing.Pool(processes=batch)
     results = []
     for i in range(batch + 1):
         begin_index = i * batch_size
         end_index = (i + 1) * batch_size
         if end_index > total:
             end_index = total
          batch_data = code_list.tolist()[begin_index:end_index]
         res = pool.apply_async(ts.get_realtime_quotes, (batch_data, ))
         results.append(res)
         # get_stock_hist_data_batch(code_list = batch_data,start=start,end=end,sh_df=sh_df,sz_df=sz_df,cyb_df=cyb_df,table_name=table_name)
     pool.close()
     pool.join()
     # wait for all batches to finish
     for item in results:
         for i, row in item.get().iterrows():  # apply_async returns an AsyncResult
             code = row['code']
             sentence = random.choice(SENTENCES)
             storm.logInfo("Emiting %s" % sentence)
             storm.logInfo("Emiting code:%s row:%s" % (code, row))
             storm.emit([code, row])
  def emit(self):

    if not self.entity:
      self.entity = self.getLastEntity()

    # We always emit tuple = (entity, "{json string}")
    storm.emit([self.entity,json.dumps(self.d)])
Example #31
    def process(self, tuple):
        id_tweet, text = tuple.values
        storm.logInfo("LT3BOLTINFO")
        storm.logInfo(text)

        response = get_res(text.encode('utf-8'))

        # keep the last non-empty line of the response and normalise the quotes
        response = response.split('\n')[-2]
        json_string = response.replace("'", '"')

        data = simplejson.loads(json_string)
        data['id'] = str(id_tweet)
        data['source'] = "LT3"
        data['info'] = text

        if (data['relevance_boolean'] == 1 and data['severity_boolean'] == 1):
            data['flag'] = "LT3"
        else:
            data['flag'] = "none"

        del data['relevance_boolean']
        del data['severity_boolean']

        json_string = simplejson.dumps(data)

        storm.emit([json_string])
Example #32
 def nextTuple(self):
     try:
         input_tuple = None
         try:
             input_tuple = self.script._static_input.pop(0)
         except AttributeError:
             # there is no static input
             if self.counter >= self.script._num_iterations:
                 return
         except IndexError:
             # static input is empty - no more processing
             return
         storm.log("Dispel4Py ------> %s: input %s" % (
             self.scriptname,
             input_tuple,
         ))
         outputs = self.script.process(input_tuple)
         if outputs is None:
             return
         for streamname, output in outputs.iteritems():
             result = output if isinstance(output, list) else [output]
             storm.emit(result, stream=streamname, id=self.counter)
             storm.log(
                 "Dispel4Py ------> %s: emitted tuple %s to stream %s" %
                 (self.script.id, result, streamname))
             self.counter += 1
     except:
         # logging the error but it should be passed to client somehow
         storm.log("Dispel4Py ------> %s: %s" % (
             self.scriptname,
             traceback.format_exc(),
         ))
Example #33
 def process(self, tup):
     # TODO: add check for empty values
     if tup.values[0]:
         words = tup.values[0].split(" ")
         if words:
             for word in words:
                 storm.emit([word])
Example #34
    def process(self, tup):
        # storm.log("Dispel4Py ------> %s: Received block." % (self.script.id, ))
        try:
            inputname = self.inputmapping[tup.component][tup.stream]
            storm.log("Dispel4Py ------> %s: Received block at input '%s'" % (
                self.script.id,
                inputname,
            ))
            # inputs = tup.values
            inputs = decode_types(tup.values)
            outputs = self.script.process({inputname: inputs})
            # storm.log("Dispel4Py ------> %s: Processing complete." % self.scriptname)

            if outputs is None:
                return
            for streamname, output in outputs.iteritems():
                result = output if isinstance(output, list) else [output]
                try:
                    storm.emit(result, stream=streamname)
                    storm.log(
                        "Dispel4Py ------> %s: Emitted to stream %s: %s" %
                        (self.script.id, streamname, str(result)[:200]))
                except TypeError:
                    # encode manually
                    encoded = encode_types(result)
                    storm.emit(encoded, stream=streamname)
                    storm.log("Dispel4Py ------> %s: Emitted to stream %s" %
                              (self.script.id, streamname))
                # except:
                #     storm.log("%s: %s" % (self.script.id, traceback.format_exc()))
        except:
            storm.log("Dispel4Py ------> %s: %s" % (
                self.script.id,
                traceback.format_exc(),
            ))
Example #35
 def process(self, tup):
     url = tup.values[0]
     storm.log("HARing "+url)
     output = self.get_har_with_image(url)
     if output != "FAIL":
         storm.emit(output, anchors=[tup])
         storm.ack(tup)
Example #36
	def process(self, tup):
		sentence = tup.values[0].lower()
		words = sentence.split(" ")
		bow = numpy.zeros((self._n,))
		for w in words:
			bow[self.map[w]] += 1
		storm.emit([bow.tolist()])
Example #37
 def nextTuple(self):
     id = "periodic_%s" % str(uuid4())
     body = json.dumps({'message_id': CHECK_METRIC_ALARM_MSG_ID})
     message = "Periodic monitoring message sent [%s] %s"
     self.log(message % (id, body))
     emit([None, body], id=id)
     time.sleep(60)
Example #38
 def process(self, tup):
     # Split the inbound sentence at spaces
     words = tup.values[0].split(" ")
     # Loop over words and emit
     for word in words:
         storm.logInfo("Emitting-----> %s" % word)
         storm.emit([word])
Example #39
    def process(self, tup):
        segments = tup.values[1].rstrip('/').rsplit('/', 1)
        path = segments[0] if self.match(segments[-1]) else '/'.join(segments)

        event = dict(timestamp=tup.values[0], path=path)

        kwargs = dict(id=tup.values[2])

        try:
            # TODO: Retrieve users from all indices.
            events = self.es.get(self.index,
                                 kwargs['id'],
                                 'user',
                                 preference='_primary')
            kwargs['version'] = events['_version']
            body = {'events': events['_source']['events'] + [event]}
        except NotFoundError:
            kwargs['op_type'] = 'create'
            body = {'events': [event]}

        try:
            body['rank'] = math.log10(len(body['events'])) / 2
            self.es.index(self.index, 'user', body, **kwargs)
            paths = list(event['path'] for event in body['events'])
            emit([kwargs['id'], paths])
            ack(tup)
        except TransportError:
            fail(tup)
  def process(self, tup):
    # ********** Read input arguments **********

    try:
      input_args = json.loads(tup.values[0])
    except:
      input_args = tup.values[0]
    ret_info = tup.values[1]

    # ********** Calculate results **********

    # Construct required variables
    if "query_type" not in input_args:
      return storm.emit([json.dumps({"ok": False, "msg": "No `query_type` param provided!"}), ret_info])
    query_type = input_args["query_type"].lower()

    if query_type == "collections":
      answer = self.handler.describe_collections('cenote', input_args["PROJECT_ID"])
      return storm.emit([json.dumps({"ok": True, "msg": answer}), ret_info])

    timeframe_start = ""
    timeframe_end = ""
    if "timeframe_start" in input_args:
      timeframe_start = input_args["timeframe_start"]
    if "timeframe_end" in input_args:
      timeframe_end = input_args["timeframe_end"]
    info = {
      "cenote": {
        "url": "/projects/" + input_args["PROJECT_ID"] + "/queries/" + input_args["event_collection"] + "/extraction",
        "timeframe_start": timeframe_start,
        "timeframe_end": timeframe_end
      }
    }
    columns = None
    if "target_property" in input_args:
      columns = input_args["target_property"].split(",")

    # Execute corresponding query
    if query_type == "extraction":
      answer = self.reader.read_data("cenote", columns, json.dumps(info))
    elif query_type in ["count", "min", "max", "sum", "average", "median"]:
      answer = self.reader.perform_operation("cenote", columns, query_type, json.dumps(info))
    elif query_type == "percentile":
      info["cenote"]["percentile"] = int(input_args["percentile"])
      answer = self.reader.perform_operation("cenote", columns, query_type, json.dumps(info))
    else:
      answer = {"data": "Not implemented yet!"}

    # Return results
    if "response" in answer and answer["response"] == 200:
      # Hacky rewrite to turn "system.<someoperation>(<column>)" into "<column>"
      answer = json.loads(re.sub(r'system\.\w*\(|\)', "", json.dumps(answer, default=datetimeParser)))
      return storm.emit([json.dumps({"ok": True, "msg": answer["data"]}), ret_info])
    else:
      try:
        problem = answer["exception"]
      except:
        problem = answer["data"]
      return storm.emit([json.dumps({"ok": False, "msg": problem}), ret_info])
Example #41
 def process(self, tup):
     '''We serialize the input and output as JSON for convenience.'''
     try:
         data = array(json.loads(tup.values[1]))
         result = self.model.predict(data)
         storm.emit([tup.values[0], json.dumps(result.tolist())])
     except:
         traceback.print_exc(file=open('/tmp/trace_svm_bolt.txt', 'a'))
Example #42
 def process(self, tup):
     word = tup.values[0]
     count = self.words.get(word, 0) + 1
     self.words[word] = count
     storm.emit([word, str(count)])
Example #43
 def fail(self, msg_id):
     tup, retries = self.buffer[msg_id]
     if retries >= 5:
         del self.buffer[msg_id]
         log('[RabbitMQSpout] Message %s failed for good.' % msg_id)
     else:
         self.buffer[msg_id] = (tup, retries + 1)
         emit(tup, id=msg_id)
Example #44
 def fail(self, cnt_id):
     tup, retries = self.buffer[cnt_id]
     if retries >= 5:
         del self.buffer[cnt_id]
         log('[ZonAPISpout] Message %s failed for good.' % cnt_id)
     else:
         self.buffer[cnt_id] = (tup, retries + 1)
         emit(tup, id=cnt_id)
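
Both fail() handlers above implement at-least-once redelivery from a local buffer: replay up
to five times, then drop the message for good. The matching ack() is not shown in either
snippet; a hypothetical counterpart would simply stop tracking the tuple:

    def ack(self, msg_id):
        # fully processed, so no replay is needed any more
        self.buffer.pop(msg_id, None)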
Example #45
 def process(self, tup):
     # TODO: add check for empty values
     if tup.values[0]:
         # a "try-except" here would cause Storm workers to die, so guard with an if instead
         words = tup.values[0].split(" ")
         if words:
             for word in words:
                 storm.emit([word])
Example #46
 def process(self, tup):
     print "SplitSentenceBolt.process:tup " + str(tup)
     print "SplitSentenceBolt.process:tup.values[0] " + str(tup.values[0])
     words = tup.values[0].split(" ")
     print "SplitSentenceBolt.process:words " + str(words)
     for word in words:
       storm.emit([word])
       print "SplitSentenceBolt.process:emit " + str(word)
 def nextTuple(self):
     time.sleep(0.2)
     # TODO
     # Task: randomly generate sentence from sentences string array
     # Note: only generate one sentence in this function
     sentence = random.choice(SENTENCES)
     storm.logInfo("Emiting %s" % sentence)
     storm.emit([sentence])
Example #48
    def nextTuple(self):
        line = self.f.readline()
        # readline() returns '' at EOF rather than raising EOFError
        if not line:
            exit()
        storm.emit([line], id=self.tuple_id)
        self.tuple_id += 1
Example #49
 def process(self, tuple):
     tweetid = tuple.values[0]
     tweet_date = tuple.values[1]
     tweettext = tuple.values[2]
     country = tuple.values[3]
     hashtags = tuple.values[4]
     tweet_text = tuple.values[5]
     score = afinn.score(tweettext)
     storm.emit([tweetid, tweet_date, score, country, hashtags, tweet_text])
Example #50
 def process(self, tuple):
     id = tuple.values[0]
     url = tuple.values[1]
     # open("/home/roy/output.txt", "a").write(url + '\n')
     if url in TWEETERS_DB:
         tweeters = TWEETERS_DB[url]
         # open("/home/roy/output.txt", "a").write(str(tweeters) + '\n')
         for tweeter in tweeters:
             storm.emit([id, tweeter])
 def process(self, tup):
     # Get the word from the inbound tuple
     word = tup.values[0]
     # Increment the counter
     self._counter[word] += 1
     count = self._counter[word]
     storm.logInfo("Emitting %s:%s" % (word, count))
     # Emit the word and count
     storm.emit([word, count])
Example #52
 def nextTuple(self):
     if self.count < 2:
         word = choice(words)
         id = str(uuid4())
         self.pending[id] = word
         storm.rpcMetrics("my-custom-shellspout-metric", 1)
         self.count = self.count + 1
         storm.log("TesterSpout update my-custom-shellspout-metric "+str(self.count))
         storm.emit([word], id=id)
Example #53
 def process(self, tuple):
     id = tuple.values[0]
     tweeter = tuple.values[1]
     # open("/home/roy/output.txt", "a").write(str(tuple) + '\n')
     if tweeter in FOLLOWERS_DB:
         followers = FOLLOWERS_DB[tweeter]
         # open("/home/roy/output.txt", "a").write(str(followers) + '\n')
         for follower in followers:
             storm.emit([id, follower])
Example #54
	def process(self, tuple):
		word = tuple.values[0]
		count = self.counts.get(word, 0) + 1
		self.counts[word] = count
		storm.emit([word, count])
Example #55
 def write(self, output):
     result = output if isinstance(output, list) else [output]
     try:
         storm.emit(result, stream=self.streamname)
         storm.log("Dispel4Py ------> Emitted to stream %s." % (self.scriptname, self.streamname))
     except TypeError:
         # encode manually
         encoded = encode_types(result)
         storm.emit(encoded, stream=self.streamname)
         storm.log("Dispel4Py ------> Emitted to stream %s." % (self.scriptname, self.streamname))
Example #56
 def process(self, tup):
     word = tup.values[0]
     if word in self.counts:
         count = self.counts[word]
     else:
         count = 0
     count += 1
     self.counts[word] = count
     storm.log(str(word)+" "+str(count))
     storm.emit([word, count])
Example #57
 def process(self, tuple):
     place = tuple.values[0]
     try: 
         trends = get_tweets.get_trending_topics_text(self._twitter_api, place)
     except tweepy.TweepError as te:
         # We have hit the REST API Rate limit for Twitter https://dev.twitter.com/docs/rate-limiting/1.1, no more tweets for some time
         log_tweeter_error(te, sleep_time=self._rate_limit_sleep_time)
         return 
     for trend in trends:
         storm.emit([place, trend['name'], trend['query']])