Example #1
 def nextTuple(self):
     # Pause for a while (set the status flag)
     time.sleep(15)
     batch = 10
     bases = ts.get_stock_basics()
     code_list = bases.index
     total = len(code_list)
     batch_size = total // batch
     pool = multiprocessing.Pool(processes=batch)
     results = []
     for i in range(batch + 1):
         begin_index = i * batch_size
         end_index = (i + 1) * batch_size
         if end_index > total:
             end_index = total
         batch_data = code_list.tolist()[begin_index:end_index]
         res = pool.apply_async(ts.get_realtime_quotes, (batch_data, ))
         results.append(res)
         # get_stock_hist_data_batch(code_list = batch_data,start=start,end=end,sh_df=sh_df,sz_df=sz_df,cyb_df=cyb_df,table_name=table_name)
     pool.close()
     pool.join()
     # Wait until the pool has finished, then collect each batch's DataFrame
     for item in results:
         df = item.get()
         for i, row in df.iterrows():
             code = row['code']
             storm.logInfo("Emitting code:%s row:%s" % (code, row))
             storm.emit([code, row])
 def process(self, tup):
     # Split the inbound sentence at spaces
     words = tup.values[0].split(" ")
     # Loop over words and emit
     for word in words:
       storm.logInfo("Emitting %s" % word)
       storm.emit([word])
 def process(self, tup):
     race_time = tup.values[1]
     speed_data = [tup.values[i + 2] for i in range(10)]
     # the 10 anomaly scores follow the 10 speed values in the emitted tuple
     anomaly_score = [tup.values[i + 12] for i in range(10)]
     r = requests.post("http://127.0.0.1:5000",
                       data=json.dumps({'race_time': race_time,
                                        'speed_data': speed_data,
                                        'anomaly_score': anomaly_score}))
     storm.logInfo("SendWebServer Bolt data: race_time: %s, speed_data: %s, anomaly_score: %s" %
                   (str(race_time), str(speed_data), str(anomaly_score)))
     storm.logInfo("SendWebServer Bolt r: %s" % str(r))
Example #4
    def process(self, tup):
        # TODO:
        # Task: keep track of the top N words

        word = tup.values[0]
        count = int(tup.values[1])

        new_word_count = WordCountTuple(word, count)

        if word in self._top_N_map:
            if count > self._top_N_map[word].count:
                self._top_N_map[word].count = count
                heapq.heapify(self._top_N_heap)
                storm.logInfo("Update word: %s, count: %d" % (word, count))
        elif len(self._top_N_heap) < self._N:
            self._top_N_map[word] = new_word_count
            heapq.heappush(self._top_N_heap, new_word_count)
            storm.logInfo("Add word: %s, count: %d" % (word, count))
        else:
            smallest_word_count = self._top_N_heap[0]
            storm.logInfo(
                "Current smallest word: %s, count: %d" %
                (smallest_word_count.word, smallest_word_count.count))

            if count > smallest_word_count.count:
                del (self._top_N_map[smallest_word_count.word])
                self._top_N_map[word] = new_word_count
                heapq.heapreplace(self._top_N_heap, new_word_count)
                storm.logInfo("Add word: %s, count: %d" % (word, count))

        storm.logInfo("Top N: %s" % self.report())
        storm.emit(["top-N", self.report()])
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context
        storm.logInfo("Inference bolt instance starting...")
        self.overall_rank = []
        self.last_laptime = []
        self.track_status = []
        self.pit_stop_count = []
        self.completed_laps = []
        self.elapsed_time = []
        self.best_laptime = []
        self.time_behind_leader = []
        self.time_behind_prec = []
        self.overall_best_laptime = []
        self.last_pitted_lap = []
        self.start_position = []
        self.laps_led = []
        self.best_lap = []
        self.laps_behind_leader = []
        self.laps_behind_prec = []
        self.time_step = 10

        #############################################################################################
        ## replace 'sakkas' with your username
        self.model = tf.keras.models.load_model(
            '~/Storm/lap_time.prediction.h5')
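        # Note (not in the original): open() and h5py do not expand '~', so if loading the
        # model fails, one option is to expand the path explicitly, e.g.
        #   self.model = tf.keras.models.load_model(
        #       os.path.expanduser('~/Storm/lap_time.prediction.h5'))
        # (this assumes 'import os' at the top of the module).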
Example #6
    def process(self, tup):
        '''
        TODO:
        Task: keep track of the top N words
        Hint: implement efficient algorithm so that it won't be shutdown before task finished
        the algorithm we used when we developed the auto-grader is maintaining a N size min-heap
        '''
        word = tup.values[0]
        count = float(tup.values[1])

        new_word_count = WordCount(word, count)

        if word in self._top_N_map:
            if count > self._top_N_map[word].count:
                self._top_N_map[word].count = count
                heapq.heapify(self._top_N_heap)

        #adding new elements if the element size is less than 10
        elif len(self._top_N_heap) < self._N:
            self._top_N_map[word] = new_word_count
            heapq.heappush(self._top_N_heap, new_word_count)

        #find smallest word and replace it with new word
        else:
            smallest_word_count = self._top_N_heap[0]

            if count > smallest_word_count.count:
                del (self._top_N_map[smallest_word_count.word])
                self._top_N_map[word] = new_word_count
                heapq.heapreplace(self._top_N_heap, new_word_count)
                storm.logInfo("Add word: %s, count: %d" % (word, count))

        storm.emit(["top-N", self.printvalues()])
        pass
    def process(self, tup):
        race_time = tup.values[1]
        row_data = [tup.values[i + 2] for i in range(10)]
        storm.logInfo("Inference Bolt data: race_time: %s, data: %s" %
                      (str(race_time), str(row_data)))

        # restart the event
        if race_time == 0:
            self.speed_data = []
            self.next_step_data = row_data

        # we need to have 80 seconds data at least to start detection
        elif race_time <= self.time_step:
            self.speed_data.append(self.next_step_data)
            self.next_step_data = row_data

        #normal case
        else:
            self.speed_data.append(self.next_step_data)
            # drop the oldest entry so the window keeps a fixed length
            self.speed_data.pop(0)
            self.next_step_data = row_data
            input_data = np.expand_dims(
                self.scaler.transform(np.array(self.speed_data).T), 2)
            prediction = self.scaler.inverse_transform(
                self.model.predict(input_data))
            anomaly_score = np.abs(prediction[:, 0] -
                                   np.array(self.next_step_data)).tolist()

            #storm.logInfo("race_time:%s speed:%s anomaly_score %s" % (str(race_time), str(self.next_step_data), str(anomaly_score)))
            emit_data = ["word"]
            emit_data.append(race_time)
            emit_data = emit_data + self.next_step_data + anomaly_score

            storm.logInfo("Inference Bolt emiting: %s" % str(emit_data))
            storm.emit(emit_data)
    def nextTuple(self):

        race_time = self.myindex  # one record per second

        # Sleep to simulate the live event; this replays it roughly 3x faster than the real race.
        # We need at least 80 seconds of data before anomaly detection can start, so don't sleep
        # during the first 80 seconds.
        if race_time > 80:
            time.sleep(0.35)

        row_data = self.data.loc[self.myindex].values

        # We can only emit a flat list; nested lists are not allowed.
        # Example emit_data: ['word', 9, 74.08, 75.02, 73.76, 77.67, 81.24, 74.63, 76.59, 74.61, 72.88, 71.91]
        # The text 'word' itself does not matter. If there are multiple bolt instances consuming
        # this stream, tuples are split between them based on the text, e.g. 'apple' to
        # bolt_instance1 and 'banana' to bolt_instance2. Using a constant sends everything to a
        # single bolt instance, so I do not have to worry about data order.
        emit_data = ["word"]
        emit_data.append(race_time)
        emit_data = emit_data + row_data.tolist()

        self.myindex += 1

        # start from the beginning if race ends
        if self.myindex == len(self.data):
            self.myindex = 0

        storm.logInfo("Emiting %s" % str(emit_data))
        storm.emit(emit_data)
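The replay spout above walks through self.data using self.myindex, but neither is shown being set up. A hypothetical initialize, assuming the per-second telemetry sits in a CSV file loaded with pandas imported as pd (both the path and the use of pandas are assumptions):

    def initialize(self, conf, context):
        # Hypothetical setup for the replay spout above; not part of the original example.
        self._conf = conf
        self._context = context
        self.data = pd.read_csv("race_telemetry.csv")
        self.myindex = 0
        storm.logInfo("Replay spout instance starting...")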
Example #9
 def process(self, tup):
     # Split the inbound sentence at spaces
     words = tup.values[0].split(" ")
     # Loop over words and emit
     for word in words:
         storm.logInfo("Emitting-----> %s" % word)
         storm.emit([word])
Example #10
 def nextTuple(self):
     time.sleep(0.2)
     # TODO
     # Task: randomly generate sentence from sentences string array
     sentence = random.choice(SENTENCES)
     storm.logInfo("Emitting %s" % sentence)
     storm.emit([sentence])
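The SENTENCES list these spouts draw from is not shown anywhere in the examples; a hypothetical module-level definition could be as simple as:

# Hypothetical sample data; the real sentences used by the examples are not shown.
SENTENCES = [
    "the cow jumped over the moon",
    "an apple a day keeps the doctor away",
    "four score and seven years ago",
    "snow white and the seven dwarfs",
    "i am at two with nature",
]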
Example #11
    def process(self, tuple):
        id_tweet, text = tuple.values
        storm.logInfo("LT3BOLTINFO")
        storm.logInfo(text)

        # keep the raw response in its own variable to avoid shadowing the json module
        response = get_res(text.encode('utf-8'))

        # the result we want is on the second-to-last line of the response
        response = response.split('\n')[-2]
        json_string = response.replace("'", '"')

        data = simplejson.loads(json_string)
        data['id'] = str(id_tweet)
        data['source'] = "LT3"
        data['info'] = text

        if (data['relevance_boolean'] == 1 and data['severity_boolean'] == 1):
            data['flag'] = "LT3"
        else:
            data['flag'] = "none"

        del data['relevance_boolean']
        del data['severity_boolean']

        json_string = simplejson.dumps(data)

        storm.emit([json_string])
Example #12
 def initialize(self, conf, context):
     self._conf = conf
     self._context = context
     storm.logInfo("Counter bolt instance starting...")
     self._top_words = Counter()
     self._N = 10
     self._top_N_map = {}
     self._top_N_heap = []
 def nextTuple(self):
     time.sleep(0.2)
     # TODO
     # Task: randomly generate sentence from sentences string array
     # Note: only generate one sentence in this function
     sentence = random.choice(SENTENCES)
     storm.logInfo("Emiting %s" % sentence)
     storm.emit([sentence])
Example #14
 def process(self, tup):
     # Split the inbound sentence at spaces
     words = re.split('[^a-zA-Z0-9-]', tup.values[0])
     # words = tup.values[0].split()
     # Loop over words and emit
     for word in words:
         if word:
             storm.logInfo("Emitting %s" % word)
             storm.emit([word])
Example #15
 def process(self, tup):
     # Get the word from the inbound tuple
     word = tup.values[0]
     # Increment the counter
     self._counter[word] += 1
     count = self._counter[word]
     storm.logInfo("Emitting &&&&&&&&&&&& %s:%s" % (word, count))
     # Emit the word and count
     storm.emit([word, count])
 def process(self, tup):
     # Get the word from the inbound tuple
     word = tup.values[0]
     # Increment the counter
     self._counter[word] += 1
     count = self._counter[word]
     storm.logInfo("Emitting %s:%s" % (word, count))
     # Emit the word and count
     storm.emit([word, count])
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context

        storm.logInfo("Counter bolt instance starting...")

        # TODO:
        # Task: set N
        pass
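        # One possible completion (an assumption, mirroring Example #30 further down and the
        # Counter/Top-N initialize shown earlier): read N from the topology configuration and
        # set up the min-heap bookkeeping.
        self._N = int(conf.get('topValue', 10))
        self._top_N_map = {}
        self._top_N_heap = []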
 def process(self, tup):
     # TODO:
     # Task 1: make the words all lower case
     storm.logInfo("Received tuple %s" % tup)
     line = tup.values[0]
     line = line.lower()
     # Task 2: remove the common words
     if line not in self._common_words and line != " ":
         storm.logInfo("Normalized word %s" % line)
         storm.emit([line])
Example #19
 def initialize(self, conf, context):
     self._conf = conf
     self._context = context
     self._topic = 'temptopic_words'
     self._producer = KafkaProducer(
         bootstrap_servers=[
             '10.78.68.45:9092', '10.78.68.46:9092', '10.78.68.47:9092'
         ],
         value_serializer=lambda m: json.dumps(m).encode('utf-8'))
     storm.logInfo("Split bolt instance starting...")
Example #20
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context
        self._complete = False

        storm.logInfo("Spout instance starting...")

        # TODO:
        # Task: Initialize the file reader
        pass
 def process(self, tup):
     # TODO
     # Task: word count
     # Hint: use an instance variable to track the word count
     word = tup.values[0]

     self._counter[word] += 1
     count = self._counter[word]
     storm.logInfo("Emitting %s:%s" % (word, count))
     storm.emit([word, count])
    def nextTuple(self):
        self._index += 1
        for msg in self._consumer:
            words = msg.value["data"]
            for word in words:
                tuple_id = str(uuid.uuid4())
                record = {"data": [word], "fail_count": 0}
                self._tuples[tuple_id] = record

                storm.logInfo("index %d - emiting: %s" % (self._index, word))
                storm.emit([word], id=tuple_id)
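    # Hypothetical ack/fail handlers for the reliable spout above (not shown in the original).
    # self._tuples keeps every in-flight tuple keyed by id so that failures can be replayed;
    # the retry limit of 3 is an assumption.
    def ack(self, tup_id):
        # downstream processing finished: forget the tuple
        self._tuples.pop(tup_id, None)

    def fail(self, tup_id):
        record = self._tuples.get(tup_id)
        if record is None:
            return
        record["fail_count"] += 1
        if record["fail_count"] <= 3:
            storm.logInfo("re-emitting failed tuple %s" % tup_id)
            storm.emit(record["data"], id=tup_id)
        else:
            storm.logInfo("giving up on tuple %s" % tup_id)
            del self._tuples[tup_id]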
 def process(self, tup):
     # TODO
     # Task: split sentence and emit words
     # Hint: split on "[^a-zA-Z0-9-]"
     storm.logInfo("The tuple has format %s" % tup)
     line = tup.values[0]
     line = re.sub(r"[^a-zA-Z0-9-]", ' ', line)
     line = line.split()
     for word in line:
         storm.logInfo("Emitting %s" % word)
         storm.emit([word])
    def init_model_when_necessary(self, station, query_type):
        if station not in self.dictionary:
            self.dictionary[station] = {}
            storm.logInfo("station[" + station + "] is initialized!")
        if query_type not in self.dictionary[station]:
            self.dictionary[station][query_type] = self.model_creation_mapping_table[query_type](station)
            storm.logInfo("station[" + station + "," + str(query_type) + "] is initialized!")

        # TODO: this function should return if there is any error in the model initialization.

        return 1
Example #25
    def process(self, tup):
        # TODO:
        # Task 1: make the words all lower case
        # Task 2: remove the common words

        word = tup.values[0]
        if word:
            word = word.lower()
            if word not in self._common_words:
                storm.logInfo("Normalize %s" % word)
                storm.emit([word])
Example #26
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context
        self._complete = False

        storm.logInfo("Spout instance starting...")

        # TODO:
        # Task: Initialize the file reader
        self._path = conf['input']
        self._file_reader = open(self._path, 'r')
Example #27
    def process(self, tup):
        # Split the inbound sentence at spaces

        # process tuples here
        # data = json.loads(tup)

        # words = tup.values[0].split()
        # # Loop over words and emit
        # for word in words:
        storm.logInfo("Emitting from DB BOLT")
        #     storm.emit([word])
        storm.emit([tup])
Example #28
    def process(self, tup):
        # Process block data here

        data = tup.values[0]
        # the payload is JSON-encoded twice, so decode it twice
        data = json.loads(data)
        data = json.loads(data)
        blockData = data["result"]

        transactions = blockData["transactions"]
        storm.logInfo("Emitting from Transaction BOLT")
        storm.emit([transactions])
Example #29
    def nextTuple(self):
        # TODO:
        # Task 1: read the next line and emit a tuple for it
        # Task 2: don't forget to sleep for 1 second when the file is
        #         entirely read to prevent a busy-loop

        line = self._f.readline()

        if line:
            storm.logInfo("Emiting %s" % line)
            storm.emit([line])
        else:
            sleep(1)
Example #30
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context

        storm.logInfo("Top-N bolt instance starting...")

        # TODO:
        # Task: set N
        self._nvalue = conf['topValue']
        # End
        self._countmap = {}
        self._max = 0
        self._min = 0
def getTimeValue(timestamp, entity, database):

    doc = database[entity].find_one({'Time': timestamp})
    doc = json.dumps(doc, sort_keys=True, indent=4, default=json_util.default)

    data = json.loads(doc)
    if data is not None:
        storm.logInfo(
            "-------------------------------------------------------------------> %s"
            % data["Value"])
        return data["Value"]
    else:
        storm.logInfo("-------> 0" + str(timestamp))
        return 0
Example #32
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context

        self._common_words = [
            "the", "be", "a", "an", "and", "of", "to", "in", "am", "is", "are",
            "at", "not", "that", "have", "i", "it", "for", "on", "with", "he",
            "she", "as", "you", "do", "this", "but", "his", "by", "from",
            "they", "we", "her", "or", "will", "my", "one", "all", "s", "if",
            "any", "our", "may", "your", "these", "d", " ", "me", "so", "what",
            "him", "their"
        ]

        storm.logInfo("Normalizer bolt instance starting...")
Example #33
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context
        self._complete = False

        storm.logInfo("Spout instance starting...")
        # self._myreaderfile = self._conf[input.file]
        self._myreadfilepointer = open("/tmp/data.txt")
        # storm.logInfo("%s" % self._myreaderfile)

        # TODO:
        # Task: Initialize the file reader
        pass
Example #34
    def process(self, tuple):
        tweet_id = tuple.values["tweet-id"]
        sentiment = tuple.values["sentiment"]
        country = tuple.values["country"]
        hashtags = tuple.values["hashtags"]
        storm.logInfo("Received tweet with tweet id: "+str(tweet_id))
        for h in hashtags:
            try:
                db_obj = Hashtag.get(hashtag=h)
                db_obj_count = db_obj.count
                db_obj_overall_sentiment = (((db_obj.overall_sentiment * db_obj_count) + sentiment) / (db_obj.count + 1))
                # use the country variable (not the literal "country") to look up this country's stats
                country_stats = db_obj.country_sentiment.get(country, {"sentiment": 0, "count": 0})
                country_stats["sentiment"] = (((country_stats["sentiment"] * country_stats["count"]) +
                                               sentiment) / (country_stats["count"] + 1))
                country_stats["count"] += 1
                db_obj.count += 1
                db_obj.overall_sentiment = db_obj_overall_sentiment
                db_obj.country_sentiment[country] = country_stats
                db_obj.save()
                storm.logInfo("Updating Hashtag: "+h)

            except:

                db_obj = Hashtag(hashtag=h, overall_sentiment=sentiment, country_sentiment={country: {
                    "sentiment": sentiment, "count": 1}}, count=1)
                db_obj.save()
                storm.logInfo("Inserting New Hashtag: "+h)
 def initialize(self, conf, context):
     self._conf = conf
     self._context = context
     # Create a new counter for this instance
     self._counter = Counter()
     storm.logInfo("Counter bolt instance starting...")
    def initialize(self, conf, context):
        self._conf = conf
        self._context = context

        storm.logInfo("Spout instance starting...")
 def nextTuple(self):
     # Emit a random sentence
     sentence = random.choice(SENTENCES)
     storm.logInfo("Emiting %s" % sentence)
     storm.emit([sentence])