Ejemplo n.º 1
0
 def get_unique_tweets(self, data_dict):
     """Score a not-yet-seen tweet and publish per-company sentiment.

     Payloads carrying ``retweeted_status`` and payloads for which
     ``URL.match`` finds nothing are ignored.  The matched text is compared
     against ``self.key_list``: if it is a substring of any stored entry the
     tweet is treated as a duplicate and dropped.  Otherwise the match is
     recorded, ``data_dict['text']`` is overwritten with it, the lower-cased
     ASCII tweet text is scored with ``SentimentIntensityAnalyzer`` and the
     resulting score/text are pushed through ``p.trigger`` for every company
     whose keyword list (``GOOGLE``, ``MICROSOFT``, ``FACEBOOK``) hits the
     text.  The three ``*_price`` attributes are reset to 0 afterwards; the
     ``*_text`` attributes are left as they are.

     :param data_dict: decoded tweet payload, expected to be a mapping with
         a ``'text'`` entry.  A payload without one (or a non-subscriptable
         payload such as ``None``) is reported via ``self.log_error``
         instead of raising.
     :return: None
     """
     # TODO: Implement filter to check if Tweet text starts with 'RT'
     try:
         text = data_dict['text'].encode('ascii', 'ignore').lower()
         # 'retweeted_status' in the metadata marks a retweet (1st check).
         if 'retweeted_status' in data_dict:
             return
         url_match = URL.match(text)
         if not url_match:
             return
         match_group = url_match.group()
         if not self.key_list:
             # NOTE(review): the very first match only seeds key_list and
             # is never scored or published -- confirm this is intended.
             self.key_list.append(match_group)
             return
         # Substring comparison against every previously stored match.
         if any(match_group in item for item in self.key_list):
             return

         data_dict['text'] = match_group
         # Single-argument parenthesised print behaves the same on
         # Python 2 and Python 3.
         print("Inserted text: " + data_dict['text'] + '\n')
         self.key_list.append(match_group)

         # Build the analyzer once per instance and reuse it; it was
         # previously re-created for every accepted tweet.
         analyzer = getattr(self, '_sentiment_analyzer', None)
         if analyzer is None:
             analyzer = SentimentIntensityAnalyzer()
             self._sentiment_analyzer = analyzer
         score = analyzer.polarity_scores(text)['compound']
         print(score)
         if score < 0:
             # Negative sentiment is amplified: score + 3 * score.
             score += (3 * score)

         # A company is updated when any of its keywords occurs in the
         # text and its current price is not negative.
         if any(w in text for w in GOOGLE) and self.google_price >= 0:
             self.google_price = score
             self.google_text = text
         if any(w in text for w in MICROSOFT) and self.microsoft_price >= 0:
             self.microsoft_price = score
             self.microsoft_text = text
         if any(w in text for w in FACEBOOK) and self.facebook_price >= 0:
             self.facebook_price = score
             self.facebook_text = text

         p.trigger('test_channel', 'my_event',
                   {'google': self.google_price,
                    'microsoft': self.microsoft_price,
                    'facebook': self.facebook_price})
         p.trigger('tweet_channel', 'my_event',
                   {
                       'google_text': self.google_text,
                       'microsoft_text': self.microsoft_text,
                       'facebook_text': self.facebook_text
                   })
         # Prices are per-event values; clear them once published.
         self.google_price = 0
         self.microsoft_price = 0
         self.facebook_price = 0
     except (KeyError, TypeError) as e:
         # KeyError: payload has no 'text' entry; TypeError: payload is not
         # subscriptable or a field has an unexpected type.
         sys.stderr.write(str(e) + '\n')
         self.log_error(str(e))
Ejemplo n.º 2
0
 def get_unique_tweets(self, data_dict):
     """Insert a tweet into the collection unless it is a retweet or repeat.

     Payloads carrying ``retweeted_status`` are ignored.  For the rest the
     current collection size is printed, then:

     * if ``URL.match`` finds nothing in the lower-cased ASCII text, the
       payload is inserted as is;
     * otherwise the matched text is compared against ``self.key_list``.
       If it is a substring of any stored entry the tweet is dropped as a
       duplicate.  If not, the match is recorded, ``data_dict['text']`` is
       overwritten with it, the payload is inserted and, when ``self.wtf``
       is ``True``, the text is appended as one JSON line to
       ``self.filename``.

     :param data_dict: decoded tweet payload, expected to be a mapping with
         a ``'text'`` entry.  A payload without one (or a non-subscriptable
         payload such as ``None``) is reported via ``self.log_error``
         instead of raising.
     :return: None
     """
     # TODO: Implement filter to check if Tweet text starts with 'RT'
     try:
         text = data_dict['text'].encode('ascii', 'ignore').lower()
         # 'retweeted_status' in the metadata marks a retweet (1st check).
         if 'retweeted_status' in data_dict:
             return
         # Single-argument parenthesised print behaves the same on
         # Python 2 and Python 3.
         print("Number of tweets in collection: " +
               str(self.stream_filter.collection.count()))
         url_match = URL.match(text)
         if not url_match:
             # No URL match: stored without any duplicate check.
             print("Inserted text: " + text)
             self.stream_filter.collection.insert(data_dict)
             return

         match_group = url_match.group()
         if not self.key_list:
             # NOTE(review): the very first match only seeds key_list and
             # is never inserted -- confirm this is intended.
             self.key_list.append(match_group)
             return
         # Substring comparison against every previously stored match.
         if any(match_group in item for item in self.key_list):
             return

         data_dict['text'] = match_group
         print("Inserted text: " + data_dict['text'] + '\n')
         self.key_list.append(match_group)
         self.stream_filter.collection.insert(data_dict)
         if self.wtf is True:
             # Append mode creates the file when it does not exist yet, so
             # no separate isfile()/'w' branch is needed.
             with open(self.filename, 'a') as outfile:
                 json.dump(data_dict['text'], outfile)
                 outfile.write('\n')
     except (KeyError, TypeError) as e:
         # KeyError: payload has no 'text' entry; TypeError: payload is not
         # subscriptable or a field has an unexpected type.
         sys.stderr.write(str(e) + '\n')
         self.log_error(str(e))