def convertTweetJSONToMessage(tweet, **twitter_stream_settings):
    """Build a Message from a tweet dict and attach its phrase-count vector.

    Args:
        tweet: Mapping read for 'created_at', 'id', 'text' and
            tweet['user']['screen_name'].
        **twitter_stream_settings: Must contain 'min_phrase_length' and
            'max_phrase_length'; both are handed to getPhrases.

    Returns:
        The Message, whose ``vector`` maps every phrase yielded by getPhrases
        to the number of times it was yielded.
    """
    createdAt = getDateTimeObjectFromTweetTimestamp(tweet['created_at'])
    message = Message(
        tweet['user']['screen_name'],
        tweet['id'],
        tweet['text'],
        createdAt,
    )
    message.vector = Vector()

    words = getWordsFromRawEnglishMessage(tweet['text'])
    shortest = twitter_stream_settings['min_phrase_length']
    longest = twitter_stream_settings['max_phrase_length']
    for phrase in getPhrases(words, shortest, longest):
        # Seed unseen phrases at zero so the increment below is uniform.
        if phrase not in message.vector:
            message.vector[phrase] = 0
        message.vector[phrase] += 1
    return message
Example #2
0
 def convertTweetJSONToMessage(tweet, **twitter_stream_settings):
     """Turn a tweet dict into a Message carrying per-phrase counts.

     The Message is constructed from the tweet's screen name, id, text and
     the datetime parsed from 'created_at'. Its ``vector`` attribute is set
     to a new Vector that counts how often each phrase is produced by
     getPhrases for the tweet's text.

     Args:
         tweet: Mapping read for 'created_at', 'id', 'text' and
             tweet['user']['screen_name'].
         **twitter_stream_settings: Must contain 'min_phrase_length' and
             'max_phrase_length'.

     Returns:
         The populated Message.
     """
     timestamp = getDateTimeObjectFromTweetTimestamp(tweet['created_at'])
     screenName = tweet['user']['screen_name']
     message = Message(screenName, tweet['id'], tweet['text'], timestamp)
     message.vector = Vector()

     tokens = getWordsFromRawEnglishMessage(tweet['text'])
     phrases = getPhrases(
         tokens,
         twitter_stream_settings['min_phrase_length'],
         twitter_stream_settings['max_phrase_length'],
     )
     for phrase in phrases:
         # First sighting starts the counter at zero before incrementing.
         if phrase not in message.vector:
             message.vector[phrase] = 0
         message.vector[phrase] += 1
     return message
Example #3
0
 def _tweetIterator(self):
     """Accumulate each user's phrases into one space-separated string.

     Tweets are read with TwitterIterators.iterateFromFile from the file
     named ``self.fileName + '.gz'``. For every tweet, each phrase returned
     by getPhrases has its spaces replaced with ``unique_string``; the
     phrases are then joined with single spaces and appended to the text
     already collected for the tweet's screen name. Tweets that yield no
     phrases contribute nothing, so a user with no phrases at all is absent
     from the result.

     Returns:
         ``iteritems()`` of the mapping screen name -> accumulated text.
     """
     textByUser = {}
     for tweet in TwitterIterators.iterateFromFile(self.fileName + '.gz'):
         screenName = tweet['user']['screen_name']
         words = getWordsFromRawEnglishMessage(tweet['text'])
         candidates = getPhrases(
             words,
             self.stream_settings['min_phrase_length'],
             self.stream_settings['max_phrase_length'],
         )
         tokens = [item.replace(' ', unique_string) for item in candidates]
         if not tokens:
             continue
         joined = ' '.join(tokens)
         if screenName in textByUser:
             textByUser[screenName] += ' ' + joined
         else:
             textByUser[screenName] = joined
     return textByUser.iteritems()
Example #4
0
 def _tweetWithTimestampIterator(self):
     """Merge every user's tweets into a single tweet-shaped dict.

     Tweets are read with TwitterIterators.iterateFromFile from the file
     named ``self.fileName + '.gz'``. Each user's record holds:

     * 'user': ``{'screen_name': <screen name>}``
     * 'id' and 'created_at': overwritten on every tweet, so they end up
       holding the values of the last tweet read for that user
     * 'text': starts as a single space; every tweet that yields phrases
       appends ``' '`` plus its phrases joined by spaces, with the spaces
       inside each phrase replaced by ``unique_string``

     Returns:
         ``iteritems()`` of the mapping screen name -> record dict.
     """
     records = defaultdict(dict)
     for tweet in TwitterIterators.iterateFromFile(self.fileName + '.gz'):
         screenName = tweet['user']['screen_name']
         record = records[screenName]
         record['user'] = {'screen_name': screenName}
         record['id'] = tweet['id']
         record['created_at'] = tweet['created_at']
         # Only the first tweet seen for a user seeds the text field.
         record.setdefault('text', ' ')

         words = getWordsFromRawEnglishMessage(tweet['text'])
         candidates = getPhrases(
             words,
             self.stream_settings['min_phrase_length'],
             self.stream_settings['max_phrase_length'],
         )
         tokens = [item.replace(' ', unique_string) for item in candidates]
         if tokens:
             record['text'] += ' ' + ' '.join(tokens)
     return records.iteritems()
 def _tweetWithTimestampIterator(self):
     """Build one aggregated, tweet-like dict per user from the tweet file.

     For each tweet read from ``self.fileName + '.gz'`` the user's entry has
     its 'user', 'id' and 'created_at' keys overwritten with that tweet's
     values. The 'text' key is seeded with a single space the first time
     the user is seen and then grows by ``' '`` plus the tweet's phrases
     (spaces inside a phrase replaced by ``unique_string``, phrases joined
     by single spaces) whenever the tweet yields at least one phrase.

     Returns:
         ``iteritems()`` of the mapping screen name -> aggregated dict.
     """
     perUser = defaultdict(dict)
     source = TwitterIterators.iterateFromFile(self.fileName + '.gz')
     for tweet in source:
         name = tweet['user']['screen_name']
         entry = perUser[name]
         entry['user'] = {'screen_name': name}
         entry['id'] = tweet['id']
         entry['created_at'] = tweet['created_at']
         if 'text' not in entry:
             entry['text'] = ' '

         rawPhrases = getPhrases(
             getWordsFromRawEnglishMessage(tweet['text']),
             self.stream_settings['min_phrase_length'],
             self.stream_settings['max_phrase_length'],
         )
         encoded = [p.replace(' ', unique_string) for p in rawPhrases]
         if encoded:
             entry['text'] += ' ' + ' '.join(encoded)
     return perUser.iteritems()
 def _tweetIterator(self):
     """Map each screen name to the concatenation of that user's phrases.

     Reads tweets from ``self.fileName + '.gz'`` through
     TwitterIterators.iterateFromFile. The phrases getPhrases returns for a
     tweet have their inner spaces replaced with ``unique_string`` and are
     joined by single spaces; that chunk becomes the user's text on first
     sight and is appended after a space on later tweets. Tweets producing
     no phrases are skipped.

     Returns:
         ``iteritems()`` of the mapping screen name -> phrase text.
     """
     perUser = {}
     source = TwitterIterators.iterateFromFile(self.fileName + '.gz')
     for tweet in source:
         name = tweet['user']['screen_name']
         rawPhrases = getPhrases(
             getWordsFromRawEnglishMessage(tweet['text']),
             self.stream_settings['min_phrase_length'],
             self.stream_settings['max_phrase_length'],
         )
         encoded = [p.replace(' ', unique_string) for p in rawPhrases]
         if encoded:
             chunk = ' '.join(encoded)
             if name in perUser:
                 perUser[name] += ' ' + chunk
             else:
                 perUser[name] = chunk
     return perUser.iteritems()