Ejemplo n.º 1
0
 def predict(self, username):
     """Return the prediction and text statistics stored or computed for ``username``.

     When ``self.get_solr_user`` returns a truthy profile it is reused as-is.
     Otherwise the user is extracted via ``self.ie``, features are computed and
     passed to ``self.pp.predict``, the most-used words, hashtags and bigrams
     are collected, and the converted profile is handed to
     ``self.solr.addUser``. In both cases the profile is then extended with
     similar users and with the split forms of its top words, hashtags and
     bigrams.

     Args:
         username: Key under which the profile is looked up and returned.

     Returns:
         A dict ``{username: profile}``. A freshly built profile holds the
         keys 'f', 'o', 'c', 'e', 'a', 'n', 'tweets', 'top_words', 'hashtags'
         and 'bigrams'; 'similar' is always added. The layout of a cached
         profile is whatever ``get_solr_user`` returns (assumed to match;
         confirm against FeaturesConverter).
     """
     cached_profile = self.get_solr_user(username)
     if cached_profile:
         # The parenthesised single-argument form prints the same text whether
         # ``print`` is the Python 2 statement or the Python 3 function.
         print('got data')
         prediction_dict = cached_profile
     else:
         user = self.ie.extract(username, self.tweet_count)
         user_features = FeaturesExtractor(user).get_features()
         predicted = self.pp.predict(user_features)
         preprocessor = Preprocessor(user.tweets_text)
         profile = {
             'f': user_features,
             'o': predicted['o'],
             'c': predicted['c'],
             'e': predicted['e'],
             'a': predicted['a'],
             'n': predicted['n'],
             'tweets': user.tweets_text,
             'top_words': preprocessor.most_used_words(max_count=self.search_config['top_words']),
             'hashtags': preprocessor.most_used_hashtags(max_count=self.search_config['top_hashtags']),
             'bigrams': preprocessor.most_used_bigrams(max_count=self.search_config['top_bigrams']),
         }
         # Empty text fields are stored as the '$' placeholder
         # (presumably so the indexed field is never empty; confirm).
         for field in ('hashtags', 'bigrams', 'top_words', 'tweets'):
             if profile[field] == '':
                 profile[field] = '$'
         prediction_dict = {username: profile}
         solr_dict = FeaturesConverter.convert_features_to_solr(prediction_dict)
         self.solr.addUser(solr_dict[0])

     similar = self.solr.getSimilarUsers(
         username,
         tf=self.search_config['tf'],
         df=self.search_config['df'],
         count=self.search_config['similar'],
     )
     profile = prediction_dict[username]
     profile.update({
         'similar': similar,
         'top_words': self.split_data(profile['top_words']),
         'hashtags': self.split_data(profile['hashtags']),
         'bigrams': self.split_bigrams(profile['bigrams']),
     })
     return prediction_dict
Ejemplo n.º 2
0
    def load_users(self):
        """Populate ``self.data`` for up to ``self.train_count`` users.

        Each username in ``self.data`` is first looked up through
        ``self.get_solr_user``; a truthy result replaces the entry and counts
        towards the limit. Otherwise the user is extracted, its features and
        most-used words/hashtags/bigrams are stored on the entry, and the
        converted entry is passed to ``self.solr.addUser``. A user whose
        ``tweets_text`` is empty is skipped without being counted. Users whose
        processing raises ZeroDivisionError or TweepError are removed from
        ``self.data`` once the loop has finished.
        """
        loaded = 0
        failed = []
        for username in self.data:
            if loaded == self.train_count:
                break

            cached = self.get_solr_user(username)
            if cached:
                self.data[username] = cached[username]
                loaded += 1
                continue

            try:
                user = self.ie.extract(username, self.tweet_count)
                extractor = FeaturesExtractor(user)
                entry = self.data[username]
                entry['f'] = extractor.get_features()

                tweets = user.tweets_text
                if tweets == '':
                    continue
                entry['tweets'] = tweets

                preprocessor = Preprocessor(tweets)
                limits = self.search_config
                entry['top_words'] = preprocessor.most_used_words(max_count=limits['top_words'])
                entry['hashtags'] = preprocessor.most_used_hashtags(max_count=limits['top_hashtags'])
                entry['bigrams'] = preprocessor.most_used_bigrams(max_count=limits['top_bigrams'])

                # Empty statistics are stored as the '$' placeholder.
                for field in ('hashtags', 'bigrams', 'top_words'):
                    if entry[field] == '':
                        entry[field] = '$'

                solr_docs = FeaturesConverter.convert_features_to_solr({username: entry})
                self.solr.addUser(solr_docs[0])
                loaded += 1
            except (ZeroDivisionError, TweepError):
                # Deletion is deferred: the dict is still being iterated here.
                failed.append(username)

        for name in failed:
            self.data.pop(name, None)
Ejemplo n.º 3
0
 def get_solr_user(self, username):
     """Look ``username`` up in Solr and convert the hit to the features layout.

     Returns None when ``self.solr.getUser`` yields nothing truthy; otherwise
     returns the result of ``FeaturesConverter.convert_solr_to_features``.
     """
     stored = self.solr.getUser(username)
     if not stored:
         return None
     return FeaturesConverter.convert_solr_to_features(stored)