Example #1
0
 def get_correctly_classified_tweets(self, tweets_and_sentiment):
     """
     Classify the given tweets and return the ones that were correctly classified.

     ``tweets_and_sentiment`` is an iterable of ``(tweet, sentimentvalues)``
     pairs; each tweet must expose a ``text`` attribute. The sentiment values
     are stored on ``self.test_words_and_values`` as a side effect.

     Returns a ``(correct_tweets, correct_sentimentvalues)`` pair of lists
     holding the tweets whose prediction equals the target produced by
     ``utils.make_subjectivity_targets``, together with their sentiment
     values. Both lists are empty when no pairs are given.
     """
     pairs = list(tweets_and_sentiment)
     if not pairs:
         # zip(*[]) yields nothing, so the unpacking below would raise an
         # opaque ValueError; an empty input simply has no correct tweets.
         return [], []

     tweets, sentimentvalues = zip(*pairs)
     # zip() always produces a tuple here, so there is no None case to guard.
     self.test_words_and_values = sentimentvalues

     count_vector = self.vect.transform([t.text for t in tweets])
     tfidf_count = self.tfidf_transformer.transform(count_vector)
     if self.only_text_features:
         combined_vector = tfidf_count
     else:
         dict_vector = self.dict_vectorizer.transform([
             features.get_feature_set(t, self.featureset, v)
             for t, v in zip(tweets, sentimentvalues)
         ])
         tfidf_dict = self.dict_transformer.transform(dict_vector)
         # Text features and dictionary features are placed side by side.
         combined_vector = sp.hstack([tfidf_count, tfidf_dict])

     predictions = self.best_estimator.predict(combined_vector)
     # NOTE(review): the indexing below assumes make_subjectivity_targets
     # returns the tweets in the same order and number as passed in - confirm.
     tweets, targets = utils.make_subjectivity_targets(tweets)

     # Keep only the tweets where the target matches the prediction.
     correct_tweets = []
     correct_sentimentvalues = []
     for i, tweet in enumerate(tweets):
         if predictions[i] == targets[i]:
             correct_tweets.append(tweet)
             correct_sentimentvalues.append(sentimentvalues[i])
     return correct_tweets, correct_sentimentvalues
Example #2
0
 def set_feature_set(self, featureset, sentimentvalues):
     """
     Remember the chosen feature set and build the training features for it.

     For the 'SA' and 'PA' feature sets the classifier is flagged as using
     only text features and ``self.feature_set`` is left as an empty dict.
     For any other feature set, one entry is extracted per training tweet,
     pairing ``self.train_tweets`` positionally with ``sentimentvalues``.
     """
     self.featureset = featureset

     if featureset in ('SA', 'PA'):
         self.only_text_features = True
         self.feature_set = {}
         return

     extracted = []
     for tweet, values in zip(self.train_tweets, sentimentvalues):
         extracted.append(features.get_feature_set(tweet, self.featureset, values))
     self.feature_set = extracted
     
             
Example #3
0
    def classify(self, tweets, sentimentvalues=None):
        """
        Performs the classification process on list of tweets.

        ``tweets`` is a sequence of objects exposing a ``text`` attribute.
        ``sentimentvalues``, when given, replaces ``self.test_words_and_values``
        and is paired positionally with ``tweets`` for feature extraction;
        when omitted, the previously stored values are used.

        Returns whatever ``self.best_estimator.predict`` returns for the
        combined feature matrix.
        """
        # Identity check: an equality test against None is elementwise on
        # array-like inputs and cannot be used as a plain truth value.
        if sentimentvalues is not None:
            self.test_words_and_values = sentimentvalues

        count_vector = self.vect.transform([t.text for t in tweets])
        tfidf_count = self.tfidf_transformer.transform(count_vector)
        if self.only_text_features:
            combined_vector = tfidf_count
        else:
            dict_vector = self.dict_vectorizer.transform([
                features.get_feature_set(t, self.featureset, v)
                for t, v in zip(tweets, self.test_words_and_values)
            ])
            tfidf_dict = self.dict_transformer.transform(dict_vector)
            # Text features and dictionary features are placed side by side.
            combined_vector = sp.hstack([tfidf_count, tfidf_dict])

        predictions = self.best_estimator.predict(combined_vector)

        return predictions