def load_session(filename='.session'):
    """Populate the module-level ``session`` from a saved session file.

    The file is read with ``util.unpickle_file(filename)``.  Loading is
    best-effort: if that call raises any ``Exception`` the error is printed
    and ``session`` is reset to an empty dict, so ``session`` is always
    bound once this function returns.

    Args:
        filename: Path handed to ``util.unpickle_file``; defaults to
            ``'.session'``.

    Returns:
        None.  The result is stored in the global ``session``.
    """
    global session
    try:
        # NOTE(review): presumably this deserializes a pickle -- if so, only
        # point it at files from a trusted source.  TODO confirm.
        session = util.unpickle_file(filename)
    except Exception as e:
        # ``except ... as`` and the single-argument ``print(...)`` produce
        # the same output on Python 2.6+ and are also valid Python 3 syntax.
        print("Couldn't load session: %s" % e)
        session = {}
def sentiment_test(self, path_to_testing_data):
    """Evaluate ``self.sentiment_classifier`` against labeled test posts.

    Args:
        path_to_testing_data: Path passed to ``util.unpickle_file``.  The
            loaded object is iterated as ``(post, sentiment)`` pairs, where
            ``sentiment`` must be ``'positive'`` or ``'negative'``, and it
            must support ``len()``.

    Returns:
        A 4-tuple ``(false_positive_rate, false_negative_rate, accuracy,
        errors)``:

        * ``false_positive_rate`` -- wrong ``'positive'`` guesses divided by
          the number of posts labeled ``'negative'``.
        * ``false_negative_rate`` -- all other wrong guesses divided by the
          number of posts labeled ``'positive'``.
        * ``accuracy`` -- correctly guessed posts divided by all posts.
        * ``errors`` -- list of ``(guess, sentiment, post)`` tuples, one per
          misclassified post.

        A ratio whose denominator is zero (empty test set, or a test set
        containing only one of the two labels) is reported as ``0.0``
        instead of raising ``ZeroDivisionError``.

    Raises:
        AssertionError: If a label is neither ``'positive'`` nor
            ``'negative'``.
    """
    # 1) Unpickle data
    posts = util.unpickle_file(path_to_testing_data)

    # 2) Extract features
    testing_set = self.lazy_apply_feautres(posts)

    # 3) Test with classifier
    errors = []
    f_iter = testing_set.iterate_from(0)
    false_pos = false_neg = num_pos = num_neg = 0
    for post, sentiment in posts:
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        if sentiment not in ('positive', 'negative'):
            raise AssertionError("Unknown label: '%s'" % (sentiment,))

        if sentiment == 'positive':
            num_pos += 1
        else:
            num_neg += 1

        # The feature iterator is advanced in lockstep with ``posts``; only
        # element 0 of each item is handed to the classifier.
        # ``next(it)`` works on Python 2.6+ and Python 3.
        features = next(f_iter)[0]
        guess = self.sentiment_classifier.classify(features)
        if guess != sentiment:
            errors.append((guess, sentiment, post))
            if guess == 'positive':
                false_pos += 1
            else:
                false_neg += 1

    # Guard each ratio separately: any of the denominators may be zero.
    total = len(posts)
    false_pos_rate = float(false_pos) / num_neg if num_neg else 0.0
    false_neg_rate = float(false_neg) / num_pos if num_pos else 0.0
    accuracy = float(total - len(errors)) / total if total else 0.0
    return (false_pos_rate, false_neg_rate, accuracy, errors)
def load_session(filename='.session'):
    """Populate the module-level ``session`` from a saved session file.

    The file is read with ``util.unpickle_file(filename)``.  Loading is
    best-effort: if that call raises any ``Exception`` the error is printed
    and ``session`` is reset to an empty dict, so ``session`` is always
    bound once this function returns.

    Args:
        filename: Path handed to ``util.unpickle_file``; defaults to
            ``'.session'``.

    Returns:
        None.  The result is stored in the global ``session``.
    """
    global session
    try:
        # NOTE(review): presumably this deserializes a pickle -- if so, only
        # point it at files from a trusted source.  TODO confirm.
        session = util.unpickle_file(filename)
    except Exception as e:
        # ``except ... as`` and the single-argument ``print(...)`` produce
        # the same output on Python 2.6+ and are also valid Python 3 syntax.
        print("Couldn't load session: %s" % e)
        session = {}
def sentiment_train(self, path_to_labeled_data, lower_threshold=0,
                    negative_bucket=False):
    """Train ``self.sentiment_classifier`` from a file of labeled posts.

    Steps, in order:

    1. Store ``lower_threshold`` and ``negative_bucket`` on the instance.
    2. Load the posts with ``util.unpickle_file``.
    3. Set ``self.sentiment_features`` from
       ``self.build_sentiment_features(posts)``.
    4. Build the training set with ``self.lazy_apply_feautres(posts)``.
    5. Train a ``naivebayes.NaiveBayesClassifier`` on that set, store it as
       ``self.sentiment_classifier`` and call its
       ``show_most_informative_features(300)``.

    Args:
        path_to_labeled_data: Path passed to ``util.unpickle_file``.
        lower_threshold: Saved as ``self.lower_threshold`` (default ``0``).
        negative_bucket: Saved as ``self.negative_bucket`` (default
            ``False``).

    Returns:
        None.  All results are stored on ``self``.
    """
    # Settings are stored before any helper runs -- presumably the feature
    # helpers read them; TODO confirm.
    self.lower_threshold = lower_threshold
    self.negative_bucket = negative_bucket

    # Load the labeled posts.
    labeled_posts = util.unpickle_file(path_to_labeled_data)

    # Derive the sentiment feature set from the posts.
    feature_table = self.build_sentiment_features(labeled_posts)
    self.sentiment_features = feature_table

    # Turn the posts into the classifier's training set.
    train_data = self.lazy_apply_feautres(labeled_posts)

    # Fit the classifier, keep it on the instance, then show its features.
    trained = naivebayes.NaiveBayesClassifier.train(train_data)
    self.sentiment_classifier = trained
    self.sentiment_classifier.show_most_informative_features(300)