def rebuild_features(self):
    """Re-parse all tweet files and regenerate the saved feature data.

    Steps, in order:
      1. Parse every tweet file via ``tweetparser.parse_all_files`` using
         ``self.parser_options``.
      2. Build the unigram feature dictionary from the training tweets and
         the labels read from ``TRAINING``.
      3. Turn the training, testing and development tweets into feature
         vectors and write each set as comma-delimited text to
         ``TRAINING_DATA_FILE``, ``TESTING_DATA_FILE`` and
         ``DEVELOPMENT_DATA_FILE`` respectively.
      4. Pickle the unigram feature dictionary to ``UNIGRAM_FEATURES_FILE``.

    Progress messages are printed to stdout. Returns nothing.
    """

    def load_tweets(path):
        # Use a context manager so the pickle file is closed immediately
        # instead of whenever the handle happens to be garbage collected.
        with open(path, 'rb') as tweet_file:
            return pickle.load(tweet_file)

    def vectorize_and_save(tweets, features, output_path):
        # Feature vectors for every split are built from the same unigram
        # features (those derived from the training data).
        vectors = buildfeatures.get_feature_vectors(tweets, features)
        np.savetxt(output_path, vectors, delimiter=',')

    print('Parsing tweets...')
    tweetparser.parse_all_files(self.parser_options)

    print('Building features...')

    # Training data: the only split that contributes to the feature dict.
    training_labels = read_labels(TRAINING)
    training_tweets = load_tweets(TRAINING_TWEETS)
    unigram_features = buildfeatures.build_unigram_feature_dict(
        training_tweets, training_labels)
    vectorize_and_save(training_tweets, unigram_features, TRAINING_DATA_FILE)

    # Testing data.
    vectorize_and_save(
        load_tweets(TESTING_TWEETS), unigram_features, TESTING_DATA_FILE)

    # Development data.
    vectorize_and_save(
        load_tweets(DEVELOPMENT_TWEETS), unigram_features,
        DEVELOPMENT_DATA_FILE)

    # Save the processed unigram features; -1 selects the highest pickle
    # protocol available. Closing the file explicitly guarantees the data
    # is flushed to disk before this method returns.
    with open(UNIGRAM_FEATURES_FILE, 'wb') as features_file:
        pickle.dump(unigram_features, features_file, -1)
def classify_custom_tweets(self, custom_filename):
    """Classify the tweets in a user-supplied file and print the outcome.

    The file is parsed with ``tweetparser._parse_tweets``, its labels and
    topics are read with ``read_labels`` / ``read_topics``, and feature
    vectors are built from the unigram features previously pickled to
    ``UNIGRAM_FEATURES_FILE``. Everything is then handed to
    ``self.classifier.predict_testing_data``, whose return value is printed;
    ``RESULTS_FILE`` is passed to it and reported as the location of the
    labels.

    Args:
        custom_filename: Path of the tweet file to classify.

    Returns:
        None. All feedback, including failures, is printed to stdout; a
        missing file or a processing error never raises to the caller.
    """
    if not os.path.exists(custom_filename):
        print('The file ' + custom_filename + ' does not exist.')
        return

    try:
        print('Parsing tweets...')
        custom_tweets = []

        def collect(tweet):
            # Callback handed to the parser; gathers each parsed tweet.
            custom_tweets.append(tweet)

        tweetparser._parse_tweets(custom_filename, collect)
        # Read once; the original read labels and topics a second time
        # before predicting, which only repeated the same file reads.
        labels = read_labels(custom_filename)
        topics = read_topics(custom_filename)

        print('Building features...')
        # NOTE: pickle executes code on load; UNIGRAM_FEATURES_FILE must be
        # a trusted file (it is the one written by rebuild_features).
        with open(UNIGRAM_FEATURES_FILE, 'rb') as features_file:
            unigram_features = pickle.load(features_file)
        data = buildfeatures.get_feature_vectors(
            custom_tweets, unigram_features)

        print('Predicting labels...')
        results = self.classifier.predict_testing_data(
            custom_tweets, data, topics, labels, RESULTS_FILE)
        print('Results: ' + str(results))
        print('See labels at: ' + RESULTS_FILE)
    except Exception as error:
        # Stay best-effort, but do not swallow KeyboardInterrupt/SystemExit
        # (as a bare ``except:`` would) and show what actually failed.
        print('Something went wrong. File may be in wrong format.')
        print('Details: ' + repr(error))