"""Classify tweets given as JSON command-line arguments with a stored SVM model.

Every argument after the script name is decoded with ``json.loads``, cleaned
and converted to a feature vector through ``helper``, and classified with the
model loaded from ``python/tweetClassifier.model``.  One translated label is
printed per argument, in argument order.
"""
import os
import os.path
import sys
import json
import pickle
import cStringIO

# NOTE(review): machine-specific absolute path; the libsvm Python bindings are
# only importable on a host where this directory exists.
sys.path.append(
    '/Users/rickardbergeling/GitHub/CS4242-Social-Media-Computing/Assignment03/python/libsvm-master/python'
)

import svm
import dictionary
from svmutil import svm_load_model, svm_predict
import helper as helperClass

helper = helperClass.Helper("python/data/stopwords.txt")

# Close the pickle file deterministically instead of leaking the handle.
with open("python/featureList.pkl", "rb") as featureListFile:
    helper.setFeatureList(pickle.load(featureListFile))

# One feature vector per JSON-encoded command-line argument.
featureVectors = [
    helper.getFeatureVector(helper.clean(json.loads(argument)))
    for argument in sys.argv[1:]
]

model = svm_load_model('python/tweetClassifier.model')

# Discard whatever svm_predict writes to stdout so that only the labels below
# are printed.  The try/finally guarantees stdout is restored even when the
# prediction raises.
save_stdout = sys.stdout
sys.stdout = cStringIO.StringIO()
try:
    # A list of zeros is passed as the label argument (presumably placeholders,
    # since true labels are not available here - confirm).
    predictedLabels, predictionAccuracy, predictionValues = svm_predict(
        [0] * len(featureVectors), featureVectors, model)
finally:
    sys.stdout = save_stdout

for label in predictedLabels:
    print(dictionary.translateLabel(int(label)))
# NOTE(review): rawLabels, rawTweets, helper, model, reader and dictionary are
# not defined in this section; presumably they are set up earlier in the script.

print("Translating labels...")
# Convert labels to numbers
labels = [dictionary.translateSentiment(rawLabel) for rawLabel in rawLabels]

print("Cleaning tweets...")
# Clean all the tweets, including stopword removal, stemming and custom feature removal
tweets = []
words = set()
for t in rawTweets:
    cleanedTweet = helper.clean(t)
    tweets.append(cleanedTweet)
    # Update in place: rebuilding the set with union() on every iteration
    # copies the whole accumulated vocabulary once per tweet.
    words.update(helper.extractWords(cleanedTweet))

print("Generating feature vectors...")
# Generate feature vectors for each one of the tweets
featureVectors = [helper.getFeatureVector(t) for t in tweets]

# Predict with the known labels supplied as the first argument.
p_labs, p_acc, p_vals = svm_predict(labels, featureVectors, model)

# Map each predicted value back through dictionary.translateLabel.
translatedLabels = [dictionary.translateLabel(int(predicted)) for predicted in p_labs]

# Save the raw tweets with predicted and original labels, then run the stats
# step on the saved file.
result_output_file = "data/output/result.csv"
reader.saveSVMOutput(result_output_file, rawTweets, translatedLabels, rawLabels)
reader.getStats(result_output_file)