コード例 #1
0
import os, os.path
import sys, json
import cStringIO
sys.path.append(
    '/Users/rickardbergeling/GitHub/CS4242-Social-Media-Computing/Assignment03/python/libsvm-master/python'
)
import svm
import pickle
import dictionary
from svmutil import *
import helper as helperClass

# Build the text-processing helper with the stopword list, then load the
# feature list produced during training.  Use ``with`` so the pickle file
# handle is closed deterministically instead of being leaked.
helper = helperClass.Helper("python/data/stopwords.txt")
with open("python/featureList.pkl", "rb") as featureListFile:
    helper.setFeatureList(pickle.load(featureListFile))

# Each command-line argument is one JSON-encoded tweet: parse it, clean it
# (stopword removal etc. inside helper.clean), and turn it into an SVM
# feature vector.  Iterate sys.argv[1:] directly instead of indexing via
# range(1, len(sys.argv)).
featureVectors = [
    helper.getFeatureVector(helper.clean(json.loads(rawTweet)))
    for rawTweet in sys.argv[1:]
]

# Load the pre-trained SVM model used to classify the tweets.
model = svm_load_model('python/tweetClassifier.model')

# svm_predict prints accuracy information to stdout; silence it by
# redirecting stdout to an in-memory buffer.  Restore stdout in a finally
# block so an exception inside svm_predict cannot leave sys.stdout
# pointing at the throwaway buffer for the rest of the process.
save_stdout = sys.stdout
sys.stdout = cStringIO.StringIO()
try:
    # Dummy ground-truth labels ([0] per vector) — we only want predictions.
    predictedLabels, predictionAccuracy, predictionValues = svm_predict(
        [0] * len(featureVectors), featureVectors, model)
finally:
    sys.stdout = save_stdout

for label in predictedLabels:
    print dictionary.translateLabel(int(label))
コード例 #2
0
print "Translating labels..."
# Convert labels to numbers
labels = []
for l in rawLabels:
	labels.append(dictionary.translateSentiment(l))

print "Cleaning tweets..."
# Clean all the tweets, including stopword removal, stemming and custom feature removal
tweets = []
words = set()
for t in rawTweets:
	cleanedTweet = helper.clean(t)
	tweets.append(cleanedTweet)
	words = words.union(helper.extractWords(cleanedTweet))

print "Generating feature vectors..."
# Generate feature vectors for each one of the tweets
featureVectors = []
for t in tweets:
	featureVectors.append(helper.getFeatureVector(t))

# Run the classifier; passing the ground-truth labels lets svm_predict
# report accuracy alongside the predictions.
p_labs, p_acc, p_vals = svm_predict(labels, featureVectors, model)

# Map the numeric predictions back to human-readable labels.
translatedLabels = [dictionary.translateLabel(int(p)) for p in p_labs]

result_output_file = "data/output/result.csv"
reader.saveSVMOutput(result_output_file, rawTweets, translatedLabels, rawLabels)
reader.getStats(result_output_file)