Example #1
0
# Build the SVM training and test files for a pair of tags.
#
# Usage: python <this script> TAG1 TAG2
#
# Reads (vector, tag) pairs from vector_data.cpickle, keeps the pairs whose
# tag is TAG1 or TAG2, shuffles them and writes the first 80% to
# training_data.txt and the rest to test_data.txt, one "<label> <features>"
# line per example.  TAG1 is labelled 1 and TAG2 is labelled -1.

import cPickle
import random
import sys

import featurize

if len(sys.argv) < 3:
  sys.exit("usage: %s TAG1 TAG2" % sys.argv[0])

tag1 = sys.argv[1].lower()
tag2 = sys.argv[2].lower()

# Numeric class label written for each tag.
outputs = {tag1: 1, tag2: -1}

# SECURITY: unpickling can execute arbitrary code; vector_data.cpickle must
# come from a trusted source.
# NOTE(review): the file is opened in text mode ("r") as before; the mode has
# to match the one used when the pickle was written -- confirm with the writer.
with open("vector_data.cpickle", "r") as pickle_file:
  tr = cPickle.load(pickle_file)

# Keep only the examples for the two requested tags, as (label, vector).
selected = [(outputs[tag], vector) for vector, tag in tr if tag in outputs]

random.shuffle(selected)
split = int(len(selected) * 0.80)

with open("training_data.txt", "w") as training_file:
  for label, vector in selected[:split]:
    training_file.write("%d %s\n" % (label, featurize.format_features(vector)))

with open("test_data.txt", "w") as test_file:
  for label, vector in selected[split:]:
    # NOTE(review): remove_tag receives the numeric label (1 / -1), not the
    # tag string, and is applied to test rows only -- confirm this is intended.
    v = featurize.remove_tag(label, vector)
    test_file.write("%d %s\n" % (label, featurize.format_features(v)))
Example #2
0
# Classify a piece of text as one of two tags.
# This does the training from scratch each time.
#
# Usage: python <this script> TAG1 TAG2 TEXT...
#
# Steps: regenerate training_data.txt / test_data.txt for the two tags, train
# with autosvm.py, featurize TEXT, run svm-predict on it and print the
# winning tag.

import os
import subprocess
import sys
import time

import conf
import featurize


def _run(argv):
  # Run argv directly (no shell, so arguments are never re-parsed or
  # interpreted as shell syntax) and return its stdout as text.  The exit
  # status is not checked, matching the previous os.popen behaviour.
  proc = subprocess.Popen(argv, stdout=subprocess.PIPE, universal_newlines=True)
  return proc.communicate()[0]


if len(sys.argv) < 3:
  sys.exit("usage: %s TAG1 TAG2 TEXT..." % sys.argv[0])

tag1 = sys.argv[1].lower()
tag2 = sys.argv[2].lower()
text = " ".join(sys.argv[3:])

_run(["python", "gen_training_test_set.py", tag1, tag2])

output = _run(["python", "autosvm.py", "training_data.txt", "test_data.txt"])
# The model directory is whatever follows the last ':' in autosvm's output.
# NOTE(review): the paths below are built by plain concatenation, so this
# presumably ends with a path separator -- confirm against autosvm.py.
model_dir = output[output.rfind(":") + 1:].strip()

vector = featurize.vectorize(text)
line = featurize.format_features(vector)

# Timestamped name for the single-example input file.
classify_path = model_dir + "%f.classify" % time.time()
# Close (and therefore flush) the file before svm-predict reads it.
with open(classify_path, "w") as classify_file:
  classify_file.write(line)

predict_path = classify_path + ".predict"
_run([conf.LIBSVM_DIR + "svm-predict", classify_path, model_dir + "model", predict_path])

with open(predict_path, "r") as predict_file:
  prediction = predict_file.read().split()[0]

# A prediction of -1 selects tag2; anything else selects tag1.
if prediction == "-1":
  print(tag2)
else:
  print(tag1)