Example #1
0
        self.start_time = time.time()

    def Train(self, training_data):
        """Fit the feature pipeline and the SVM on labelled questions.

        Each question contributes its ``raw_words`` as one document. Its
        label is the position in ``core.popular_tags`` of the first tag
        produced by iterating ``question.tag_list``.

        Args:
            training_data: iterable of question objects exposing
                ``raw_words`` and ``tag_list``.

        Raises:
            StopIteration: if a question has an empty ``tag_list``.
            ValueError: if the chosen tag is missing from
                ``core.popular_tags`` (assuming it is a list -- TODO confirm).
        """
        training_list = []
        label_list = []
        for question in training_data:
            training_list.append(question.raw_words)
            # Builtin next() replaces the Python-2-only iterator.next()
            # method; it behaves identically and also works on Python 3.
            # NOTE(review): if tag_list is an unordered collection the tag
            # picked here is arbitrary -- confirm this is intended.
            first_tag = next(iter(question.tag_list))
            label_list.append(core.popular_tags.index(first_tag))

        # self.cv / self.tfidf / self.svd are presumably a count vectorizer,
        # a TF-IDF transformer and an SVD reducer -- they are created outside
        # this method. Each stage is fitted on the training data in turn.
        term_counts = self.cv.fit_transform(training_list)
        weighted_counts = self.tfidf.fit_transform(term_counts)
        decomposed_data = self.svd.fit_transform(weighted_counts)
        self.svm.fit(decomposed_data, label_list)

    def Classify(self, eval_question):
        """Predict the tag for a single question.

        The question's ``raw_words`` are pushed through the already fitted
        feature pipeline and the resulting vector is given to the SVM.

        Args:
            eval_question: question object exposing ``raw_words``.

        Returns:
            A one-element list holding the entry of ``core.popular_tags``
            selected by the SVM's predicted class index.
        """
        term_counts = self.cv.transform([eval_question.raw_words])
        # Use transform(), not fit_transform(): refitting here would replace
        # the weighting learned in Train() with one computed from this single
        # question, so evaluation features would no longer match the
        # features the SVM was trained on.
        weighted_counts = self.tfidf.transform(term_counts)
        eval_doc = self.svd.transform(weighted_counts)
        predicted_class = core.popular_tags[self.svm.predict(eval_doc)[0]]
        # Elapsed time since self.start_time was recorded. The parenthesised
        # single-argument form prints the same text on Python 2 and 3.
        print("DEBUG: Classified question, svm. %f" % (time.time() - self.start_time))
        return [predicted_class]


def knn_factory():
    """Return a freshly constructed KNNClassifier."""
    classifier = KNNClassifier()
    return classifier

def svm_factory():
    """Return a freshly constructed OVASVMClassifier."""
    classifier = OVASVMClassifier()
    return classifier


# Script entry: pass the command-line arguments to setup(), which is defined
# elsewhere in this file (presumably it populates `questions` and
# `truncated_popular_tags` -- TODO confirm).
setup(sys.argv)

# Alternative run using the KNN classifier, kept for reference:
#ev.leave_one_out(knn_factory, ev.eval_tp1, './knn_eval', questions, set(truncated_popular_tags), threads=1)

# Evaluate the SVM classifier; a new classifier is obtained from svm_factory.
ev.leave_one_out(
    svm_factory,
    ev.eval_tp1,
    './ova_svm_eval',
    questions,
    set(truncated_popular_tags),
    threads=1
)
import sets
import os
import eval_classifier as ev
import preprocess_to_questions as pq
# comment out if you don't want dependency on joblib
#from joblib import Parallel, delayed

# Root directory handed to pq.read_questions().
PROJECT_PATH = '/home/nclimer/autotagger/'

# Tags passed to pq.filter_tags() to narrow the question set.
tags_to_consider = ['javascript', 'java', 'android', 'php', 'c#', 'python', 'jquery', 'html', 'ios']

# Load the questions, filter them by tag, then apply pq.tp1_filter().
questions = pq.read_questions(PROJECT_PATH)
questions = pq.tp1_filter(pq.filter_tags(questions, tags_to_consider))

# Parenthesised single-argument print is identical on Python 2 and 3.
print('Evaulationg {} questions'.format(len(questions)))

# Union of every tag that appears on any remaining question.
all_tags = set()
for q in questions:
  all_tags.update(q.tag_list)

print(all_tags)

# Create the evaluation directory. An already existing directory is fine,
# but any other failure (permissions, missing parent, a plain file in the
# way) is re-raised instead of being silently swallowed by a bare except.
try:
  os.mkdir('./nieve_bayse_eval')
except OSError:
  if not os.path.isdir('./nieve_bayse_eval'):
    raise

class nieve_bayse_factory:
  """Callable object that builds a new naive Bayes classifier per call."""

  def __call__(self):
    """Return nb.NaiveBayseClassifier constructed with the argument 1."""
    # The meaning of the positional 1 is defined by NaiveBayseClassifier,
    # which is not visible here -- TODO confirm.
    classifier = nb.NaiveBayseClassifier(1)
    return classifier

# Run the evaluation with a factory instance, the tp1 scoring function, the
# './nieve_bayse_eval' directory, the filtered questions and all their tags.
ev.leave_one_out(
  nieve_bayse_factory(),
  ev.eval_tp1,
  './nieve_bayse_eval',
  questions,
  all_tags,
  threads=4
)
import os
import eval_classifier as ev
import binary_relevance as br
import preprocess_to_questions as pq
# comment out if you don't want dependency on joblib
#from joblib import Parallel, delayed

# Root directory handed to pq.read_questions().
PROJECT_PATH = '/home/njclimer/source/csc522/'
# Tags passed to pq.filter_tags() to narrow the question set.
tags_to_consider = ['javascript', 'java', 'android', 'php', 'c#', 'python', 'jquery', 'html', 'ios']
questions = pq.read_questions(PROJECT_PATH)
questions = pq.filter_tags(questions, tags_to_consider)
# Parenthesised single-argument print is identical on Python 2 and 3.
print('processing {} questions.'.format(len(questions)))
# questions = questions[:1000]

# Union of every tag that appears on any remaining question.
all_tags = set()
for q in questions:
  all_tags.update(q.tag_list)

print(all_tags)

# Create the evaluation directory. An already existing directory is fine,
# but any other failure (permissions, missing parent, a plain file in the
# way) is re-raised instead of being silently swallowed by a bare except.
try:
  os.mkdir('./nieve_bayse_br_eval_doc')
except OSError:
  if not os.path.isdir('./nieve_bayse_br_eval_doc'):
    raise

def nieve_bayse_factory():
  """Return a new nb.NaiveBayseClassifier built with word_frequency=False."""
  classifier = nb.NaiveBayseClassifier(word_frequency=False)
  return classifier
def binary_relevance_factory():
  """Return a new br.BinaryRelevanceClassifier given nieve_bayse_factory."""
  # The factory function itself (not an instance) is handed over, so the
  # wrapper can create classifiers on demand.
  classifier = br.BinaryRelevanceClassifier(nieve_bayse_factory)
  return classifier
  
# Run the evaluation with the binary-relevance factory, the tp1 scoring
# function, the './nieve_bayse_br_eval_doc' directory, the filtered
# questions and all their tags.
ev.leave_one_out(
  binary_relevance_factory,
  ev.eval_tp1,
  './nieve_bayse_br_eval_doc',
  questions,
  all_tags,
  threads=3
)
import preprocess_to_questions as pq

# comment out if you don't want dependency on joblib
# from joblib import Parallel, delayed

# Root directory handed to pq.read_questions().
PROJECT_PATH = "/home/njclimer/source/csc522/"

# Tags passed to pq.filter_tags() to narrow the question set.
tags_to_consider = ["c++", "sql", "angularjs"]

# Load the questions, filter them by tag, then apply pq.tp1_filter().
questions = pq.read_questions(PROJECT_PATH)
questions = pq.tp1_filter(pq.filter_tags(questions, tags_to_consider))
# Only the first 100 filtered questions are evaluated.
questions = questions[:100]
# Parenthesised single-argument print is identical on Python 2 and 3.
print("Evaulationg {} questions".format(len(questions)))

# Union of every tag that appears on any remaining question.
all_tags = set()
for q in questions:
    all_tags.update(q.tag_list)

print(all_tags)

# Create the evaluation directory. An already existing directory is fine,
# but any other failure (permissions, missing parent, a plain file in the
# way) is re-raised instead of being silently swallowed by a bare except.
try:
    os.mkdir("./nieve_bayse_doc_counts_eval")
except OSError:
    if not os.path.isdir("./nieve_bayse_doc_counts_eval"):
        raise


class nieve_bayse_factory:
    """Callable object that builds a new naive Bayes classifier per call."""

    def __call__(self):
        """Return nb.NaiveBayseClassifier(1, word_frequency=False)."""
        # The meaning of the positional 1 is defined by NaiveBayseClassifier,
        # which is not visible here -- TODO confirm.
        classifier = nb.NaiveBayseClassifier(1, word_frequency=False)
        return classifier


# Run the evaluation with a factory instance, the tp1 scoring function, the
# "./nieve_bayse_doc_counts_eval" directory, the truncated question list and
# all of its tags.
ev.leave_one_out(
    nieve_bayse_factory(),
    ev.eval_tp1,
    "./nieve_bayse_doc_counts_eval",
    questions,
    all_tags,
    threads=1
)