def buildManyModels(self): ''' Uses every combination of the parameters specified below to create a MaximumEntropyClassifier, train it, and evaluate it ''' all_filesubsets = [2000, 4000, 6000] all_min_occurences = [3, 5, 7] max_iter = 4 all_grams = [[1], [1, 2]] for filesubset in all_filesubsets: for min_occurence in all_min_occurences: for grams in all_grams: self.maxent_args = { 'filesubset': filesubset, 'min_occurences': min_occurence, 'max_iter': max_iter, 'grams': grams } ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) print 'About to train with', self.maxent_args ent.trainClassifier() self.evaluate(ent)
def run(self): ''' Trains a MaximumEntropyClassifier using <self.maxent_args> and evaluates the trained model ''' ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) print 'Initialized classifier, about to train...' ent.trainClassifier() self.evaluate(ent)
def run(self): ''' Trains a MaximumEntropyClassifier using <self.maxent_args> and evaluates the trained model ''' ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) print 'Initialized classifier, about to train...' ent.trainClassifier() self.evaluate(ent)
def runFromPickle(self, picklefile): f = open(picklefile, "rb") ent_model = pickle.load(f) f.close() print 'Loaded classifier from', picklefile ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) ent.setModel(ent_model) return self.evaluate(ent)
def runFromPickle(self, picklefile): ''' Opens the NLTK model stored in <picklefile> and uses that model for evaluation ''' f = open(picklefile, "rb") # Pickle stores an NLTK model ent_model = pickle.load(f) f.close() print 'Loaded classifier from', picklefile ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) ent.setModel(ent_model) # Return everything but the classifer string return self.evaluate(ent)[1:]
def runFromPickle(self, picklefile): ''' Opens the NLTK model stored in <picklefile> and uses that model for evaluation ''' f = open(picklefile, "rb") # Pickle stores an NLTK model ent_model = pickle.load(f) f.close() print 'Loaded classifier from', picklefile ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) ent.setModel(ent_model) # Return everything but the classifer string return self.evaluate(ent)[1:]
def buildManyModels(self):
    '''
    Sweeps a small grid of training parameters, building, training, and
    evaluating a MaximumEntropyClassifier for each combination.
    '''
    subset_sizes = [2000, 4000, 6000]
    occurence_cutoffs = [3, 5, 7]
    iteration_cap = 4
    gram_choices = [[1], [1, 2]]

    for size in subset_sizes:
        for cutoff in occurence_cutoffs:
            for ngrams in gram_choices:
                # current grid point, kept on the instance
                self.maxent_args = {
                    'filesubset': size,
                    'min_occurences': cutoff,
                    'max_iter': iteration_cap,
                    'grams': ngrams
                }
                model = MaximumEntropyClassifier(self.rawfname,
                                                 **self.maxent_args)
                model.trainClassifier()
                self.evaluate(model)
def buildManyModels(self): ''' Uses every combination of the parameters specified below to create a MaximumEntropyClassifier, train it, and evaluate it ''' all_filesubsets = [2000, 4000, 6000] all_min_occurences = [3, 5, 7] max_iter = 4 all_grams = [[1], [1,2]] for filesubset in all_filesubsets: for min_occurence in all_min_occurences: for grams in all_grams: self.maxent_args = { 'filesubset' : filesubset, 'min_occurences' : min_occurence, 'max_iter' : max_iter, 'grams' : grams } ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args) print 'About to train with', self.maxent_args ent.trainClassifier() self.evaluate(ent)
from maxentclassifier import MaximumEntropyClassifier from naivebayesclassifier import NaiveBayesClassifier import random import csv fname = 'training.csv' nb = NaiveBayesClassifier(fname, grams=[1, 2]) nb.setThresholds(neg=1.0, pos=20.0) nb.setWeight(0.000000000005) nb.trainClassifier() ment = MaximumEntropyClassifier(fname) ment.trainClassifier() classifiers = [nb, ment] def csvdata_to_list(data): d=[] for row in data: d.append(row) return d def search(text,data): output = [] i=0 for d in data: if d[0].lower().find(text) != -1: output.append([]) output[i].append(d[0])
import tweepy import os from maxentclassifier import MaximumEntropyClassifier from naivebayesclassifier import NaiveBayesClassifier # name of training set file fname = 'trainingandtestdata/training.csv' # train classifiers here first nb = NaiveBayesClassifier(fname, grams=[1,2]) nb.setThresholds(neg=1.0, pos=20.0) nb.setWeight(0.000000000005) nb.trainClassifier() ment = MaximumEntropyClassifier(fname) ment.trainClassifier() classifiers = [nb, ment] class MainHandler(tornado.web.RequestHandler): ''' Handles request to main page ''' def get(self): query = self.get_argument("query", "").strip() cchosen = int(self.get_argument("classifier-type", 0)) auth = tweepy.OAuthHandler ("Yd1EFIv3psmpXdhR3lPVjUXva","WcmeKDjoaD3suYMQbgIyXTTtKcaDvws4h5cFwmlBy7jgDMxO9E") auth.set_access_token ("797867203-pm0v4oRKAe6EThRKeAq2H8tPruUUHepzhrTuXdbB", "0HpeNMR6P00UFqMwHvadZxvhXGzIonDxW1LdMHSH5AOQR") api = tweepy.API(auth)
def run(self):
    '''
    Trains a MaximumEntropyClassifier configured by <self.maxent_args> and
    hands the trained model to self.evaluate().
    '''
    model = MaximumEntropyClassifier(self.rawfname, **self.maxent_args)
    model.trainClassifier()
    self.evaluate(model)