    def buildManyModels(self):
        '''
        Uses every combination of the parameters specified below to create a
        MaximumEntropyClassifier, train it, and evaluate it
        '''
        all_filesubsets = [2000, 4000, 6000]
        all_min_occurences = [3, 5, 7]
        max_iter = 4
        all_grams = [[1], [1, 2]]

        for filesubset in all_filesubsets:
            for min_occurence in all_min_occurences:
                for grams in all_grams:
                    self.maxent_args = {
                        'filesubset': filesubset,
                        'min_occurences': min_occurence,
                        'max_iter': max_iter,
                        'grams': grams
                    }
                    ent = MaximumEntropyClassifier(self.rawfname,
                                                   **self.maxent_args)
                    print 'About to train with', self.maxent_args
                    ent.trainClassifier()
                    self.evaluate(ent)
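    # The three nested loops above sweep a small parameter grid. A minimal
    # sketch of the same sweep written with itertools.product, assuming
    # `import itertools` at module level and the same trainClassifier /
    # evaluate interface as above; the method name is hypothetical.
    def buildManyModelsProduct(self):
        all_filesubsets = [2000, 4000, 6000]
        all_min_occurences = [3, 5, 7]
        all_grams = [[1], [1, 2]]

        for filesubset, min_occurence, grams in itertools.product(
                all_filesubsets, all_min_occurences, all_grams):
            self.maxent_args = {
                'filesubset': filesubset,
                'min_occurences': min_occurence,
                'max_iter': 4,
                'grams': grams
            }
            ent = MaximumEntropyClassifier(self.rawfname,
                                           **self.maxent_args)
            print 'About to train with', self.maxent_args
            ent.trainClassifier()
            self.evaluate(ent)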
    def run(self):
        '''
        Trains a MaximumEntropyClassifier using <self.maxent_args> and evaluates
        the trained model
        '''
        ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args)
        print 'Initialized classifier, about to train...'
        ent.trainClassifier()

        self.evaluate(ent)
    def runFromPickle(self, picklefile):
        '''
        Opens the NLTK model stored in <picklefile> and uses that model for
        evaluation
        '''
        # <picklefile> holds a pickled NLTK model (requires `import pickle`
        # at module level)
        with open(picklefile, 'rb') as f:
            ent_model = pickle.load(f)

        print 'Loaded classifier from', picklefile
        ent = MaximumEntropyClassifier(self.rawfname, **self.maxent_args)
        ent.setModel(ent_model)

        # Return everything but the classifier string
        return self.evaluate(ent)[1:]
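    # A hypothetical counterpart to runFromPickle, sketching how a trained
    # NLTK model could be serialized for later reuse. The method name is an
    # assumption; <ent_model> is the object runFromPickle expects to load.
    def saveToPickle(self, picklefile, ent_model):
        with open(picklefile, 'wb') as f:
            pickle.dump(ent_model, f)
        print 'Saved classifier to', picklefile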
from maxentclassifier import MaximumEntropyClassifier
from naivebayesclassifier import NaiveBayesClassifier
import random
import csv

# name of training set file
fname = 'training.csv'

# train classifiers here first
nb = NaiveBayesClassifier(fname, grams=[1, 2])
nb.setThresholds(neg=1.0, pos=20.0)
nb.setWeight(0.000000000005)
nb.trainClassifier()
ment = MaximumEntropyClassifier(fname)
ment.trainClassifier()
classifiers = [nb, ment]

def csvdata_to_list(data):
    # Materialize a csv.reader (or any row iterable) into a list of rows
    return [row for row in data]

def search(text, data):
    '''
    Returns the rows in <data> whose first column contains <text>,
    matched case-insensitively
    '''
    text = text.lower()
    output = []
    for d in data:
        if d[0].lower().find(text) != -1:
            output.append([d[0]])
    return output
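# A minimal usage sketch for the helpers above, assuming <fname> points at a
# CSV with the tweet text in the first column; the query string 'python' is
# hypothetical.
with open(fname, 'rb') as f:
    rows = csvdata_to_list(csv.reader(f))
matches = search('python', rows)
print 'Found', len(matches), 'matching rows'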
import tweepy
import os
import tornado.web


from maxentclassifier import MaximumEntropyClassifier
from naivebayesclassifier import NaiveBayesClassifier
# name of training set file
fname = 'trainingandtestdata/training.csv'

# train classifiers here first
nb = NaiveBayesClassifier(fname, grams=[1, 2])
nb.setThresholds(neg=1.0, pos=20.0)
nb.setWeight(0.000000000005)
nb.trainClassifier()
ment = MaximumEntropyClassifier(fname)
ment.trainClassifier()
classifiers = [nb, ment]


class MainHandler(tornado.web.RequestHandler):
    '''
    Handles requests to the main page
    '''
    def get(self):
        query = self.get_argument("query", "").strip()
        cchosen = int(self.get_argument("classifier-type", 0))

        # Fill in your own Twitter API credentials here
        auth = tweepy.OAuthHandler("<CONSUMER_KEY>", "<CONSUMER_SECRET>")
        auth.set_access_token("<ACCESS_TOKEN>", "<ACCESS_TOKEN_SECRET>")
        api = tweepy.API(auth)
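        # A sketch of how the handler might finish: fetch tweets matching
        # the query and label them with the chosen model. `api.search(q=...)`
        # assumes tweepy 3.x; `classify` is a hypothetical method name, since
        # the classifier interface is not shown in this file.
        chosen = classifiers[cchosen]
        results = []
        for status in api.search(q=query, count=20):
            results.append((status.text, chosen.classify(status.text)))
        self.write({'query': query, 'results': results})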