Beispiel #1
0
def test(classif,
         n=1,
         train_size=500,
         mode='k',
         iterations=1,
         dataset='',
         extra_dataset=None,
         limit=None,
         binary=False,
         idf=False,
         negation=True):
    """Train and test ``classif`` on each index split and print the averages.

    For every one of ``iterations`` splits produced by ``Indexes`` a
    ``TestConfiguration`` is created, trained and tested. The pair returned
    by ``TestConfiguration.test()`` is accumulated per class and finally
    printed as a percentage averaged over the iterations (presumably the
    values are per-class accuracy fractions -- confirm against
    ``TestConfiguration.test``).

    Args:
        classif: forwarded to ``TestConfiguration`` as the classifier.
        n: forwarded to ``TestConfiguration``.
        train_size: forwarded to ``Indexes``; replaced by 1000 when
            ``extra_dataset`` is truthy.
        mode: forwarded to ``Indexes``; replaced by ``'d'`` when
            ``extra_dataset`` is truthy.
        iterations: number of splits to run; replaced by 1 when
            ``extra_dataset`` is truthy.
        dataset: passed to ``select_dataset`` to obtain the positive and
            negative directories.
        extra_dataset: when truthy, switches to the fixed configuration
            described above and passes ``dataset`` as ``test_set``.
        limit, binary, idf, negation: forwarded to ``TestConfiguration``.

    Returns:
        None. The configuration and results are only printed.
    """
    (pos_dir, neg_dir) = select_dataset(dataset)
    test_set = None
    if extra_dataset:
        # NOTE(review): extra_dataset itself is only used as a flag and in
        # the printout below; it is never handed to select_dataset or
        # TestConfiguration -- confirm this is intended.
        mode = 'd'
        iterations = 1
        train_size = 1000
        test_set = dataset

    print("TEST CONFIGURATION")
    print("dataset: %(dataset)s, stars: %(extra_dataset)s \nn: %(n)s, limit: %(limit)s, binary: %(binary)s, \nmode: %(mode)s, iterations: %(iterations)s, idf: %(idf)s" % {
        'n': n,
        'train_size': train_size,
        'mode': mode,
        'iterations': iterations,
        'dataset': dataset,
        'extra_dataset': extra_dataset,
        'limit': limit,
        'binary': binary,
        'idf': idf
    })

    ind = Indexes(mode=mode, iterations=iterations, train_size=train_size)
    pos_correct = 0
    neg_correct = 0
    for _ in range(iterations):
        ind.next()
        config = TestConfiguration(classif,
                                   n,
                                   ind,
                                   pos_dir,
                                   neg_dir,
                                   idf=idf,
                                   test_set=test_set,
                                   binary=binary,
                                   limit=limit,
                                   negation=negation)
        config.train()
        (pos, neg) = config.test()
        pos_correct += pos
        neg_correct += neg

    # Divide by a float so that Python 2's "/" cannot floor the averages
    # when test() hands back integers.
    runs = float(iterations)
    pos_percent = round((pos_correct / runs) * 100)
    neg_percent = round((neg_correct / runs) * 100)
    total_percent = round((neg_correct + pos_correct) / (2 * runs) * 100)

    # "%s" uses str(), exactly like the comma form of the print statement.
    print("Results:")
    print("Positive: %s %%" % (pos_percent,))
    print("Negative: %s %%" % (neg_percent,))
    print("Total: %s %%" % (total_percent,))
Beispiel #2
0
 def crossValidate(self, iterations, mode='k', train_size=500):
     """Train and test this object on each index split and print averages.

     An ``Indexes`` object is created from the arguments; for every
     iteration it is advanced with ``next()``, installed via
     ``self.set_index`` and followed by ``self.train()`` and
     ``self.test()``. The pair returned by ``self.test()`` is summed per
     class and printed as a percentage averaged over the iterations
     (presumably per-class accuracy fractions -- confirm against ``test``).

     Args:
         iterations: number of splits to run; also forwarded to ``Indexes``.
         mode: forwarded to ``Indexes``.
         train_size: forwarded to ``Indexes``.

     Returns:
         None. Results are only printed.
     """
     ind = Indexes(mode=mode, iterations=iterations, train_size=train_size)
     pos_correct = 0
     neg_correct = 0
     for _ in range(iterations):
         ind.next()
         self.set_index(ind)
         self.train()
         (pos, neg) = self.test()
         pos_correct += pos
         neg_correct += neg

     # Divide by a float so that Python 2's "/" cannot floor the averages
     # when test() hands back integers.
     runs = float(iterations)
     pos_percent = round((pos_correct / runs) * 100)
     neg_percent = round((neg_correct / runs) * 100)
     total_percent = round((neg_correct + pos_correct) / (2 * runs) * 100)

     # "%s" uses str(), exactly like the comma form of the print statement.
     print("Results:")
     print("Positive: %s %%" % (pos_percent,))
     print("Negative: %s %%" % (neg_percent,))
     print("Total: %s %%" % (total_percent,))
Beispiel #3
0
 def crossValidate(self, iterations, mode='k', train_size=500):
     """Run train/test once per index split and print averaged percentages.

     Builds an ``Indexes`` object from the arguments. Each iteration
     advances it with ``next()``, passes it to ``self.set_index``, then
     calls ``self.train()`` and ``self.test()``. The two values returned by
     ``self.test()`` are accumulated separately and reported as percentages
     averaged over all iterations (presumably per-class accuracy fractions
     -- confirm against ``test``).

     Args:
         iterations: number of splits to run; also forwarded to ``Indexes``.
         mode: forwarded to ``Indexes``.
         train_size: forwarded to ``Indexes``.

     Returns:
         None. Results are only printed.
     """
     ind = Indexes(mode=mode, iterations=iterations, train_size=train_size)
     pos_correct = 0
     neg_correct = 0
     for _ in range(iterations):
         ind.next()
         self.set_index(ind)
         self.train()
         (pos, neg) = self.test()
         pos_correct += pos
         neg_correct += neg

     # Float divisor: Python 2's "/" would otherwise floor the averages
     # whenever test() returns integers.
     runs = float(iterations)
     pos_percent = round((pos_correct / runs) * 100)
     neg_percent = round((neg_correct / runs) * 100)
     total_percent = round((neg_correct + pos_correct) / (2 * runs) * 100)

     # "%s" formats with str(), matching the print statement's comma form.
     print("Results:")
     print("Positive: %s %%" % (pos_percent,))
     print("Negative: %s %%" % (neg_percent,))
     print("Total: %s %%" % (total_percent,))
Beispiel #4
0
def test(classif, n=1, train_size=500, mode='k', iterations=1, dataset='',
         extra_dataset=None, limit=None, binary=False, idf=False, negation=True):
    """Train and test ``classif`` on each index split and print the averages.

    For every one of ``iterations`` splits produced by ``Indexes`` a
    ``TestConfiguration`` is created, trained and tested. The pair returned
    by ``TestConfiguration.test()`` is accumulated per class and finally
    printed as a percentage averaged over the iterations (presumably the
    values are per-class accuracy fractions -- confirm against
    ``TestConfiguration.test``).

    Args:
        classif: forwarded to ``TestConfiguration`` as the classifier.
        n: forwarded to ``TestConfiguration``.
        train_size: forwarded to ``Indexes``; replaced by 1000 when
            ``extra_dataset`` is truthy.
        mode: forwarded to ``Indexes``; replaced by ``'d'`` when
            ``extra_dataset`` is truthy.
        iterations: number of splits to run; replaced by 1 when
            ``extra_dataset`` is truthy.
        dataset: passed to ``select_dataset`` to obtain the positive and
            negative directories.
        extra_dataset: when truthy, switches to the fixed configuration
            described above and passes ``dataset`` as ``test_set``.
        limit, binary, idf, negation: forwarded to ``TestConfiguration``.

    Returns:
        None. The configuration and results are only printed.
    """
    (pos_dir, neg_dir) = select_dataset(dataset)
    test_set = None
    if extra_dataset:
        # NOTE(review): extra_dataset itself is only used as a flag and in
        # the printout below; it is never handed to select_dataset or
        # TestConfiguration -- confirm this is intended.
        mode = 'd'
        iterations = 1
        train_size = 1000
        test_set = dataset

    settings = {'n': n,
                'train_size': train_size,
                'mode': mode,
                'iterations': iterations,
                'dataset': dataset,
                'extra_dataset': extra_dataset,
                'limit': limit,
                'binary': binary,
                'idf': idf}
    print("TEST CONFIGURATION")
    print("dataset: %(dataset)s, stars: %(extra_dataset)s \nn: %(n)s, limit: %(limit)s, binary: %(binary)s, \nmode: %(mode)s, iterations: %(iterations)s, idf: %(idf)s" % settings)

    ind = Indexes(mode=mode, iterations=iterations, train_size=train_size)
    pos_correct = 0
    neg_correct = 0
    for _ in range(iterations):
        ind.next()
        config = TestConfiguration(classif, n, ind, pos_dir, neg_dir, idf=idf,
                                   test_set=test_set, binary=binary,
                                   limit=limit, negation=negation)
        config.train()
        (pos, neg) = config.test()
        pos_correct += pos
        neg_correct += neg

    # Divide by a float so that Python 2's "/" cannot floor the averages
    # when test() hands back integers.
    runs = float(iterations)
    pos_percent = round((pos_correct / runs) * 100)
    neg_percent = round((neg_correct / runs) * 100)
    total_percent = round((neg_correct + pos_correct) / (2 * runs) * 100)

    # "%s" uses str(), exactly like the comma form of the print statement.
    print("Results:")
    print("Positive: %s %%" % (pos_percent,))
    print("Negative: %s %%" % (neg_percent,))
    print("Total: %s %%" % (total_percent,))
Beispiel #5
0
    # Settings referenced by the commented-out test(...) calls below. The
    # Indexes(...) call further down repeats the same values as literals
    # instead of reading these variables.
    train_size = 800
    mode = 'k'
    iterations = 3
    extra_dataset = None

    #    print "Bayes:"
    #    test(classifier.BayesClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations,
    #            dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation = negation)
    # NOTE(review): this label is still printed although the MaxEnt
    # test(...) call right below it is commented out.
    print "MaxEnt:"
    #    test(classifier.MaximumEntropyClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations, dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation=negation)
    #   print "SVM:"
    #    test(classifier.LinearSVMClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations, dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation=negation)

    # Create a MajorityVotingTester and register TestConfiguration objects
    # with it; every configuration receives the same Indexes object.
    mvc = MajorityVotingTester(negation)
    ind = Indexes(mode='k', iterations=3, train_size=800)
    # Advanced once before use -- presumably selects the first split;
    # verify against Indexes.
    ind.next()
    print ind
    (pos_dir, neg_dir) = select_dataset(dataset)
    m1 = TestConfiguration(classifier.BayesClassifier,
                           n=n,
                           ind=ind,
                           pos_dir=pos_dir,
                           neg_dir=neg_dir,
                           binary=binary,
                           limit=limit,
                           idf=idf)
    mvc.addClassifier(m1)

    # NOTE(review): select_dataset(dataset) is called again with the same
    # argument; the earlier result could likely be reused -- confirm the
    # call has no side effects.
    (pos_dir, neg_dir) = select_dataset(dataset)
    m2 = TestConfiguration(classifier.MaximumEntropyClassifier,
Beispiel #6
0
    # Settings referenced by the commented-out test(...) calls below. The
    # Indexes(...) call further down repeats the same values as literals
    # instead of reading these variables.
    train_size = 800
    mode = 'k'
    iterations = 3
    extra_dataset=None
    

#    print "Bayes:"
#    test(classifier.BayesClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations,
#            dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation = negation)
    # NOTE(review): this label is still printed although the MaxEnt
    # test(...) call right below it is commented out.
    print "MaxEnt:"
#    test(classifier.MaximumEntropyClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations, dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation=negation)
 #   print "SVM:"
#    test(classifier.LinearSVMClassifier,n=n,train_size=train_size,mode=mode,iterations=iterations, dataset=dataset,extra_dataset=extra_dataset,limit=limit,binary=binary, idf=idf, negation=negation)

    # Create a MajorityVotingTester and register three TestConfiguration
    # objects with it (Bayes, maximum entropy, linear SVM classifier
    # classes); all three receive the same Indexes object.
    mvc = MajorityVotingTester(negation)
    ind = Indexes(mode='k',iterations=3,train_size=800)
    # Advanced once before use -- presumably selects the first split;
    # verify against Indexes.
    ind.next()
    print ind
    (pos_dir, neg_dir) = select_dataset(dataset)
    m1 = TestConfiguration(classifier.BayesClassifier, n=n, ind=ind, pos_dir=pos_dir, neg_dir=neg_dir, binary=binary, limit=limit, idf=idf)
    mvc.addClassifier(m1)

    # NOTE(review): select_dataset(dataset) is repeated with the same
    # argument before each configuration; the first result could likely be
    # reused -- confirm the call has no side effects.
    (pos_dir, neg_dir) = select_dataset(dataset)
    m2 = TestConfiguration(classifier.MaximumEntropyClassifier, n=n, ind=ind, pos_dir=pos_dir, neg_dir=neg_dir, binary=binary, limit=limit, idf=idf)

    mvc.addClassifier(m2)


    (pos_dir, neg_dir) = select_dataset(dataset)
    m3 = TestConfiguration(classifier.LinearSVMClassifier, n=n, ind=ind, pos_dir=pos_dir, neg_dir=neg_dir, binary=binary, limit=limit, idf=idf)
    mvc.addClassifier(m3)
Beispiel #7
0
#!/usr/bin/python

import os
import ngrams
from Indexes import Indexes
import matplotlib.pyplot as plt
from classifier import MaximumEntropyClassifier

TRAIN_SIZE = 800
n = 1
# Cut-off passed to ngrams.top_ngrams for both classes (previously a
# literal repeated in each call).
TOP_NGRAMS = 16165


def _read_file(path):
    """Return the whole content of *path* and close the file afterwards."""
    with open(path) as handle:
        return handle.read()


print("Maximum Entropy")
pos = os.listdir("pos")
neg = os.listdir("neg")

ind = Indexes('r', 1, TRAIN_SIZE)
print("> determined Indices")
ind.next()

# Build the n-grams of every training document, one class at a time. The
# files are read through _read_file so each handle is closed right away
# instead of being left to the garbage collector.
pos_grams = [ngrams.ngrams(n, _read_file("pos/" + pos[i]))
             for i in ind.get_pos_train_ind()]
pos_collapsed_grams = ngrams.top_ngrams(ngrams.collapse_ngrams(pos_grams),
                                        TOP_NGRAMS)
neg_grams = [ngrams.ngrams(n, _read_file("neg/" + neg[i]))
             for i in ind.get_neg_train_ind()]
neg_collapsed_grams = ngrams.top_ngrams(ngrams.collapse_ngrams(neg_grams),
                                        TOP_NGRAMS)
print("> collapsed grams")

# One ([gram], label, value) triple per retained n-gram, positives first.
trainingset = [([k], 'pos', v) for (k, v) in pos_collapsed_grams.iteritems()]
trainingset.extend(
    [([k], 'neg', v) for (k, v) in neg_collapsed_grams.iteritems()])
m = MaximumEntropyClassifier(trainingset)
print("> created model")

pos_res = []
Beispiel #8
0
#!/usr/bin/python

import os
import ngrams
from Indexes import Indexes
import matplotlib.pyplot as plt
from classifier import MaximumEntropyClassifier

TRAIN_SIZE = 800
n = 1


def _read_file(path):
    """Return the whole content of *path* and close the file afterwards."""
    with open(path) as handle:
        return handle.read()


print("Maximum Entropy")
pos = os.listdir("pos")
neg = os.listdir("neg")

ind = Indexes('r', 1, TRAIN_SIZE)
print("> determined Indices")
ind.next()

# Build the n-grams of every training document, one class at a time. The
# files are read through _read_file so each handle is closed right away
# instead of being left to the garbage collector.
pos_grams = [
    ngrams.ngrams(n, _read_file("pos/" + pos[i]))
    for i in ind.get_pos_train_ind()
]
pos_collapsed_grams = ngrams.top_ngrams(ngrams.collapse_ngrams(pos_grams),
                                        16165)
neg_grams = [
    ngrams.ngrams(n, _read_file("neg/" + neg[i]))
    for i in ind.get_neg_train_ind()
]