def test_save_load(self):
        """Test saving and loading with blog classifier.

        Trains a NaiveBayes classifier on the BlogFeatures split, saves it,
        loads it into a second NaiveBayes instance and asserts that the
        reloaded classifier scores above 0.55 accuracy on the test split.
        """
        import os
        import shutil
        import tempfile

        train, test = self.split_blogs_corpus(BlogFeatures)
        classifier = NaiveBayes()
        classifier.train(train)

        # Save into a throwaway directory instead of the current working
        # directory so the test leaves no "model" file behind and cannot
        # collide with another run writing the same name.
        scratch_dir = tempfile.mkdtemp()
        try:
            model_path = os.path.join(scratch_dir, "model")
            classifier.save(model_path)

            class2 = NaiveBayes()
            class2.load(model_path)
            self.assertGreater(accuracy(class2, test), 0.55)
        finally:
            # Always clean up, including when an assertion fails.
            shutil.rmtree(scratch_dir, ignore_errors=True)
    def test_save_load_blogs_bag(self):
        """Test that a save/load round trip preserves the trained state.

        Trains a NaiveBayes classifier on the BagOfWords split, saves it,
        loads it into a fresh instance and asserts that ``model``,
        ``priorCount`` and ``countPerFeature`` compare equal on both.
        """
        import os
        import shutil
        import tempfile

        train, test = self.split_blogs_corpus(BagOfWords)
        classifier = NaiveBayes()
        classifier.train(train)

        # Save into a throwaway directory instead of the current working
        # directory so no 'trained_model.p' file is left behind by the test.
        scratch_dir = tempfile.mkdtemp()
        try:
            model_path = os.path.join(scratch_dir, 'trained_model.p')
            classifier.save(model_path)

            c2 = NaiveBayes()
            c2.load(model_path)
        finally:
            # Always clean up, including when save or load raises.
            shutil.rmtree(scratch_dir, ignore_errors=True)

        self.assertEqual(classifier.model, c2.model)
        self.assertEqual(classifier.priorCount, c2.priorCount)
        self.assertEqual(classifier.countPerFeature, c2.countPerFeature)
Beispiel #3
0
			tokens += nltk.regexp_tokenize(l,pattern="\w+")
		data = util.del_dup(tokens)
		if filepath[-4:-1] == 'neg':
			ins = Instance(filename,'negative',data,tokens)	
		elif filepath[-4:-1] == 'pos':
			ins = Instance(filename,'positive',data,tokens)
		else: 
			raise Exception, "Wrong path!"
		ins_list.append(ins)
	f.close()
    	return ins_list

# Driver script: load both review folders, evaluate on a random split,
# save and reload the classifier, evaluate again, and append the split
# proportion together with the last result to a per-ID results file.
instance_list = load_instance('txt_sentoken/neg/')
instance_list += load_instance('txt_sentoken/pos/')

# Random train/test proportions: p0 is drawn uniformly from [0, 1) and
# p1 is its complement.
p0 = random.random()
p1 = 1 - p0
prop = [p0, p1]
#prop = [0.5,0.5]

accuracy = split_train_test(nb, instance_list, prop, ID, limits)

nb.save("movie_review_classifier.json")
nb1 = NaiveBayes.load("movie_review_classifier.json")

# NOTE(review): the reloaded classifier nb1 is never used below; this second
# run is still performed with nb. Confirm whether nb1 was intended here.
accuracy = split_train_test(nb, instance_list, prop, ID, limits)

# 'a+w' is not a valid mode string (Python 3 rejects it with ValueError);
# only appending is needed here, so open in plain 'a'. The with-block
# guarantees the file is closed even if the write fails.
with open('results/results' + repr(ID) + '.txt', 'a') as f:
    f.write(repr(p0) + ' ' + repr(accuracy) + '\n')