def testProb(self):
    """Check Classifier.prob() for a document against the "Good" category.

    Trains a fresh classifier by hand through incc()/incf(): three "Good"
    category increments paired with the features "hello", "world", "world",
    and one "Bad" increment paired with "world". It then asserts the value
    prob() returns for a new document in category "Good".
    """
    c = Classifier(getwords)

    # training: (feature, category) pairs, one category increment per pair
    training = (
        ("hello", "Good"),
        ("world", "Good"),
        ("world", "Good"),
        ("world", "Bad"),
    )
    for feature, category in training:
        c.incc(category)
        c.incf(feature, category)

    # classify new document
    item = "world world wurst Wurst wurst world"

    # The expected value is the result of floating-point arithmetic inside
    # the classifier, so compare with a tolerance rather than exact equality
    # to keep the test stable against rounding in intermediate steps.
    self.assertAlmostEqual(c.prob(item, "Good"), 0.234375, places=7)
# Report sample counts, train the RSS classifier on the labelled data,
# print normalised "good"/"bad" scores for every test item, then persist
# the trained classifier.
#
# NOTE: every print below is written as print(<single string>) so that it
# emits exactly the same text as the Python 2 print statement this script
# was written with.
for _ in range(3):
    print("----------------------------------------------------------------")
print("%s %s" % ('Number of used trainings samples in categorie tech',
                 countnews['tech']))
print("%s %s" % ('Number of used trainings samples in categorie notech',
                 countnews['nontech']))
print("%s %s" % ('Number of used test samples', countnews['test']))
print('--' * 30)

rss_classifier = Classifier()
for tech in train_data["good"]:
    rss_classifier.train(tech, "good")
for nontech in train_data["bad"]:
    rss_classifier.train(nontech, "bad")
print("---- training finished ---------------------")

for test in data:
    g_pb = rss_classifier.prob(test, "good")
    b_pb = rss_classifier.prob(test, "bad")

    # Normalise the two probabilities so they sum to 1.
    prob_sum = g_pb + b_pb
    if prob_sum:
        g_pb_n = g_pb / prob_sum
        b_pb_n = b_pb / prob_sum
    else:
        # Both probabilities are zero: there is nothing to normalise.
        # Report 0.0 for both instead of raising ZeroDivisionError, which
        # would abort the loop and skip the save() call below.
        g_pb_n = b_pb_n = 0.0

    print(test)
    print("%s %s %s %s" % ("good: ", g_pb_n, " bad: ", b_pb_n))

# Persist the trained classifier once all test items have been scored.
rss_classifier.save("trainData")