def modelOutput(trainFile, testFile, modelType):
    """Train a Bayes-style classifier and print its evaluation report.

    The report goes to standard output in three parts, each followed by an
    empty line except the last:

    1. The model structure as emitted by ``model.printTree()``:
         (naive bayes) variable name | 'class'
         (tan)         variable name | name of its parents
    2. One line per test instance, rendered by ``formatOutput``:
         predicted class | actual class | posterior probability
       (the intended format is 12 digits after the decimal point).
    3. The number of test-set examples that were correctly classified.

    Args:
        trainFile: training data source, handed to ``data_provider``.
        testFile: test data source, handed to ``data_provider``.
        modelType: ``'n'`` selects ``Bayes``, ``'t'`` selects ``TAN``.

    Raises:
        SystemExit: with status 1 when ``modelType`` is neither ``'n'``
            nor ``'t'``. The error message is written to stderr first.
    """
    # Reject a bad model type before any data is parsed, and exit with a
    # non-zero status so shells and callers can detect the failure.
    if modelType not in ('n', 't'):
        import sys
        sys.stderr.write('model type should be [n] or [t] !!!\n')
        sys.exit(1)

    attributes, labels, instances = data_provider(trainFile)
    if modelType == 'n':
        model = Bayes(attributes, labels, instances)
    else:
        model = TAN(attributes, labels, instances)

    # Only the instances of the test set are needed from here on.
    _, _, testInstances = data_provider(testFile)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    # format output part1: attribute name | 'class' (or its parents)
    model.printTree()
    print('')

    correctClassCnt = 0
    for test in testInstances:
        result = model.classify(test)
        # result[0] is the predicted class, result[1] the actual class.
        if result[0] == result[1]:
            correctClassCnt += 1
        # format output part2: predict class | actual class | posterior probability
        print(formatOutput(result))
    print('')

    # format output part3: correctly classified number of test instances
    print(correctClassCnt)
class Sentiment:
    """Scores documents as 'pos'/'neg' using a segmenter and a Bayes classifier."""

    def __init__(self):
        """Create the classifier and a segmenter loaded from 'seg.pickle'."""
        self.classifier = Bayes()
        self.seg = Seg()
        self.seg.load('seg.pickle')

    def save(self, fname):
        """Delegate to the classifier's ``save`` with ``fname``."""
        self.classifier.save(fname)

    def load(self, fname):
        """Replace the classifier with the result of its own ``load(fname)``."""
        restored = self.classifier.load(fname)
        self.classifier = restored

    def handle(self, doc):
        """Segment ``doc`` and return its words with stop words removed."""
        return self.filter_stop(self.seg.seg(doc))

    def train(self, neg_docs, pos_docs):
        """Train the classifier on labelled documents.

        Each document becomes a ``[words, label]`` pair; all 'neg' samples
        are placed before the 'pos' samples.
        """
        samples = [[self.handle(doc), 'neg'] for doc in neg_docs]
        samples.extend([self.handle(doc), 'pos'] for doc in pos_docs)
        self.classifier.train(samples)

    def classify(self, doc):
        """Return a score for ``doc`` derived from the classifier's verdict.

        The classifier yields ``(label, prob)``. ``prob`` is returned
        unchanged for label 'pos'; any other label yields ``1 - prob``.
        """
        label, prob = self.classifier.classify(self.handle(doc))
        return prob if label == 'pos' else 1 - prob

    @staticmethod
    def filter_stop(words):
        """Return a list of the items of ``words`` not found in ``stop_words``."""
        return [word for word in words if word not in stop_words]
from bayes import Bayes

# Step 1: create a classifier instance, naming the field/column
# that should be predicted.
instance = Bayes("Sex")

# Step 2: train the instance on a data set.
instance.learn("static/data_test.csv")

# Step 3: use the trained instance to classify a sample
# (in this example, the most probable sex for the given values).
sample = [6, 130, 8]
print(instance.classify(sample))