def classify(self, classify_file):
    """Classify a single file as "spam" or "ham".

    The file is run through ``Preprocess([]).singlefile`` and the returned
    text is split on whitespace. For every class index in
    ``range(self.class_number)`` a score is accumulated, starting from
    ``self.p_classes[cls]`` and adding ``self.map[word][cls]`` for known
    words or ``self.p_unknown[cls]`` for words missing from ``self.map``.
    The class with the strictly highest score wins; ties keep the
    lowest class index.

    NOTE(review): the scores are summed, which suggests they are
    log-probabilities -- confirm against the training code.

    Args:
        classify_file: Path of the file to classify.

    Returns:
        "spam" when the winning class index is 0, otherwise "ham".
    """
    # Close the handle deterministically instead of leaking it.
    with open(classify_file) as handle:
        content = Preprocess([]).singlefile(handle)

    # Tokenise once; the word list does not depend on the class.
    words = content.split()

    result = 0
    # -inf rather than a finite floor, so that arbitrarily low scores
    # (e.g. very long documents) still compare correctly.
    max_probability = float("-inf")
    for cur_class in range(self.class_number):
        cur_probability = self.p_classes[cur_class]
        for word in words:
            if word in self.map:
                cur_probability += self.map[word][cur_class]
            else:
                cur_probability += self.p_unknown[cur_class]
        if cur_probability > max_probability:
            max_probability = cur_probability
            result = cur_class
    return "spam" if result == 0 else "ham"
"""Train a naive Bayes model, classify a test directory and print F1."""
import os
import sys

from nbpreprocess import Preprocess
from nbclassify import Nbclassify
from nbtraing import Nbtraing


def _f_measure(tp, fp, fn):
    """Return the F1 score for the given counts of the "spam" class.

    Args:
        tp: Files labelled spam and classified as spam.
        fp: Files labelled ham but classified as spam.
        fn: Files labelled spam but classified as ham.

    Returns:
        ``2 * p * r / (p + r)`` as a float, or 0.0 when there are no true
        positives (precision, recall or their sum would otherwise divide
        by zero).
    """
    if tp == 0:
        return 0.0
    precision = float(tp) / (tp + fp)
    recall = float(tp) / (tp + fn)
    return 2 * precision * recall / (precision + recall)


def main(argv):
    """Run preprocessing, training and evaluation.

    Args:
        argv: Full argument vector; ``argv[1:3]`` is handed to both
            ``Preprocess`` and ``Nbtraing``, ``argv[3]`` is the directory
            whose files are classified.

    Returns:
        Process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) < 4:
        prog = argv[0] if argv else "prog"
        sys.stderr.write(
            "usage: %s TRAIN_ARG1 TRAIN_ARG2 TEST_DIR\n" % prog)
        return 2

    # Read a directory, classify it with naive Bayes and evaluate the
    # result using the F-measure.
    Preprocess(argv[1:3]).process()
    Nbtraing(argv[1:3]).train()
    # NOTE(review): presumably "nb.model" is produced by train() -- confirm.
    classifier = Nbclassify("nb.model")

    test_path = argv[3]
    tp = fp = fn = 0
    for name in os.listdir(test_path):
        path = os.path.join(test_path, name)
        # Sub-directories cannot be opened as files; skip them.
        if not os.path.isfile(path):
            continue
        result = classifier.classify(path)
        # The expected label is encoded in the file name.
        origin = "spam" if "spm" in name else "ham"
        if origin == "spam" and result == "spam":
            tp += 1
        elif origin == "ham" and result == "spam":
            fp += 1
        elif origin == "spam" and result == "ham":
            fn += 1

    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print(_f_measure(tp, fp, fn))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))