def predict(self, trainDir, testDir, dirOut, smooth_rate=0.001):
    """Train on every name's training file and write predictions for its test file.

    The list of names is read with ``util.readNames(testDir + "namefile")``;
    when that list is empty, ``util.readDir(testDir)`` is used instead.
    For each name the model is rebuilt from ``trainDir + name`` and then
    applied to every entry loaded from ``testDir + name``.

    Parameters:
        trainDir    : directory prefix of the training files (concatenated
                      directly with the name, so it must end with a separator)
        testDir     : directory prefix of the test files and of "namefile"
        dirOut      : directory prefix for the result file
        smooth_rate : smoothing value handed to ``self.learn``

    Returns:
        The path of the result file, so the caller can pass it on for
        evaluation. Each line of the file is "<name> <test id> <result>".
    """
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_NB_" + time.ctime()

    # The context manager closes the file even when loading, training or
    # classification raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Predicting %s" % name)

            # Train a fresh model from this name's training data.
            self.infile = trainDir + name
            self.loadFeature()
            self.createVocabList()
            self.learn(smooth_rate)

            # Switch to the matching test data and classify each entry.
            self.infile = testDir + name
            self.loadFeature("test")
            for i, testFeatures in enumerate(self.testList):
                testVec = self.bagOfWords2Vec(testFeatures)
                result = self.classify(testVec)
                fout.write(name + " " + self.testName[i] + " " + result + "\n")

    return outfile
def main():
    """Run the Naive Bayes test pipeline: predict on ../test/ and evaluate.

    Feature extraction and random cross validation are kept below as
    disabled, commented-out steps.
    """
    smooth_rate = 0.0001

    # ------------------------------For Feature Extractor------------------------------
    # (disabled)
    # extractor = Extractor()
    # extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    # extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2," | ")

    names = util.readNames("../test/namefile")

    # ------------------------------For Random Validation-------------------------------
    # (disabled)
    # fout = open("../result/Tune_Result "+str(time.ctime())+".csv","a")
    # results = []
    # for name in names:
    #     infile = "../train/"+name
    #     nb = NaiveBayes(infile)
    #     print "---------",name,"----------"
    #     result = nb.Random_Cross_Validation(20,4,smooth_rate)
    #     results.append(result)
    #     reStr = name + "," + str(result) + "\n"
    #     fout.write(reStr)
    # fout.close()
    # print "Macro AVG:",sum(results)/len(results)

    # ------------------------------------For Test--------------------------------------
    # Show which names are about to be processed.
    for current in names:
        print(current)

    # predict() receives only the directories, not a single name, so it is
    # invoked once here.
    # NOTE(review): confirm these calls were not meant to run per name.
    classifier = NaiveBayes()
    result_path = classifier.predict("../train/", "../test/", "../result/", smooth_rate)
    util.evaluate(result_path, "../result/test_answer")
def predict(self, trainDir, testDir, dirOut, smooth_rate=0.001):
    """Train on every name's training file and write predictions for its test file.

    The list of names is read with ``util.readNames(testDir + "namefile")``;
    when that list is empty, ``util.readDir(testDir)`` is used instead.
    For each name the model is rebuilt from ``trainDir + name`` and then
    applied to every entry loaded from ``testDir + name``.

    Parameters:
        trainDir    : directory prefix of the training files (concatenated
                      directly with the name, so it must end with a separator)
        testDir     : directory prefix of the test files and of "namefile"
        dirOut      : directory prefix for the result file
        smooth_rate : smoothing value handed to ``self.learn``

    Returns:
        The path of the result file, so the caller can pass it on for
        evaluation. Each line of the file is "<name> <test id> <result>".
    """
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_NB_" + time.ctime()

    # The context manager closes the file even when loading, training or
    # classification raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Predicting %s" % name)

            # Train a fresh model from this name's training data.
            self.infile = trainDir + name
            self.loadFeature()
            self.createVocabList()
            self.learn(smooth_rate)

            # Switch to the matching test data and classify each entry.
            self.infile = testDir + name
            self.loadFeature("test")
            for i, testFeatures in enumerate(self.testList):
                testVec = self.bagOfWords2Vec(testFeatures)
                result = self.classify(testVec)
                fout.write(name + " " + self.testName[i] + " " + result + "\n")

    return outfile
def predict(self, trainDir, testDir, dirOut, HiddenNum, itNum, learningRate):
    '''
    Train one back-propagation network per name and write its predictions.

    trainDir    : the directory of train data;
    testDir     : the directory of test data (also holds "namefile");
    dirOut      : the directory for the result file
    HiddenNum   : the number of the nodes in the hidden layer
    itNum       : the times for iterate
    learningRate: the learn Rate for each iteration

    Directory arguments are concatenated directly with file names, so they
    must end with a path separator.

    Returns the path of the result file; each line of it is
    "<name> <test id> <predicted class>".
    '''
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_ANN_" + time.ctime()

    # The context manager closes the file even when training or prediction
    # raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            self.trainfile = trainDir + name
            self.testfile = testDir + name
            examplars = self.getExamplars()
            inputs = self.getInputs()
            # Layer sizes are taken from the class and vocabulary lists held
            # on self (presumably filled by the two calls above -- confirm).
            outNum = len(self.ClassOrderList)
            inNum = len(self.VocabOrderList)

            bpNet = ANN.BackPropNet()
            bpNet.addinput(inNum)
            bpNet.addhidden(HiddenNum)
            bpNet.addouput(outNum)

            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Learning %s" % name)
            bpNet.learn(examplars, itNum, learningRate)
            print("Predict %s" % name)
            results = bpNet.run(inputs)

            print("Writting result...")
            for i, e in enumerate(self.testName):
                # The predicted class is the output node with the highest value.
                scores = results[i]
                index = scores.index(max(scores))
                fout.write(name + " " + e + " " + self.ClassOrderList[index] + "\n")
            # Push this name's results to disk before the next training run.
            fout.flush()

    print("Finished Predicting")
    return outfile
def predict(self, trainDir, testDir, dirOut, HiddenNum, itNum, learningRate):
    '''
    Train one back-propagation network per name and write its predictions.

    trainDir    : the directory of train data;
    testDir     : the directory of test data (also holds "namefile");
    dirOut      : the directory for the result file
    HiddenNum   : the number of the nodes in the hidden layer
    itNum       : the times for iterate
    learningRate: the learn Rate for each iteration

    Directory arguments are concatenated directly with file names, so they
    must end with a path separator.

    Returns the path of the result file; each line of it is
    "<name> <test id> <predicted class>".
    '''
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_ANN_" + time.ctime()

    # The context manager closes the file even when training or prediction
    # raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            self.trainfile = trainDir + name
            self.testfile = testDir + name
            examplars = self.getExamplars()
            inputs = self.getInputs()
            # Layer sizes are taken from the class and vocabulary lists held
            # on self (presumably filled by the two calls above -- confirm).
            outNum = len(self.ClassOrderList)
            inNum = len(self.VocabOrderList)

            bpNet = ANN.BackPropNet()
            bpNet.addinput(inNum)
            bpNet.addhidden(HiddenNum)
            bpNet.addouput(outNum)

            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Learning %s" % name)
            bpNet.learn(examplars, itNum, learningRate)
            print("Predict %s" % name)
            results = bpNet.run(inputs)

            print("Writting result...")
            for i, e in enumerate(self.testName):
                # The predicted class is the output node with the highest value.
                scores = results[i]
                index = scores.index(max(scores))
                fout.write(name + " " + e + " " + self.ClassOrderList[index] + "\n")
            # Push this name's results to disk before the next training run.
            fout.flush()

    print("Finished Predicting")
    return outfile
def predict(self, trainDir, testDir, dirOut, HiddenNum=5, itNum=40):
    '''
    Train one back-propagation network per name and write its predictions.

    trainDir  : the directory of train data;
    testDir   : the directory of test data (also holds "namefile");
    dirOut    : the directory for the result file
    HiddenNum : the number of the nodes in the hidden layer (default 5)
    itNum     : the times for iterate (default 40)

    Directory arguments are concatenated directly with file names, so they
    must end with a path separator.

    Returns the path of the result file; each line of it is
    "<name> <test id> <predicted class>".
    '''
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_ANN_" + time.ctime()

    # The context manager closes the file even when training or prediction
    # raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            self.trainfile = trainDir + name
            self.testfile = testDir + name
            examplars = self.getExamplars()
            inputs = self.getInputs()
            # Layer sizes are taken from the class and vocabulary lists held
            # on self (presumably filled by the two calls above -- confirm).
            outNum = len(self.ClassOrderList)
            inNum = len(self.VocabOrderList)

            bpNet = ANN.BackPropNet()
            bpNet.addinput(inNum)
            bpNet.addhidden(HiddenNum)
            bpNet.addouput(outNum)

            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Learning %s" % name)
            bpNet.learn(examplars, itNum)
            print("Predict %s" % name)
            results = bpNet.run(inputs)

            print("Writting result...")
            for i, e in enumerate(self.testName):
                # The predicted class is the output node with the highest value.
                scores = results[i]
                index = scores.index(max(scores))
                fout.write(name + " " + e + " " + self.ClassOrderList[index] + "\n")
            # Push this name's results to disk before the next training run.
            fout.flush()

    print("Finished Predicting")
    return outfile
def predict(self, trainDir, testDir, dirOut, HiddenNum=5, itNum=40):
    '''
    Train one back-propagation network per name and write its predictions.

    trainDir  : the directory of train data;
    testDir   : the directory of test data (also holds "namefile");
    dirOut    : the directory for the result file
    HiddenNum : the number of the nodes in the hidden layer (default 5)
    itNum     : the times for iterate (default 40)

    Directory arguments are concatenated directly with file names, so they
    must end with a path separator.

    Returns the path of the result file; each line of it is
    "<name> <test id> <predicted class>".
    '''
    names = util.readNames(testDir + "namefile")
    if not names:
        # No usable name list: fall back to the directory listing.
        names = util.readDir(testDir)

    outfile = dirOut + "Result_ANN_" + time.ctime()

    # The context manager closes the file even when training or prediction
    # raises part-way through.
    with open(outfile, "w") as fout:
        for name in names:
            self.trainfile = trainDir + name
            self.testfile = testDir + name
            examplars = self.getExamplars()
            inputs = self.getInputs()
            # Layer sizes are taken from the class and vocabulary lists held
            # on self (presumably filled by the two calls above -- confirm).
            outNum = len(self.ClassOrderList)
            inNum = len(self.VocabOrderList)

            bpNet = ANN.BackPropNet()
            bpNet.addinput(inNum)
            bpNet.addhidden(HiddenNum)
            bpNet.addouput(outNum)

            # Single-argument print(...) prints the same text under
            # Python 2 and Python 3.
            print("Learning %s" % name)
            bpNet.learn(examplars, itNum)
            print("Predict %s" % name)
            results = bpNet.run(inputs)

            print("Writting result...")
            for i, e in enumerate(self.testName):
                # The predicted class is the output node with the highest value.
                scores = results[i]
                index = scores.index(max(scores))
                fout.write(name + " " + e + " " + self.ClassOrderList[index] + "\n")
            # Push this name's results to disk before the next training run.
            fout.flush()

    print("Finished Predicting")
    return outfile
def main():
    """Run the Naive Bayes test pipeline: predict on ../test/ and evaluate.

    Feature extraction and random cross validation are kept below as
    disabled, commented-out steps.
    """
    smooth_rate = 0.0001

    # ------------------------------For Feature Extractor------------------------------
    # (disabled)
    # extractor = Extractor()
    # extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    # extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2," | ")

    names = util.readNames("../test/namefile")

    # ------------------------------For Random Validation-------------------------------
    # (disabled)
    # fout = open("../result/Tune_Result "+str(time.ctime())+".csv","a")
    # results = []
    # for name in names:
    #     infile = "../train/"+name
    #     nb = NaiveBayes(infile)
    #     print "---------",name,"----------"
    #     result = nb.Random_Cross_Validation(20,4,smooth_rate)
    #     results.append(result)
    #     reStr = name + "," + str(result) + "\n"
    #     fout.write(reStr)
    # fout.close()
    # print "Macro AVG:",sum(results)/len(results)

    # ------------------------------------For Test--------------------------------------
    # Show which names are about to be processed.
    for current in names:
        print(current)

    # predict() receives only the directories, not a single name, so it is
    # invoked once here.
    # NOTE(review): confirm these calls were not meant to run per name.
    classifier = NaiveBayes()
    result_path = classifier.predict("../train/", "../test/", "../result/", smooth_rate)
    util.evaluate(result_path, "../result/test_answer")