Example #1
0
File: NB.py Project: Huarong/WSD
    def predict(self, trainDir, testDir, dirOut, smooth_rate=0.001):
        '''
        Train on and classify every name in turn, writing all predictions
        to one result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        smooth_rate : the value handed to self.learn()

        Returns the path of the result file. One line is written per entry
        of self.testList: "<name> <self.testName[i]> <classify() result>".
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_NB_" + str(time.ctime())

        # "with" guarantees the result file is closed even when loading,
        # learning or classifying raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                # Single-argument parenthesized print: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Predicting %s" % name)

                # loadFeature() reads from self.infile, so point it at the
                # train file first, then at the test file.
                self.infile = trainDir + name
                self.loadFeature()
                self.createVocabList()
                self.learn(smooth_rate)

                self.infile = testDir + name
                self.loadFeature("test")

                for i, testFeatures in enumerate(self.testList):
                    testVec = self.bagOfWords2Vec(testFeatures)
                    result = self.classify(testVec)
                    fout.write(name + " " + self.testName[i] + " " + result + "\n")

        return outfile      # for evaluate
Example #2
0
File: NB.py Project: mk2908/WSD
def main():
    '''
    Run the Naive Bayes test stage and evaluate the file it produces.

    Only the "For Test" stage executes. The feature-extractor and
    random-validation stages are wrapped in bare string literals, which are
    evaluated and discarded, so they never run.
    '''
    # Name kept as-is: the disabled validation stage below refers to it.
    smooth_rate = 0.0001
    '''
    #------------------------------For Feature Extractor------------------------------
    extractor = Extractor()
    extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2," | ")
    '''
    names = util.readNames("../test/namefile")
    '''
    #------------------------------For Random Validation-------------------------------
    fout = open("../result/Tune_Result "+str(time.ctime())+".csv","a")
    results = []
    for name in names:
        infile = "../train/"+name

        nb = NaiveBayes(infile)
        print "---------",name,"----------"
        result = nb.Random_Cross_Validation(20,4,smooth_rate)
        results.append(result)
        reStr = name + "," + str(result) + "\n"
        fout.write(reStr)
    fout.close()
    print "Macro AVG:",sum(results)/len(results)

    '''
    #------------------------------------For Test--------------------------------------
    # Echo every name read from the namefile, one per line.
    for entry in names:
        print(entry)

    # predict() is handed the train, test and result directories; whatever
    # it returns is passed to util.evaluate together with the answer file.
    classifier = NaiveBayes()
    result_path = classifier.predict(
        "../train/", "../test/", "../result/", smooth_rate)
    util.evaluate(result_path, "../result/test_answer")
Example #3
0
File: NB.py Project: mk2908/WSD
    def predict(self, trainDir, testDir, dirOut, smooth_rate=0.001):
        '''
        Train on and classify every name in turn, writing all predictions
        to one result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        smooth_rate : the value handed to self.learn()

        Returns the path of the result file. One line is written per entry
        of self.testList: "<name> <self.testName[i]> <classify() result>".
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_NB_" + str(time.ctime())

        # "with" guarantees the result file is closed even when loading,
        # learning or classifying raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                # Single-argument parenthesized print: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Predicting %s" % name)

                # loadFeature() reads from self.infile, so point it at the
                # train file first, then at the test file.
                self.infile = trainDir + name
                self.loadFeature()
                self.createVocabList()
                self.learn(smooth_rate)

                self.infile = testDir + name
                self.loadFeature("test")

                for i, testFeatures in enumerate(self.testList):
                    testVec = self.bagOfWords2Vec(testFeatures)
                    result = self.classify(testVec)
                    fout.write(name + " " + self.testName[i] + " " + result + "\n")

        return outfile  # for evaluate
Example #4
0
    def predict(self, trainDir, testDir, dirOut, HiddenNum, itNum,
                learningRate):
        '''
        Train one back-propagation network per name and write the predicted
        class of every test entry to a single result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        HiddenNum   : the number of the nodes in the hidden layer
        itNum       : the iteration count passed to the network's learn()
        learningRate: the learning rate passed to the network's learn()

        Returns the path of the result file. Each line written is
        "<name> <test name> <class>", where the class is the entry of
        self.ClassOrderList at the position of the largest network output.
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_ANN_" + str(time.ctime())

        # "with" guarantees the result file is closed even when training or
        # running the network raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                self.trainfile = trainDir + name
                self.testfile = testDir + name

                # Called before the list lengths are read below; presumably
                # these calls fill ClassOrderList / VocabOrderList / testName
                # for the current name -- TODO confirm.
                examplars = self.getExamplars()
                inputs = self.getInputs()

                outNum = len(self.ClassOrderList)
                inNum = len(self.VocabOrderList)

                bpNet = ANN.BackPropNet()
                bpNet.addinput(inNum)
                bpNet.addhidden(HiddenNum)
                bpNet.addouput(outNum)

                # Single-argument parenthesized prints: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Learning %s" % name)
                bpNet.learn(examplars, itNum, learningRate)
                print("Predict %s" % name)
                results = bpNet.run(inputs)
                print("Writting result...")
                for i, e in enumerate(self.testName):
                    scores = results[i]
                    # The predicted class is the one with the highest output.
                    index = scores.index(max(scores))
                    fout.write(name + " " + e + " " +
                               self.ClassOrderList[index] + "\n")
                # One flush per name (instead of per line) still puts the
                # finished results on disk before the next network trains.
                fout.flush()
        print("Finished Predicting")
        return outfile
Example #5
0
    def predict(self, trainDir, testDir, dirOut, HiddenNum, itNum, learningRate):
        '''
        Train one back-propagation network per name and write the predicted
        class of every test entry to a single result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        HiddenNum   : the number of the nodes in the hidden layer
        itNum       : the iteration count passed to the network's learn()
        learningRate: the learning rate passed to the network's learn()

        Returns the path of the result file. Each line written is
        "<name> <test name> <class>", where the class is the entry of
        self.ClassOrderList at the position of the largest network output.
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_ANN_" + str(time.ctime())

        # "with" guarantees the result file is closed even when training or
        # running the network raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                self.trainfile = trainDir + name
                self.testfile = testDir + name

                # Called before the list lengths are read below; presumably
                # these calls fill ClassOrderList / VocabOrderList / testName
                # for the current name -- TODO confirm.
                examplars = self.getExamplars()
                inputs = self.getInputs()

                outNum = len(self.ClassOrderList)
                inNum = len(self.VocabOrderList)

                bpNet = ANN.BackPropNet()
                bpNet.addinput(inNum)
                bpNet.addhidden(HiddenNum)
                bpNet.addouput(outNum)

                # Single-argument parenthesized prints: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Learning %s" % name)
                bpNet.learn(examplars, itNum, learningRate)
                print("Predict %s" % name)
                results = bpNet.run(inputs)
                print("Writting result...")
                for i, e in enumerate(self.testName):
                    scores = results[i]
                    # The predicted class is the one with the highest output.
                    index = scores.index(max(scores))
                    fout.write(name + " " + e + " " +
                               self.ClassOrderList[index] + "\n")
                # One flush per name (instead of per line) still puts the
                # finished results on disk before the next network trains.
                fout.flush()
        print("Finished Predicting")
        return outfile
Example #6
0
    def predict(self, trainDir, testDir, dirOut, HiddenNum=5, itNum=40):
        '''
        Train one back-propagation network per name and write the predicted
        class of every test entry to a single result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        HiddenNum   : the number of the nodes in the hidden layer; defaults
                      to the previously hard-coded 5
        itNum       : the iteration count passed to the network's learn();
                      defaults to the previously hard-coded 40

        Returns the path of the result file. Each line written is
        "<name> <test name> <class>", where the class is the entry of
        self.ClassOrderList at the position of the largest network output.
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_ANN_" + str(time.ctime())

        # "with" guarantees the result file is closed even when training or
        # running the network raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                self.trainfile = trainDir + name
                self.testfile = testDir + name

                # Called before the list lengths are read below; presumably
                # these calls fill ClassOrderList / VocabOrderList / testName
                # for the current name -- TODO confirm.
                examplars = self.getExamplars()
                inputs = self.getInputs()

                outNum = len(self.ClassOrderList)
                inNum = len(self.VocabOrderList)

                bpNet = ANN.BackPropNet()
                bpNet.addinput(inNum)
                bpNet.addhidden(HiddenNum)
                bpNet.addouput(outNum)

                # Single-argument parenthesized prints: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Learning %s" % name)
                bpNet.learn(examplars, itNum)
                print("Predict %s" % name)
                results = bpNet.run(inputs)
                print("Writting result...")
                for i, e in enumerate(self.testName):
                    scores = results[i]
                    # The predicted class is the one with the highest output.
                    index = scores.index(max(scores))
                    fout.write(name + " " + e + " " +
                               self.ClassOrderList[index] + "\n")
                # One flush per name (instead of per line) still puts the
                # finished results on disk before the next network trains.
                fout.flush()
        print("Finished Predicting")
        return outfile
Example #7
0
    def predict(self, trainDir, testDir, dirOut, HiddenNum=5, itNum=40):
        '''
        Train one back-propagation network per name and write the predicted
        class of every test entry to a single result file.

        trainDir    : the directory of train data; it is joined to each name
                      by plain concatenation, so include the trailing
                      separator
        testDir     : the directory of test data (same convention); the
                      names to process are read from its "namefile"
        dirOut      : the directory for the result file (same convention)
        HiddenNum   : the number of the nodes in the hidden layer; defaults
                      to the previously hard-coded 5
        itNum       : the iteration count passed to the network's learn();
                      defaults to the previously hard-coded 40

        Returns the path of the result file. Each line written is
        "<name> <test name> <class>", where the class is the entry of
        self.ClassOrderList at the position of the largest network output.
        '''
        names = util.readNames(testDir + "namefile")
        if not names:
            # Nothing came back from the namefile: use what util.readDir
            # reports for the test directory instead.
            names = util.readDir(testDir)

        outfile = dirOut + "Result_ANN_" + str(time.ctime())

        # "with" guarantees the result file is closed even when training or
        # running the network raises part-way through the loop.
        with open(outfile, "w") as fout:
            for name in names:
                self.trainfile = trainDir + name
                self.testfile = testDir + name

                # Called before the list lengths are read below; presumably
                # these calls fill ClassOrderList / VocabOrderList / testName
                # for the current name -- TODO confirm.
                examplars = self.getExamplars()
                inputs = self.getInputs()

                outNum = len(self.ClassOrderList)
                inNum = len(self.VocabOrderList)

                bpNet = ANN.BackPropNet()
                bpNet.addinput(inNum)
                bpNet.addhidden(HiddenNum)
                bpNet.addouput(outNum)

                # Single-argument parenthesized prints: identical output on
                # Python 2 and still valid syntax on Python 3.
                print("Learning %s" % name)
                bpNet.learn(examplars, itNum)
                print("Predict %s" % name)
                results = bpNet.run(inputs)
                print("Writting result...")
                for i, e in enumerate(self.testName):
                    scores = results[i]
                    # The predicted class is the one with the highest output.
                    index = scores.index(max(scores))
                    fout.write(name + " " + e + " " +
                               self.ClassOrderList[index] + "\n")
                # One flush per name (instead of per line) still puts the
                # finished results on disk before the next network trains.
                fout.flush()
        print("Finished Predicting")
        return outfile
Example #8
0
File: NB.py Project: Huarong/WSD
def main():
    '''
    Run the Naive Bayes test stage and evaluate the file it produces.

    Only the "For Test" stage executes. The feature-extractor and
    random-validation stages are wrapped in bare string literals, which are
    evaluated and discarded, so they never run.
    '''
    # Name kept as-is: the disabled validation stage below refers to it.
    smooth_rate = 0.0001

    '''
    #------------------------------For Feature Extractor------------------------------
    extractor = Extractor()
    extractor.extract("../corpus/train_corpus.xml", "../train/", "train", 7, 3, 2, " | ")
    extractor.extract("../corpus/test_corpus.xml", "../test/", "test", 7, 3, 2," | ")
    '''
    names = util.readNames("../test/namefile")

    '''
    #------------------------------For Random Validation-------------------------------
    fout = open("../result/Tune_Result "+str(time.ctime())+".csv","a")
    results = []
    for name in names:
        infile = "../train/"+name

        nb = NaiveBayes(infile)
        print "---------",name,"----------"
        result = nb.Random_Cross_Validation(20,4,smooth_rate)
        results.append(result)
        reStr = name + "," + str(result) + "\n"
        fout.write(reStr)
    fout.close()
    print "Macro AVG:",sum(results)/len(results)

    '''
    #------------------------------------For Test--------------------------------------
    # Echo every name read from the namefile, one per line.
    for entry in names:
        print(entry)

    # predict() is handed the train, test and result directories; whatever
    # it returns is passed to util.evaluate together with the answer file.
    classifier = NaiveBayes()
    result_path = classifier.predict(
        "../train/", "../test/", "../result/", smooth_rate)
    util.evaluate(result_path, "../result/test_answer")