"""Example: load a previously saved model and test it on new samples.

Note that GlobalInfo.Init and the other initializers below are called with
their LoadFromFile argument set to True, which means the trained result is
loaded instead of being trained again.
"""

import os
import sys

# Make the package in the parent of the current working directory importable.
# This must run before the pymining imports below.
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")

    # The trailing True is the LoadFromFile flag: restore saved state.
    GlobalInfo.Init(config, "__global__", True)
    txt2mat = Text2Matrix(config, "__matrix__", True)
    chiFilter = ChiSquareFilter(config, "__filter__", True)
    nbModel = NaiveBayes(config, "naive_bayes", True)

    # Build the prediction matrix from the test file, then pass it through
    # the loaded chi-square filter before classification.
    testx, testy = txt2mat.CreatePredictMatrix("data/test.txt")
    testx, testy = chiFilter.MatrixFilter(testx, testy)

    resultY, precision = nbModel.Test(testx, testy)

    # Parenthesized form prints the same value on Python 2 and also parses
    # on Python 3 (the bare `print precision` statement does not).
    print(precision)
# Example script: build a training matrix from "data/cluster.200", train and
# apply a chi-square filter on it, then train an Lda model on the result.

import os
import sys

# Make the package in the parent of the current working directory importable.
# This must run before the pymining imports below.
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes
from pymining.classifier.lda import Lda

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__")

    # Turn the raw text file into a training matrix and its labels.
    txt2mat = Text2Matrix(config, "__matrix__")
    trainx, trainy = txt2mat.CreateTrainMatrix("data/cluster.200")

    # Fit the chi-square filter on the training data, then apply it to
    # that same data.
    chiFilter = ChiSquareFilter(config, "__filter__")
    chiFilter.TrainFilter(trainx, trainy)
    trainx, trainy = chiFilter.MatrixFilter(trainx, trainy)

    # NOTE(review): the meaning of the third argument (10) is not visible
    # here -- confirm against Lda.Train.
    lda = Lda(config, "lda")
    lda.Train(trainx, trainy, 10)
from pymining.preprocessor.data_format import *
import time
from time import clock as now

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")

    #---------------------------------------------------------------------------------1.text data-----------------------------------------------------------------------------------
    # NOTE(review): the figures below look like recorded results for the
    # listed settings -- their origin cannot be confirmed from this chunk.
    ### C=100 kernel=RBF p=0.03
    ### Recall = 0.973913043478 Precision = 0.888888888889 Accuracy = 0.886666666667
    ### F(beta=1) = 0.604792440094 F(beta=2) = 0.738299274885 AUCb = 0.786956521739
    GlobalInfo.Init(config, "__global__")

    # Build the training matrix and fit/apply the chi-square filter on it.
    txt2mat = Text2Matrix(config, "__matrix__")
    trainx, trainy = txt2mat.CreateTrainMatrix("data/train.txt")
    chiFilter = ChiSquareFilter(config, "__filter__")
    chiFilter.TrainFilter(trainx, trainy)
    trainx, trainy = chiFilter.MatrixFilter(trainx, trainy)

    # Collapse the training labels to two classes: label 3 becomes 1,
    # every other label becomes -1.
    for i in range(trainx.nRow):
        trainy[i] = 1 if trainy[i] == 3 else -1

    # Build the test matrix and run it through the same trained filter.
    testx, testy = txt2mat.CreatePredictMatrix("data/test.txt")
    testx, testy = chiFilter.MatrixFilter(testx, testy)

    # Give the test matrix the same column count as the training matrix.
    testx.nCol = trainx.nCol

    # Same two-class relabeling for the test labels.
    for i in range(testx.nRow):
        testy[i] = 1 if testy[i] == 3 else -1