# Example driver: builds a training matrix from a text corpus, applies the
# chi-square filter to it, and trains pymining's Lda on the result.
import os
import sys

# Put the parent of the current working directory on the module search path.
# This has to happen before the pymining imports below can be resolved when
# the package is not installed.
sys.path.append(os.path.join(os.getcwd(), '../'))

# NOTE(review): Matrix, Segmenter and NaiveBayes are not referenced in this
# script; they are kept because importing them may have side effects.
from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes
from pymining.classifier.lda import Lda


def main():
    """Run the pipeline: load config, build matrix, filter it, train Lda.

    All paths ("conf/test.xml", "data/cluster.200") are relative to the
    current working directory.
    """
    settings = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(settings, "__global__")

    # Turn the raw training file into a (features, labels) pair.
    matrix_builder = Text2Matrix(settings, "__matrix__")
    features, labels = matrix_builder.CreateTrainMatrix("data/cluster.200")

    # Fit the chi-square filter on the training data, then apply it to the
    # same data.
    chi_filter = ChiSquareFilter(settings, "__filter__")
    chi_filter.TrainFilter(features, labels)
    features, labels = chi_filter.MatrixFilter(features, labels)

    # NOTE(review): the trailing 10 is presumably the number of topics or
    # iterations -- confirm against Lda.Train.
    model = Lda(settings, "lda")
    model.Train(features, labels, 10)


if __name__ == "__main__":
    main()
#import scipy comp from scipy import * #import pymining module from pymining.math.pca import Pca from pymining.math.matrix import Matrix from pymining.math.text2matrix import Text2Matrix from pymining.preprocessor.chisquare_filter import ChiSquareFilter from pymining.common.global_info import GlobalInfo from pymining.common.configuration import Configuration if __name__ == "__main__": #get pca config = Configuration.FromFile("conf/test.xml") GlobalInfo.Init(config, "__global__", False) txt2mat = Text2Matrix(config, "__matrix__", False) [trainx, trainy] = txt2mat.CreateTrainMatrix("data/train.txt") chiFilter = ChiSquareFilter(config, "__filter__") chiFilter.TrainFilter(trainx, trainy) [trainx, trainy] = chiFilter.MatrixFilter(trainx, trainy) pca = Pca() pca.TrainPrinComp(trainx, 2, True, False) prinCompX = pca.GetPrinComp(trainx, "col") #set colors colors = [] for y in trainy: if (y == 0):