Esempio n. 1
0
"""
This example shows how to load a saved model and test it on new samples.
Note that in GlobalInfo.Init and the other initializers, the third
argument (LoadFromFile) is True, which means the trained result is loaded.
"""
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes

if __name__ == "__main__":
    # Each component is constructed with True as its third argument, which
    # (per the LoadFromFile flag) makes it load the previously trained result.
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__", True)
    txt2mat = Text2Matrix(config, "__matrix__", True)
    chiFilter = ChiSquareFilter(config, "__filter__", True)

    nbModel = NaiveBayes(config, "naive_bayes", True)

    # Build the matrix for the new samples, run it through the loaded
    # chi-square filter, then evaluate the loaded naive Bayes model on it.
    testx, testy = txt2mat.CreatePredictMatrix("data/test.txt")
    testx, testy = chiFilter.MatrixFilter(testx, testy)
    resultY, precision = nbModel.Test(testx, testy)

    # Parenthesized single-argument form prints the same text on Python 2
    # and is also valid syntax on Python 3 (the bare statement is not).
    print(precision)
Esempio n. 2
0
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes
from pymining.classifier.lda import Lda

if __name__ == "__main__":
    # Shared setup: read the XML configuration and initialise GlobalInfo.
    cfg = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(cfg, "__global__")

    # Build the training matrix and its labels from the input file.
    matrix_builder = Text2Matrix(cfg, "__matrix__")
    features, labels = matrix_builder.CreateTrainMatrix("data/cluster.200")

    # Train the chi-square filter on the training data, then apply it to
    # that same data.
    chi_filter = ChiSquareFilter(cfg, "__filter__")
    chi_filter.TrainFilter(features, labels)
    features, labels = chi_filter.MatrixFilter(features, labels)

    # Train LDA on the filtered matrix; 10 is passed as Train's third argument.
    topic_model = Lda(cfg, "lda")
    topic_model.Train(features, labels, 10)

Esempio n. 3
0
from pymining.preprocessor.data_format import *
import time
from time import clock as now

if __name__ == "__main__":
    # Script entry point: prepares binary-labelled train and test matrices
    # from the text data set.
    config = Configuration.FromFile("conf/test.xml")
    #---------------------------------------------------------------------------------1.text data-----------------------------------------------------------------------------------
    # NOTE(review): the three lines below look like recorded settings and
    # scores from an earlier run -- confirm before relying on them.
    ### C=100 kernel=RBF p=0.03
    ### Recall =  0.973913043478 Precision =  0.888888888889 Accuracy =  0.886666666667
    ### F(beta=1) =  0.604792440094 F(beta=2) =  0.738299274885 AUCb =  0.786956521739
    GlobalInfo.Init(config, "__global__")
    txt2mat = Text2Matrix(config, "__matrix__")
    [trainx, trainy] = txt2mat.CreateTrainMatrix("data/train.txt")
    # Train the chi-square filter on the training data, then apply it.
    chiFilter = ChiSquareFilter(config, "__filter__")
    chiFilter.TrainFilter(trainx, trainy)
    [trainx, trainy] = chiFilter.MatrixFilter(trainx, trainy)

    # Collapse the training labels to two classes: label 3 becomes +1 and
    # every other label becomes -1.
    for i in range(0, trainx.nRow):
        if trainy[i] == 3:
            trainy[i] = 1
        else:
            trainy[i] = -1

    # Build the test matrix and pass it through the filter trained above.
    [testx, testy] = txt2mat.CreatePredictMatrix("data/test.txt")
    [testx, testy] = chiFilter.MatrixFilter(testx, testy)
    # Overwrite the test matrix's column count with the training matrix's;
    # presumably so both report the same feature dimension -- TODO confirm.
    testx.nCol = trainx.nCol
    # Apply the same +1 / -1 relabelling to the test labels.
    for i in range(0, testx.nRow):
        if testy[i] == 3:
            testy[i] = 1
        else:
            testy[i] = -1