Exemple #1
0
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes
from pymining.classifier.lda import Lda

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__")
    txt2mat = Text2Matrix(config, "__matrix__")
    [trainx, trainy] = txt2mat.CreateTrainMatrix("data/cluster.200")
    chiFilter = ChiSquareFilter(config, "__filter__")
    chiFilter.TrainFilter(trainx, trainy)
    [trainx, trainy] = chiFilter.MatrixFilter(trainx, trainy)

    lda = Lda(config, "lda")
    lda.Train(trainx, trainy, 10)

sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration 
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.twc_naive_bayes import TwcNaiveBayes

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__")
    txt2mat = Text2Matrix(config, "__matrix__")
    [trainx, trainy] = txt2mat.CreateTrainMatrix("data/train.txt")
    chiFilter = ChiSquareFilter(config, "__filter__")
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../'))

import math
import pickle
import sys

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration 
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.twc_naive_bayes import TwcNaiveBayes
this is a example shows train model using a corpus, and test a sample in str
"""
import sys, os

sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__")
    txt2mat = Text2Matrix(config, "__matrix__")
    [trainx, trainy] = txt2mat.CreateTrainMatrix("data/train.txt")
    chiFilter = ChiSquareFilter(config, "__filter__")
    chiFilter.TrainFilter(trainx, trainy)

    nbModel = NaiveBayes(config, "naive_bayes")
    nbModel.Train(trainx, trainy)

    inputStr = "仅售28元!原价698元的康迩福韩国美容美体中心的韩国特色美容套餐1份(紫莱花园店、时代奥城店2店通用):韩国特色面部SPA护理1次+韩国特色面部瘦脸加毛孔净化1次+韩国特色水"
    [cols, vals] = txt2mat.CreatePredictSample(inputStr.decode("utf-8"))
    [cols, vals] = chiFilter.SampleFilter(cols, vals)
    probTuple = nbModel.TestSample(cols, vals)
    print probTuple
"""
this is a example shows load saved model and test new samples
notice in GlobalInfo.Init and other initialize functions
the second parameter - LoadFromFile is true, means load trained result
"""
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../'))

from pymining.math.matrix import Matrix
from pymining.math.text2matrix import Text2Matrix
from pymining.nlp.segmenter import Segmenter
from pymining.common.global_info import GlobalInfo
from pymining.common.configuration import Configuration
from pymining.preprocessor.chisquare_filter import ChiSquareFilter
from pymining.classifier.naive_bayes import NaiveBayes

if __name__ == "__main__":
    config = Configuration.FromFile("conf/test.xml")
    GlobalInfo.Init(config, "__global__", True)
    txt2mat = Text2Matrix(config, "__matrix__", True)
    chiFilter = ChiSquareFilter(config, "__filter__", True)

    nbModel = NaiveBayes(config, "naive_bayes", True)

    [testx, testy] = txt2mat.CreatePredictMatrix("data/test.txt")
    [testx, testy] = chiFilter.MatrixFilter(testx, testy)
    [resultY, precision] = nbModel.Test(testx, testy)

    print precision