Example #1
0
    download(url, dst)
    return dst


# Register the two jars used by this script; each call passes the jar file
# name, PROJECT_PATH and the jar's download URL.
# NOTE(review): presumably these jars provide the Java classes loaded below --
# confirm against install_jar's definition.
install_jar('text-classification-svm-1.0.2.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar')
install_jar('liblinear-1.95.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/liblinear-1.95.jar')
##########################################################################################

# Load the classifier
LinearSVMClassifier = SafeJClass(
    'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier')
# Utility used to save the model
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')

# Load the tokenizer
# NOTE(review): this class is loaded with JClass while the two above use
# SafeJClass -- confirm the difference is intentional.
BigramTokenizer = JClass(
    'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer')

##########################################################################################

if __name__ == '__main__':
    # Script entry point: build the classification data, train a linear SVM
    # classifier on it, save the trained model and classify one sample text.
    # divisionTrainData is defined elsewhere in this file; it receives the raw
    # training-data path and the classification-data path.
    divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH)
    classifier = LinearSVMClassifier()
    classifier.train(CLASSIFICATION_DATA_PATH)
    # Save the model.
    # The Java accessor is getModel(); Java method lookup is case-sensitive,
    # so the previous getmodel() spelling could not resolve.
    model = classifier.getModel()
    # os.path.join() requires at least one path component (calling it with no
    # arguments raises TypeError), so the target file is spelled out here.
    IOUtil.saveObjectTo(model, os.path.join(PROJECT_PATH, '.svm.ser'))
    print(classifier.classify("我去挂机了"))
Example #2
0
    download(url, dst)
    return dst


# Register the two jars used by this script; each call passes the jar file
# name, PROJECT_PATH and the jar's download URL.
# NOTE(review): presumably these jars provide the Java classes loaded below --
# confirm against install_jar's definition.
install_jar('text-classification-svm-1.0.2.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar')
install_jar('liblinear-1.95.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/liblinear-1.95.jar')
##########################################################################################

# Load the classifier
LinearSVMClassifier = SafeJClass(
    'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier')
# Utility used to save the model
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')

# Load the tokenizer
# NOTE(review): this class is loaded with JClass while the two above use
# SafeJClass -- confirm the difference is intentional.
BigramTokenizer = JClass(
    'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer')

##########################################################################################

if __name__ == '__main__':
    # Script entry point: build the classification data, train a linear SVM
    # classifier on it, save the trained model and classify one sample text.
    # divisionTrainData is defined elsewhere in this file; it receives the raw
    # training-data path and the classification-data path.
    divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH)
    classifier = LinearSVMClassifier()
    classifier.train(CLASSIFICATION_DATA_PATH)
    # Save the model.
    # The Java accessor is getModel(); Java method lookup is case-sensitive,
    # so the previous getmodel() spelling could not resolve.
    model = classifier.getModel()
    # NOTE(review): model_path is not defined in the visible part of this
    # file -- confirm it is assigned before this point.
    IOUtil.saveObjectTo(model, model_path)
    print(classifier.classify("我去挂机了"))
Example #3
0
                elif ((text[2] == "0")):
                    TN += 1
    else:
        print("range value is false")
        fi.close()
        return
    print("TP:", TP, "\tFP:", FP, "\nFN:", FN, "\tTN:", TN)
    P = TP / (TP + FP)
    R = TP / (TP + FN)
    SCORE = 4 * P * R / (P + 3 * R)
    print("TP:", TP, "\tFP:", FP, "\nFN:", FN, "\tTN:", TN)
    print("精确率:", P, "召回率", R, "F1:", SCORE)
    fi.close()


##########################################################################################

if __name__ == '__main__':
    # Script entry point: runs dataPreprocessing and divisionTrainData, trains
    # a LinearSVMClassifier on the loaded corpus, saves its model and then
    # calls valuation with the trained classifier.
    dataPath = dataPreprocessing(TRAIN_DATA_PATH)
    print(dataPath)
    divisionTrainData(dataPath, CLASSIFICATION_DATA_PATH)
    # Train on the first 90% of the data; the last 10% is used for validation
    training_corpus = FileDataSet().setTokenizer(BigramTokenizer()).load(
        CLASSIFICATION_DATA_PATH, "UTF-8", 0.9)
    classifier = LinearSVMClassifier()
    classifier.train(training_corpus)
    # Save the trained model to SAVE_MODEL_PATH
    model = classifier.getModel()
    IOUtil.saveObjectTo(model, SAVE_MODEL_PATH)
    # Validate using the last 10% of the data
    valuation(dataPath, -0.1, classifier)
Example #4
0
# Register the two jars used by this script; each call passes the jar file
# name, PROJECT_PATH and the jar's download URL.
# NOTE(review): presumably these jars provide the Java classes loaded below --
# confirm against install_jar's definition.
install_jar('text-classification-svm-1.0.2.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar')
install_jar('liblinear-1.95.jar', PROJECT_PATH,
            'http://file.hankcs.com/bin/liblinear-1.95.jar')
##########################################################################################

# Load the tokenizer
# NOTE(review): this class is loaded with JClass while the two below use
# SafeJClass -- confirm the difference is intentional.
BigramTokenizer = JClass(
    'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer')

# Load the classifier
LinearSVMClassifier = SafeJClass(
    'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier')

# Utility used to save the model
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')

##########################################################################################

if __name__ == '__main__':
    # Script entry point: build the classification data, train a linear SVM
    # classifier on it, save the trained model and classify one sample text.
    divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH)

    svm = LinearSVMClassifier()
    svm.train(CLASSIFICATION_DATA_PATH)

    # Save the model next to the project files.
    trained_model = svm.getModel()
    model_file = os.path.join(PROJECT_PATH, '.svm.ser')
    IOUtil.saveObjectTo(trained_model, model_file)

    sample_label = svm.classify("我去挂机了")
    print(sample_label)