download(url, dst) return dst install_jar('text-classification-svm-1.0.2.jar', PROJECT_PATH, 'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar') install_jar('liblinear-1.95.jar', PROJECT_PATH, 'http://file.hankcs.com/bin/liblinear-1.95.jar') ########################################################################################## # 载入分类器 LinearSVMClassifier = SafeJClass( 'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier') # 保存模型的工具 IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil') # 载入分词器 BigramTokenizer = JClass( 'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer') ########################################################################################## if __name__ == '__main__': divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH) classifier = LinearSVMClassifier() classifier.train(CLASSIFICATION_DATA_PATH) # 保存模型 model = classifier.getmodel() IOUtil.saveObjectTo(model, os.path.join()) print(classifier.classify("我去挂机了"))
download(url, dst) return dst install_jar('text-classification-svm-1.0.2.jar', PROJECT_PATH, 'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar') install_jar('liblinear-1.95.jar', PROJECT_PATH, 'http://file.hankcs.com/bin/liblinear-1.95.jar') ########################################################################################## # 载入分类器 LinearSVMClassifier = SafeJClass( 'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier') # 保存模型的工具 IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil') # 载入分词器 BigramTokenizer = JClass( 'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer') ########################################################################################## if __name__ == '__main__': divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH) classifier = LinearSVMClassifier() classifier.train(CLASSIFICATION_DATA_PATH) # 保存模型 model = classifier.getmodel() IOUtil.saveObjectTo(model, model_path) print(classifier.classify("我去挂机了"))
elif ((text[2] == "0")): TN += 1 else: print("range value is false") fi.close() return print("TP:", TP, "\tFP:", FP, "\nFN:", FN, "\tTN:", TN) P = TP / (TP + FP) R = TP / (TP + FN) SCORE = 4 * P * R / (P + 3 * R) print("TP:", TP, "\tFP:", FP, "\nFN:", FN, "\tTN:", TN) print("精确率:", P, "召回率", R, "F1:", SCORE) fi.close() ########################################################################################## if __name__ == '__main__': dataPath = dataPreprocessing(TRAIN_DATA_PATH) print(dataPath) divisionTrainData(dataPath, CLASSIFICATION_DATA_PATH) # 使用前90%的数据进行训练,使用后10%数据进行验证 training_corpus = FileDataSet().setTokenizer(BigramTokenizer()).load( CLASSIFICATION_DATA_PATH, "UTF-8", 0.9) classifier = LinearSVMClassifier() classifier.train(training_corpus) model = classifier.getModel() IOUtil.saveObjectTo(model, SAVE_MODEL_PATH) # 使用后10%的数据进行验证 valuation(dataPath, -0.1, classifier)
install_jar(
    'text-classification-svm-1.0.2.jar',
    PROJECT_PATH,
    'http://file.hankcs.com/bin/text-classification-svm-1.0.2.jar',
)
install_jar(
    'liblinear-1.95.jar',
    PROJECT_PATH,
    'http://file.hankcs.com/bin/liblinear-1.95.jar',
)

##########################################################################################
# Load the tokenizer.
BigramTokenizer = JClass(
    'com.hankcs.hanlp.classification.tokenizers.BigramTokenizer')
# Load the classifier.
LinearSVMClassifier = SafeJClass(
    'com.hankcs.hanlp.classification.classifiers.LinearSVMClassifier')
# Utility used to save the model.
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')

##########################################################################################
if __name__ == '__main__':
    # Script entry point: run divisionTrainData on the training data, train
    # the linear SVM classifier on its output, save the trained model and
    # print the predicted label for one sample sentence.
    divisionTrainData(TRAIN_DATA_PATH, CLASSIFICATION_DATA_PATH)

    classifier = LinearSVMClassifier()
    classifier.train(CLASSIFICATION_DATA_PATH)

    # Save the model to a file under PROJECT_PATH.
    model = classifier.getModel()
    IOUtil.saveObjectTo(
        model,
        os.path.join(PROJECT_PATH, '.svm.ser'),
    )

    print(classifier.classify("我去挂机了"))