예제 #1
0
import func
##########引入写的函数############

#引入训练集和测试集(包括标签和句子)
train_label_list,train_corpus_list=func.part_jieba('data/train_set.txt')
test_label_list,test_corpus_list=func.part_jieba('data/test_set.txt')

#训练模型
train_tfidf,transformer,vectorizer=func.tfidf_cop(train_corpus_list)#计算训练集tfidf
classiy_model=func.model_train(train_tfidf,train_label_list) #训练模型并测试

#测试
test_tfidf=func.tfidf_cop2(test_corpus_list,transformer,vectorizer)#计算测试集tfidf
test1=test_tfidf[:1000]
func.model_test(test_tfidf,test_label_list,test1,classiy_model)
예제 #2
0
import func
##########引入写的函数############

train_label_list, train_corpus_list = func.part_jieba(
    'C:/Users/Tina/Desktop/samples/train_set.txt')
test_label_list, test_corpus_list = func.part_jieba(
    'C:/Users/Tina/Desktop/samples/test_set.txt')

corpus_list = test_corpus_list + train_corpus_list  #构造处理完的句子
label_list = test_label_list + train_label_list  #构造标签

length = len(test_label_list)  #便于计算完tfidf之后的划分

tfidf = func.tfidf_cop(corpus_list)  #计算tfidf

classiy_model = func.model_train(tfidf, label_list)  #训练模型并测试

test_tfidf = tfidf[:length]
test1 = tfidf[:1000]
func.model_test(test_tfidf, test_label_list, test1, classiy_model)