# Load the test and training corpora, vectorise both, fit a linear SVM on the
# training vectors, persist the fitted model, and open the prediction output.
#
# Names defined outside this excerpt: fileHandler_t (already-open test file),
# vector_create, word_r, word_n, svm, joblib.
# The single-argument print(...) form is used so the text printed is the same
# on Python 2 and the code also parses on Python 3.

# fileHandler_t was opened by earlier code; rewind it, read every row, and
# make sure it is closed even if the read fails.
try:
    fileHandler_t.seek(0)
    lines_t = fileHandler_t.readlines()
finally:
    fileHandler_t.close()

# A freshly opened file is already at offset 0, so no seek is needed here.
# The name fileHandler_tr stays bound (to the closed file) after the block.
with open('data\\data.txt') as fileHandler_tr:
    lines_tr = fileHandler_tr.readlines()
print("read finished")

# Test rows are split on the first tab only and field 1 is vectorised; in the
# training rows below the text is field 2 (presumably because test rows carry
# no label column -- confirm against the data files).
vector_array = []
for line in lines_t:
    fields = line.split('\t', 1)
    vector_array.append(vector_create(fields[1], word_r, word_n))

# Training rows are split on the first two tabs: field 1 is used as the class
# label and field 2 is the text that gets vectorised.
train_array = []
flag = []
for line in lines_tr:
    fields = line.split('\t', 2)
    train_array.append(vector_create(fields[2], word_r, word_n))
    flag.append(fields[1])

print("svm start")
# Alternative classifier kept from the original for reference:
# clf = svm.SVC()
clf = svm.LinearSVC(dual=False, C=10)
clf.fit(train_array, flag)
joblib.dump(clf, 'classifiter/svc_classifiter.pkl')
# To reuse a previously saved model instead of refitting:
# clf = joblib.load('classifiter.pkl')
print("svm finished")

print("predicting")
# Left as a plain open(): the code that writes to and closes this handle lies
# beyond this excerpt. NOTE(review): confirm it is closed there.
fileHandler_out = open('dest_result/svc_result_14.csv', "w")
# print count
# print 'radio:', float(is_rubbish)/count

# NOTE(review): the bucket selection below is the tail of a per-row loop whose
# header is outside this excerpt; when merging, indent these two statements
# under that loop. Rows whose first field is a multiple of 5 go to the test
# split, all others to the training split.
bucket = list_test if int(line[0]) % 5 == 0 else list_train
bucket.append(line)

# Names defined outside this excerpt: fileUtil, vector_create, modle, predict,
# list_train, list_test.
word_r, word_n = fileUtil.read_dict()

# For every row, field 2 is vectorised and field 1 is kept alongside it
# (as the fitting target for training rows, as the reference answer for
# test rows).
print('set train vector')
train_array = [vector_create(row[2], word_r, word_n) for row in list_train]
flag = [row[1] for row in list_train]

print('set test vector')
test_array = [vector_create(row[2], word_r, word_n) for row in list_test]
answer = [row[1] for row in list_test]
print("read finished")

# svm
clf = modle(train_array, flag)
result = predict(clf, test_array)

# Counters start at zero; they are consumed by code after this excerpt.
# NOTE(review): names suggest per-class totals and per-class predicted
# counts -- confirm against the evaluation code that follows.
sum_rubbish = sum_normal = 0
pre_rubbish = pre_normal = 0