def test(texts, classes, models, nn_params, folds=4):
    '''
    Estimate the performance of the Ensemble classifier with stratified
    k-fold cross-validation and return the mean AUC over the folds.

    For every fold an Ensemble is built from the training split, the
    held-out texts are classified, negative predictions are raised to 0 and
    the fold AUC (from calculate_auc) is printed.

    texts     -- sequence of input texts
    classes   -- sequence of labels parallel to texts; also handed to
                 StratifiedKFold to build the splits
    models    -- passed to Ensemble unchanged
    nn_params -- passed to Ensemble unchanged
    folds     -- number of cross-validation folds (default 4)

    Returns the sum of the per-fold AUC values divided by folds.
    '''
    # numpy arrays so the index arrays produced by the splitter can be used
    # directly for fancy indexing.
    classes = np.array(classes)
    texts = np.array(texts)

    # Float accumulator: keeps the final division a true division under
    # Python 2 even if calculate_auc were to return an int.
    auc_sum = 0.0
    for train_idx, test_idx in cross_validation.StratifiedKFold(classes, folds):
        ensemble = Ensemble(texts[train_idx], classes[train_idx],
                            nn_params, models)
        predictions = ensemble.classify(texts[test_idx])
        # Clamp negative scores to 0 before computing the AUC.
        predictions[predictions < 0] = 0

        auc = calculate_auc(classes[test_idx], predictions)
        # Parenthesised single-argument form prints the same under Python 2.
        print(auc)
        auc_sum += auc

    return auc_sum / folds
# #results[results<0] = 0
# #print calculate_auc(classes, results)
# r1 = np.array(m1.classify(texts))
# print calculate_auc(classes, r1)
# r2 = np.array(m2.classify(texts))
# print calculate_auc(classes, r2)
# r = (1.2*r1 + 0.8*r2) / 2
# r[r>1] = 1
# r[r<0] = 0
# print calculate_auc(classes, r)
#print TestSVM.test_model(texts, classes, models[-1])
#print TestSVM.test(texts, classes, models, nn_params)

# Build the ensemble from the texts/classes prepared earlier in the script.
n = Ensemble(texts, classes, nn_params, models)
end = time.time()
# print "training time="
# print end-start
start = time.time()

# Evaluate the classifier on the verification dataset: one text per line on
# stdin, terminated by an empty line or by end of input.
texts = []
while True:
    try:
        inp = raw_input()
    except EOFError:
        # raw_input() raises EOFError once stdin is exhausted; treat that
        # like the blank terminator line instead of crashing before the
        # collected texts are classified.
        break
    if not inp:
        break
    # Input lines are byte strings; decode them as UTF-8 before classifying.
    texts.append(inp.decode('utf8'))

results = n.classify(texts)
# Parenthesised single-argument form prints the same under Python 2.
print(results)
r[r>1] = 1 r[r<0] = 0 print calculate_auc(classes, r) #print TestSVM.test_model(texts, classes, models[-1]) #print TestSVM.test(texts, classes, models, nn_params) n = Ensemble(texts, classes, nn_params, models) texts = [] csvr = csv.reader(open('test.csv', 'rb'), delimiter=',', quotechar='"') csvr.next() for row in csvr: texts.append(row[1].decode('utf8')) results = n.classify(texts) results[results<0] = 0 results[results>1] = 1 writer = open('rez.csv', 'w') for r in results: writer.write('%s\n' % r) writer.close() ''' wrongs = [] for i in range(len(texts)): if abs(classes[i] - results[i]) > 0.5: