def decisionTree(trainx, trainy, testx, testy):
    treeLearn = mlpy.ClassTree(minsize=2)
    treeLearn.learn(trainx, trainy)
    treeVals = treeLearn.pred(testx)
    treeVals = correctVals(treeVals)
    print("The decision tree accuracy on the test set is: " + str(mlpy.accuracy(treeVals, testy)))
    trainVals = treeLearn.pred(trainx)
    trainVals = correctVals(trainVals)  # fixed: was assigned to treeVals, leaving trainVals uncorrected
    print("The decision tree accuracy on the training set is: " + str(mlpy.accuracy(trainVals, trainy)))
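# A minimal, self-contained usage sketch (not from the original source): it exercises
# mlpy.ClassTree and mlpy.accuracy directly on a tiny synthetic dataset, since
# correctVals() used by decisionTree() above is a project-specific helper that is not
# shown here. All data values below are illustrative assumptions.
import numpy as np
import mlpy

np.random.seed(0)
x_train = np.vstack([np.random.randn(20, 2) + 2, np.random.randn(20, 2) - 2])
y_train = np.array([1] * 20 + [2] * 20)
x_test = np.vstack([np.random.randn(5, 2) + 2, np.random.randn(5, 2) - 2])
y_test = np.array([1] * 5 + [2] * 5)

tree = mlpy.ClassTree(minsize=2)  # same hyperparameter as decisionTree() above
tree.learn(x_train, y_train)
y_pred = tree.pred(x_test)
print("ClassTree accuracy on the synthetic test set: " + str(mlpy.accuracy(y_test, y_pred)))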
def test_gamma_C(x_arr, y_arr, itraining, K_chk, y_chk, get_ml, get_kernel=None):
    shp = (len(y_arr), len(x_arr))
    pnl = pd.Panel(data=[np.zeros(shp), np.zeros(shp)],
                   items=['error', 'accuracy'],
                   major_axis=y_arr, minor_axis=x_arr)
    for g in x_arr:
        for c in y_arr:
            print "computing g=%r, c=%r" % (g, c)
            ml = get_ml(g, c, get_kernel)
            y_all = cls(ml, itraining, K_chk, y_chk)
            pnl['error'][g][c] = mlpy.error(y_chk, y_all[y_chk.index])
            pnl['accuracy'][g][c] = mlpy.accuracy(y_chk, y_all[y_chk.index])
    return pnl
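# test_gamma_C() depends on project-specific helpers (get_ml, cls) that are not shown
# here. The following is an illustrative, self-contained sketch of the same idea --
# scanning a gamma/C grid and recording mlpy.accuracy for each pair -- using
# mlpy.LibSvm with an RBF kernel and a pandas DataFrame instead of the deprecated
# pd.Panel. Every name and value below is an assumption, not the original setup.
import numpy as np
import pandas as pd
import mlpy

np.random.seed(0)
x = np.vstack([np.random.randn(40, 2) + 1, np.random.randn(40, 2) - 1])
y = np.array([1] * 40 + [-1] * 40)
x_tr = np.vstack([x[:30], x[40:70]])
y_tr = np.concatenate([y[:30], y[40:70]])
x_te = np.vstack([x[30:40], x[70:]])
y_te = np.concatenate([y[30:40], y[70:]])

gammas = [0.01, 0.1, 1.0]
Cs = [0.1, 1.0, 10.0]
acc = pd.DataFrame(index=Cs, columns=gammas, dtype=float)

for g in gammas:
    for c in Cs:
        svm = mlpy.LibSvm(svm_type='c_svc', kernel_type='rbf', gamma=g, C=c)
        svm.learn(x_tr, y_tr)
        y_pred = svm.pred(x_te)
        acc.loc[c, g] = mlpy.accuracy(y_te, y_pred)

print(acc)  # rows: C values, columns: gamma values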
# ACCURACY measure - CLASSIFICATION
import mlpy
t = [3, 2, 3, 3, 3, 1, 1, 1]
p = [3, 2, 1, 3, 3, 2, 1, 1]
mlpy.error(t, p)
mlpy.accuracy(t, p)

# Sensitivity, Specificity, AUC
import mlpy
t = [1, 1, 1, -1, 1, -1, -1, -1]
p = [1, -1, 1, 1, 1, -1, 1, -1]
mlpy.error_p(t, p)
mlpy.error_n(t, p)
mlpy.sensitivity(t, p)
mlpy.specificity(t, p)
mlpy.ppv(t, p)
mlpy.npv(t, p)
mlpy.mcc(t, p)
p = [2.3, -0.4, 1.6, 0.6, 3.2, -4.9, 1.3, -0.3]
mlpy.auc_wmw(t, p)
p = [2.3, 0.4, 1.6, -0.6, 3.2, -4.9, -1.3, -0.3]
mlpy.auc_wmw(t, p)

# Mean Squared Error - REGRESSION
import mlpy
t = [2.4, 0.4, 1.2, -0.2, 3.3, -4.9, -1.1, -0.1]
p = [2.3, 0.4, 1.6, -0.6, 3.2, -4.9, -1.3, -0.3]
mlpy.mse(t, p)
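# A short cross-check sketch (not part of the original snippet): the binary metrics
# above computed by hand from the confusion matrix, so the mlpy calls can be verified
# against the textbook definitions. It reuses the same t/p pair as the
# sensitivity/specificity example.
t = [1, 1, 1, -1, 1, -1, -1, -1]
p = [1, -1, 1, 1, 1, -1, 1, -1]

tp = sum(1 for ti, pi in zip(t, p) if ti == 1 and pi == 1)    # true positives
tn = sum(1 for ti, pi in zip(t, p) if ti == -1 and pi == -1)  # true negatives
fp = sum(1 for ti, pi in zip(t, p) if ti == -1 and pi == 1)   # false positives
fn = sum(1 for ti, pi in zip(t, p) if ti == 1 and pi == -1)   # false negatives

print("sensitivity = TP/(TP+FN) = " + str(float(tp) / (tp + fn)))
print("specificity = TN/(TN+FP) = " + str(float(tn) / (tn + fp)))
print("ppv         = TP/(TP+FP) = " + str(float(tp) / (tp + fp)))
print("npv         = TN/(TN+FN) = " + str(float(tn) / (tn + fn)))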
    # (excerpt starts inside the `if crossval:` branch: part of the parsed training
    # data is split off as a held-out test set)
    test_y = train_y[:split_index]
    train_X = train_X[split_index:]
    train_y = train_y[split_index:]
    print strftime("%Y-%m-%d %H:%M:%S", gmtime()), ": Parsing complete!\n"
else:
    print strftime("%Y-%m-%d %H:%M:%S", gmtime()), ": Parsing test data..."
    # Parsing test input data
    data_reader = csv.reader(open(input_test_file, 'rb'), delimiter=",")
    data_reader.next()  # Skip the first line, since it contains the labels
    test_X = []
    for row in data_reader:
        line_x = [1]
        for i in range(len(row)):
            line_x.append(float(row[i]))
        test_X.append(line_x)
    print strftime("%Y-%m-%d %H:%M:%S", gmtime()), ": Finished parsing test data!"

predicted_labels = Predict_svm(train_X, train_y, test_X)
print strftime("%Y-%m-%d %H:%M:%S", gmtime()), ": Finished predicting!"

if crossval == True:
    # Only useful for cross-validation, otherwise test_y is unknown and we get error=1/accuracy=0
    print "Error: ", mlpy.error(test_y, predicted_labels)
    print "Accuracy: ", mlpy.accuracy(test_y, predicted_labels)
else:
    print strftime("%Y-%m-%d %H:%M:%S", gmtime()), ": Writing predicted labels to file..."
    prediction_filename = "test_" + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) + ".csv"
    prediction_writer = csv.writer(open(prediction_filename, 'wb'))
    for row in predicted_labels:
        list_row = [row]
        prediction_writer.writerow(list_row)
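# Predict_svm() is defined elsewhere in the original script and is not shown here.
# The sketch below is only a guess at its general shape -- train an mlpy SVM on
# (train_X, train_y) and return predictions for test_X. Every detail (kernel,
# parameters, even the choice of LibSvm over LibLinear) is an assumption, not the
# author's implementation.
import numpy as np
import mlpy

def Predict_svm_sketch(train_X, train_y, test_X):
    train_X = np.asarray(train_X, dtype=float)
    train_y = np.asarray(train_y, dtype=int)
    test_X = np.asarray(test_X, dtype=float)
    svm = mlpy.LibSvm(svm_type='c_svc', kernel_type='linear', C=1.0)
    svm.learn(train_X, train_y)
    return svm.pred(test_X)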
def mlpyKNN(k, trainx, trainy, testx, testy, typist):
    mlknn = mlpy.KNN(k)
    mlknn.learn(trainx, trainy)
    knnVals = mlknn.pred(testx)
    print("The mlpy KNN accuracy on the " + typist + " set for k = " + str(k) + " is: " + str(mlpy.accuracy(knnVals, testy)))
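# A small usage sketch for mlpyKNN() above (illustrative only): synthetic two-class
# data evaluated for a few values of k. The data and the "test" label passed as
# `typist` are made up for this example.
import numpy as np
import mlpy

np.random.seed(1)
x_train = np.vstack([np.random.randn(25, 2) + 2, np.random.randn(25, 2) - 2])
y_train = np.array([1] * 25 + [2] * 25)
x_test = np.vstack([np.random.randn(5, 2) + 2, np.random.randn(5, 2) - 2])
y_test = np.array([1] * 5 + [2] * 5)

for k in (1, 3, 5):
    mlpyKNN(k, x_train, y_train, x_test, y_test, "test")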
# (excerpt sits inside a per-fold loop whose header is not shown: m is the current
# fold number and i holds the remaining (train index, test index) pairs)
testy = []
indx = i.pop()
for c in indx[0]:
    trainx.append(x[c].split(","))
    trainy.append(y[c])
for c in indx[1]:
    testx.append(x[c].split(","))
    testy.append(y[c])
mlknn = mlpy.KNN(5)
mlknn.learn(trainx, trainy)
knnVals = mlknn.pred(testx)
print("The mlpy KNN accuracy on fold " + str(m) + " set for k = 5 is: " + str(mlpy.accuracy(knnVals, testy)))
KNNRESULTS.append(mlpy.accuracy(knnVals, testy))
treeLearn = mlpy.ClassTree(minsize=2)
treeLearn.learn(trainx, trainy)
treeVals = treeLearn.pred(testx)
treeVals = correctVals(treeVals)
print("The Decision Tree accuracy on fold " + str(m) + " is: " + str(mlpy.accuracy(treeVals, testy)))
DTRESULTS.append(mlpy.accuracy(treeVals, testy))
svm = mlpy.LibLinear(solver_type="l2r_lr")
svm.learn(trainx, trainy)
svmVals = svm.pred(testx)
print("The mlpy Logistic Regression accuracy on fold " + str(m) + " is: " + str(mlpy.accuracy(svmVals, testy)))
LRRESULTS.append(mlpy.accuracy(svmVals, testy))
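# How the fold index pairs consumed above (i, indx) are built is not shown. One
# common way to produce such (train_idx, test_idx) pairs with mlpy is cv_kfold; the
# sketch below is an assumption about that setup, not the original code.
import mlpy

n_samples = 100  # hypothetical dataset size
folds = mlpy.cv_kfold(n=n_samples, k=10, seed=0)
for fold_number, (train_idx, test_idx) in enumerate(folds):
    print("fold " + str(fold_number) + ": " +
          str(len(train_idx)) + " training / " + str(len(test_idx)) + " test samples")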
def make_model(self):
    if self.model_type not in [
            "Combined", "comb", "Comb", "overall", "Overall", "o", "O"
    ]:
        training_fet = np.array([
            select_fet2(i[0], i[1], self.model_type)
            for i in self.datasets.training()
        ])
        testing_fet = np.array([
            select_fet2(i[0], i[1], self.model_type)
            for i in self.datasets.testing()
        ])
        self.logit1.learn(training_fet[:, :-1], training_fet[:, -1])
        test_pred = [self.logit1.pred(i[:-1]) for i in testing_fet]
        test_vals = [i[-1] for i in testing_fet]
        cds_test_pred = [
            self.logit1.pred(i[:-1]) for i in testing_fet if i[-1] == '1'
        ]
        cds_test_vals = [i[-1] for i in testing_fet if i[-1] == '1']
        ncds_test_pred = [
            self.logit1.pred(i[:-1]) for i in testing_fet if i[-1] == '0'
        ]
        ncds_test_vals = [i[-1] for i in testing_fet if i[-1] == '0']
        print "=-" * 5 + "Accuracies" + "=-" * 5
        print "Overall accuracy of predictor : %s" % mlpy.accuracy(
            test_vals, test_pred)
        print "Accuracy of predicting CDS correctly : %s" % mlpy.accuracy(
            cds_test_vals, cds_test_pred)
        print "Accuracy of predicting NCDS correctly : %s" % mlpy.accuracy(
            ncds_test_vals, ncds_test_pred)
    else:
        training_fet_1 = np.array([
            select_fet2(i[0], i[1], "u") for i in self.datasets.training()
        ])
        testing_fet_1 = np.array([
            select_fet2(i[0], i[1], "u") for i in self.datasets.testing()
        ])
        self.logit1.learn(training_fet_1[:, :-1], training_fet_1[:, -1])
        training_fet_2 = np.array([
            select_fet2(i[0], i[1], "c") for i in self.datasets.training()
        ])
        testing_fet_2 = np.array([
            select_fet2(i[0], i[1], "c") for i in self.datasets.testing()
        ])
        self.logit2.learn(training_fet_2[:, :-1], training_fet_2[:, -1])
        k = len(training_fet_1)
        j = len(testing_fet_1)
        comb_training = [[
            self.logit1.pred_probability(training_fet_1[i][:-1])[1],
            self.logit2.pred_probability(training_fet_2[i][:-1])[1],
            training_fet_1[i][-1]
        ] for i in range(k)]  # training file
        comb_training = np.array(comb_training, dtype=float)
        self.logit3.learn(comb_training[:, :-1], comb_training[:, -1])
        comb_testing = [[
            self.logit1.pred_probability(testing_fet_1[i][:-1])[1],
            self.logit2.pred_probability(testing_fet_2[i][:-1])[1],
            testing_fet_1[i][-1]
        ] for i in range(j)]  # testing file
        comb_testing = np.array(comb_testing, dtype=float)
        test_pred = [self.logit3.pred(i[:-1]) for i in comb_testing]
        test_vals = [i[-1] for i in comb_testing]
        cds_test_pred = [
            self.logit3.pred(i[:-1]) for i in comb_testing if i[-1] == 1
        ]
        cds_test_vals = [i[-1] for i in comb_testing if i[-1] == 1]
        ncds_test_pred = [
            self.logit3.pred(i[:-1]) for i in comb_testing if i[-1] == 0
        ]
        ncds_test_vals = [i[-1] for i in comb_testing if i[-1] == 0]
        print "=-" * 5 + "Accuracies" + "=-" * 5
        print "Overall accuracy of predictor : %s" % mlpy.accuracy(
            test_vals, test_pred)
        print "Accuracy of predicting CDS correctly : %s" % mlpy.accuracy(
            cds_test_vals, cds_test_pred)
        print "Accuracy of predicting NCDS correctly : %s" % mlpy.accuracy(
            ncds_test_vals, ncds_test_pred)
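# The class around make_model() (self.datasets, select_fet2, self.logit1-3) is not
# included here. The sketch below reproduces just the stacking idea from the
# "Combined" branch on synthetic data: two LibLinear logistic regressions are trained
# on two different feature views, and their predicted probabilities become the two
# inputs of a third model. Everything below is an illustrative assumption, not the
# original pipeline.
import numpy as np
import mlpy

np.random.seed(2)
n = 200
view_u = np.random.randn(n, 3)  # stand-in for the "u" feature set
view_c = np.random.randn(n, 4)  # stand-in for the "c" feature set
labels = (view_u[:, 0] + view_c[:, 0] > 0).astype(int)  # synthetic 0/1 target

tr = slice(0, 150)
te = slice(150, n)

logit1 = mlpy.LibLinear(solver_type="l2r_lr")
logit2 = mlpy.LibLinear(solver_type="l2r_lr")
logit3 = mlpy.LibLinear(solver_type="l2r_lr")

logit1.learn(view_u[tr], labels[tr])
logit2.learn(view_c[tr], labels[tr])

# Stack the two per-model probabilities into a 2-column meta-feature matrix.
def meta_features(xu, xc):
    return np.array([[logit1.pred_probability(a)[1],
                      logit2.pred_probability(b)[1]] for a, b in zip(xu, xc)])

logit3.learn(meta_features(view_u[tr], view_c[tr]), labels[tr])
pred = logit3.pred(meta_features(view_u[te], view_c[te]))
print("Stacked-model accuracy on the synthetic test split: " +
      str(mlpy.accuracy(labels[te], pred)))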