def runLDA(x, y, testX, testY, testfilename):
    """Train an LDA model on (x, y) and dump its parameters plus the
    confusion matrix on (testX, testY) to "<testfilename>_result".

    Args:
        x, y: training feature rows and labels.
        testX, testY: held-out feature rows and labels.
        testfilename: base name of the output file; "_result" is appended.
    """
    (prob, mean, cov) = lda.trainLDA(x, y)
    # 'with' guarantees the file handle is closed even if a write raises;
    # the original open()/close() pair leaked the handle on error.
    with open(testfilename + "_result", 'w') as resultfile:
        resultfile.write("'Probabilities','Means','Covariance','Confusion Matrix'\n")
        resultfile.write(str(prob) + "\n" + str(mean) + "\n" + str(cov) + "\n" +
                         str(lda.getConfusionMatrix(prob, mean, cov, testX, testY)))
    return
def kfolds_all_algos(k, x, y, isotest_x, isotest_y): k_groups = splitdata(k, x, y) #now we have the k groups, assign each one as test once and run tests! print "groups split" lda_train_results = [] lda_test_results = [] lda_iso_results = [] nb_train_results = [] nb_test_results = [] nb_iso_results = [] lr_train_results = [] lr_test_results = [] lr_iso_results = [] for i in xrange(k): print "K Fold number " + str(i) test = k_groups[i] train = [] train.append([]) #x train.append([]) #y for j in xrange(k): if(j != i): train[0].extend(k_groups[j][0]) train[1].extend(k_groups[j][1]) #Now we have test and training data... what shall we do? #train on LDA print "Training LDA..." (prob, mean, cov) = lda.trainLDA(copy.deepcopy(train[0]), copy.deepcopy(train[1])) #print str(prob) + "\t" + str(mean) + "\t" + str(cov) print "DONE training LDA." print "Training NB..." (py, theta) = naivebayes.trainNaiveBayesMN(copy.deepcopy(train[0]), copy.deepcopy(train[1])) #print str(py) + "\t" + str(theta) print "DONE training NB" print "Training Logistic Regression..." 
t_x = copy.deepcopy(train[0]) for i in xrange(len(t_x)): temp_row = [1] temp_row.extend(t_x[i]) t_x[i] = temp_row (wvector, scales) = logisticregression.trainLogisticReg(0.01, 0.00001, 100, t_x, train[1]) #print str(wvector) print "DONE training Logistic Regression.\n" #lr_model = linmod.LogisticRegression() #lr_model.fit(t_x, train[1]) #for model, name in ((lr_model, "LR"),): # tp, tn, fp, fn = 0, 0, 0, 0 # for i in xrange(0, len(t_x)): # val = model.predict(t_x[i]) # if (val == 1 and train[1][i] == 1): # tp += 1 # elif (val == 1 and train[1][i] == 0): # fp += 1 # elif (val == 0 and train[1][i] == 0): # tn += 1 # elif (val == 0 and train[1][i] == 1): # fn += 1 # print "%s - TP: %d, FP: %d, TN: %d, FN: %d" % (name, tp, fp, tn, fn) #get Prediction Errors on left out set lr_test_error = logisticregression.getConfusionMatrix(wvector,scales, copy.deepcopy(test[0]), copy.deepcopy(test[1])) lr_train_error = logisticregression.getConfusionMatrix(wvector,scales, copy.deepcopy(train[0]), copy.deepcopy(train[1])) lr_iso_error = logisticregression.getConfusionMatrix(wvector,scales, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) lda_test_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(test[0]), copy.deepcopy(test[1])) lda_train_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(train[0]), copy.deepcopy(train[1])) lda_iso_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) nb_test_error = naivebayes.getConfusionMatrixMN(py, theta, copy.deepcopy(test[0]), copy.deepcopy(test[1])) nb_train_error = naivebayes.getConfusionMatrixMN(py, theta, copy.deepcopy(train[0]), copy.deepcopy(train[1])) nb_iso_error = naivebayes.getConfusionMatrixMN(py, theta, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) #add to sets the false positives (for now) lr_train_results.append(lr_train_error) lr_test_results.append(lr_test_error) lr_iso_results.append(lr_iso_error) lda_train_results.append(lda_train_error) 
lda_test_results.append(lda_test_error) lda_iso_results.append(lda_iso_error) nb_train_results.append(nb_train_error) nb_test_results.append(nb_test_error) nb_iso_results.append(nb_iso_error) #calc average training and test error for each algorithm avr_lda_train = averageconfusionmatrix(lda_train_results) avr_lda_test = averageconfusionmatrix(lda_test_results) avr_lda_iso = averageconfusionmatrix(lda_iso_results) avr_lr_train = averageconfusionmatrix(lr_train_results) avr_lr_test = averageconfusionmatrix(lr_test_results) avr_lr_iso = averageconfusionmatrix(lr_iso_results) avr_nb_train = averageconfusionmatrix(nb_train_results) avr_nb_test = averageconfusionmatrix(nb_test_results) avr_nb_iso = averageconfusionmatrix(nb_iso_results) return [avr_lr_train, avr_lr_test, avr_lr_iso, avr_lda_train, avr_lda_test, avr_lda_iso, avr_nb_train, avr_nb_test, avr_nb_iso]
def kfolds_all_algos(k, x, y, isotest_x, isotest_y): k_groups = splitdata(k, x, y) #now we have the k groups, assign each one as test once and run tests! print "groups split" lda_train_results = [] lda_test_results = [] lda_iso_results = [] nb_train_results = [] nb_test_results = [] nb_iso_results = [] lr_train_results = [] lr_test_results = [] lr_iso_results = [] for i in xrange(k): print "K Fold number " + str(i) test = k_groups[i] train = [] train.append([]) #x train.append([]) #y for j in xrange(k): if (j != i): train[0].extend(k_groups[j][0]) train[1].extend(k_groups[j][1]) #Now we have test and training data... what shall we do? #train on LDA print "Training LDA..." (prob, mean, cov) = lda.trainLDA(copy.deepcopy(train[0]), copy.deepcopy(train[1])) #print str(prob) + "\t" + str(mean) + "\t" + str(cov) print "DONE training LDA." print "Training NB..." (py, theta) = naivebayes.trainNaiveBayesMN(copy.deepcopy(train[0]), copy.deepcopy(train[1])) #print str(py) + "\t" + str(theta) print "DONE training NB" print "Training Logistic Regression..." 
t_x = copy.deepcopy(train[0]) for i in xrange(len(t_x)): temp_row = [1] temp_row.extend(t_x[i]) t_x[i] = temp_row (wvector, scales) = logisticregression.trainLogisticReg(0.01, 0.00001, 100, t_x, train[1]) #print str(wvector) print "DONE training Logistic Regression.\n" #lr_model = linmod.LogisticRegression() #lr_model.fit(t_x, train[1]) #for model, name in ((lr_model, "LR"),): # tp, tn, fp, fn = 0, 0, 0, 0 # for i in xrange(0, len(t_x)): # val = model.predict(t_x[i]) # if (val == 1 and train[1][i] == 1): # tp += 1 # elif (val == 1 and train[1][i] == 0): # fp += 1 # elif (val == 0 and train[1][i] == 0): # tn += 1 # elif (val == 0 and train[1][i] == 1): # fn += 1 # print "%s - TP: %d, FP: %d, TN: %d, FN: %d" % (name, tp, fp, tn, fn) #get Prediction Errors on left out set lr_test_error = logisticregression.getConfusionMatrix( wvector, scales, copy.deepcopy(test[0]), copy.deepcopy(test[1])) lr_train_error = logisticregression.getConfusionMatrix( wvector, scales, copy.deepcopy(train[0]), copy.deepcopy(train[1])) lr_iso_error = logisticregression.getConfusionMatrix( wvector, scales, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) lda_test_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(test[0]), copy.deepcopy(test[1])) lda_train_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(train[0]), copy.deepcopy(train[1])) lda_iso_error = lda.getConfusionMatrix(prob, mean, cov, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) nb_test_error = naivebayes.getConfusionMatrixMN( py, theta, copy.deepcopy(test[0]), copy.deepcopy(test[1])) nb_train_error = naivebayes.getConfusionMatrixMN( py, theta, copy.deepcopy(train[0]), copy.deepcopy(train[1])) nb_iso_error = naivebayes.getConfusionMatrixMN( py, theta, copy.deepcopy(isotest_x), copy.deepcopy(isotest_y)) #add to sets the false positives (for now) lr_train_results.append(lr_train_error) lr_test_results.append(lr_test_error) lr_iso_results.append(lr_iso_error) lda_train_results.append(lda_train_error) 
lda_test_results.append(lda_test_error) lda_iso_results.append(lda_iso_error) nb_train_results.append(nb_train_error) nb_test_results.append(nb_test_error) nb_iso_results.append(nb_iso_error) #calc average training and test error for each algorithm avr_lda_train = averageconfusionmatrix(lda_train_results) avr_lda_test = averageconfusionmatrix(lda_test_results) avr_lda_iso = averageconfusionmatrix(lda_iso_results) avr_lr_train = averageconfusionmatrix(lr_train_results) avr_lr_test = averageconfusionmatrix(lr_test_results) avr_lr_iso = averageconfusionmatrix(lr_iso_results) avr_nb_train = averageconfusionmatrix(nb_train_results) avr_nb_test = averageconfusionmatrix(nb_test_results) avr_nb_iso = averageconfusionmatrix(nb_iso_results) return [ avr_lr_train, avr_lr_test, avr_lr_iso, avr_lda_train, avr_lda_test, avr_lda_iso, avr_nb_train, avr_nb_test, avr_nb_iso ]