def marginPerceptron_cvRate(train_matrix, train_actual_labels, rateRange,
                            num_folds):
    """Cross-validate the margin perceptron over ``rateRange`` and plot it.

    For every candidate in ``rateRange`` the training data is cut into
    ``num_folds`` contiguous folds (the last fold also takes the remainder
    rows).  A margin perceptron is trained on all folds but one, scored on
    the held-out fold, and the fold accuracies are averaged.  The mean
    accuracies are then plotted against ``rateRange`` with matplotlib.

    Args:
        train_matrix: Training examples; must provide ``.tolist()``.
        train_actual_labels: Labels aligned with ``train_matrix``; must
            provide ``.tolist()``.
        rateRange: Indexable sequence of candidate values to sweep.
        num_folds: Number of cross-validation folds.

    Returns:
        ``(best_accuracy, best_rate, av_accuracy_list)``: the highest mean
        accuracy, the first candidate that reached it, and the mean accuracy
        of every candidate in sweep order.
    """
    av_accuracy_list = []
    for rate in rateRange:
        # One pre-formatted argument prints the same text as the old
        # "print 'rate: ', rate" statement and is also valid on Python 3.
        print('rate:  %s' % (rate,))

        # tolist() already builds fresh nested lists, so no deepcopy is
        # needed.  It is redone per candidate so every sweep starts from
        # untouched rows.
        training_mat = train_matrix.tolist()
        training_lab = train_actual_labels.tolist()
        # Floor division: the fold size is used as a slice bound and must
        # stay an integer even when "/" means true division.
        subset_size = len(training_mat) // num_folds

        accuracy_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # The last fold absorbs the rows left over by the division.
                stop = len(training_mat)
            else:
                stop = start + subset_size

            temp_test_Matrix = training_mat[start:stop]
            temp_test_Labels = training_lab[start:stop]
            temp_train_Matrix = training_mat[:start] + training_mat[stop:]
            temp_train_Labels = training_lab[:start] + training_lab[stop:]

            # NOTE(review): algoFORepochs in this file calls
            # marginPerceptron(matrix, labels, weights, learningRate, margin).
            # If that is the same function, the swept value lands in the
            # margin slot here and the learning rate is pinned to 1 --
            # confirm which was intended.
            w_train, _ = percepFunctions.marginPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', 1, rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            accuracy_list.append(projectFunctions.accuracyMETRIC(
                temp_test_Labels, predicted_labels))

        av_accuracy = np.mean(accuracy_list)
        av_accuracy_list.append(av_accuracy)
        print(av_accuracy)

    best_accuracy = max(av_accuracy_list)
    # index() returns the first maximum, so ties go to the earliest candidate.
    best_rate = rateRange[av_accuracy_list.index(best_accuracy)]

    plt.plot(rateRange, av_accuracy_list)
    plt.xlabel('Learning Rate')
    plt.ylabel('Accuracy')
    # The title used to say "Simple perceptron" (copy-paste from the simple
    # variant); this routine trains the margin perceptron.
    plt.title('Accuracy v/s Learning Rate, for Margin perceptron')
    plt.grid(True)
    plt.show()
    return best_accuracy, best_rate, av_accuracy_list
def _binarize_positive(matrix):
    """Set every strictly positive entry of ``matrix`` to 1, in place."""
    matrix[matrix > 0] = 1


def _scale_by_column_range(matrix):
    """Divide each column of ``matrix`` by its own (max - min), in place."""
    col_range = matrix.max(axis=0) - matrix.min(axis=0)
    # A constant column has range 0; divide by 1 instead so it is left as is.
    col_range[col_range == 0] = 1
    # Slice assignment keeps the array object and its dtype, like the
    # element-by-element writes this replaces.
    matrix[...] = matrix / col_range


def eliminateBADfeaturesWITHperceptron(finalFeatureCASE, labelCASE,
                                       thresholdBIAS):
    """Prune feature columns with a leave-one-column-out perceptron probe.

    The data sets come from ``eliminateZEROfetures('w', labelCASE)``.  For
    each column of the training matrix a weighted perceptron (zero initial
    weights, learning rate 0.675) is trained on the matrix *without* that
    column and its training F1 is recorded.  Every column whose recorded F1
    is below ``mean(F1) + thresholdBIAS`` is removed from the train, test
    and eval matrices, which are then optionally post-processed.

    NOTE(review): the recorded F1 is the score obtained after *removing*
    the column, so this drops columns whose removal gives a below-threshold
    score -- confirm that this is the intended direction.

    Args:
        finalFeatureCASE: ``'01'`` sets every positive entry to 1;
            ``'normalize'`` divides each column by its own value range,
            computed separately per data set; any other value leaves the
            pruned matrices untouched.
        labelCASE: Forwarded unchanged to ``eliminateZEROfetures``.
        thresholdBIAS: Offset added to the mean F1 to form the cut-off.

    Returns:
        ``(train_matrix, train_labels, test_matrix, test_labels,
        eval_matrix, eval_labels)`` with the pruned matrices.
    """
    featureCASE = 'w'
    (train_matrix, train_labels, test_matrix, test_labels,
     eval_matrix, eval_labels) = eliminateZEROfetures(featureCASE, labelCASE)

    f1_without_column = []
    for c in range(len(train_matrix[0])):
        # np.delete returns a new array, so the source matrix is never
        # modified and no defensive copy is needed.
        reduced_matrix = np.delete(train_matrix, c, axis=1)
        w, _ = percepFunctions.weightedPerceptron(
            reduced_matrix, train_labels, 'zeros', 0.675)
        train_pred_labels = projectFunctions.prediction_Perceptron(
            reduced_matrix, train_labels, w)
        _, _, f1_train = projectFunctions.f1METRIC(
            train_labels, train_pred_labels)
        f1_without_column.append(f1_train)

    delLIST = []
    if f1_without_column:  # nothing to rank when there are no columns
        cutoff = np.mean(f1_without_column) + thresholdBIAS
        delLIST = [c for c, f1 in enumerate(f1_without_column) if f1 < cutoff]

    train_matrix = np.delete(train_matrix, delLIST, axis=1)
    test_matrix = np.delete(test_matrix, delLIST, axis=1)
    eval_matrix = np.delete(eval_matrix, delLIST, axis=1)

    if finalFeatureCASE == '01':
        for matrix in (train_matrix, test_matrix, eval_matrix):
            _binarize_positive(matrix)
    elif finalFeatureCASE == 'normalize':
        # The training matrix used to skip the zero-range guard that the
        # test and eval matrices had, so a constant training column was
        # divided by zero.  All three now share one guarded routine.
        for matrix in (train_matrix, test_matrix, eval_matrix):
            _scale_by_column_range(matrix)

    return (train_matrix, train_labels, test_matrix, test_labels,
            eval_matrix, eval_labels)
def _train_one_epoch(algo_type, matrix, labels, weightCondition, learningRate,
                     margin):
    """Run one training pass of the selected learner and return its weights."""
    if algo_type == 'simple':
        w, _ = simplePerceptron(matrix, labels, weightCondition, learningRate)
    elif algo_type == 'margin':
        w, _ = marginPerceptron(matrix, labels, weightCondition,
                                learningRate, margin)
    elif algo_type == 'aggressive':
        w, _ = agrresiveMarginPerceptron(matrix, labels, weightCondition,
                                         learningRate, margin)
    elif algo_type == 'weightedAggressive':
        w, _ = WeightedAgrresiveMarginPerceptron(
            matrix, labels, weightCondition, learningRate, margin)
    elif algo_type == 'weightedSimple':
        w, _ = weightedPerceptron(matrix, labels, weightCondition,
                                  learningRate)
    else:  # 'winnow' -- algo_type was validated by the caller
        # NOTE(review): winnow is always started from 'ones' with the
        # constant 2; weightCondition, learningRate and margin are ignored,
        # so consecutive epochs do not build on each other.  Confirm that
        # this is intended.
        w, _ = winnowFunctions.simpleWinnow(matrix, labels, 'ones', 2)
    return w


def _accuracy_and_f1(predict, matrix, labels, w):
    """Predict ``matrix`` with ``w`` and return ``(accuracy, f1)``."""
    predicted = predict(matrix, labels, w)
    accuracy = projectFunctions.accuracyMETRIC(labels, predicted)
    _, _, f1 = projectFunctions.f1METRIC(labels, predicted)
    return accuracy, f1


def algoFORepochs(numEpochs, trainMatrix, trainLabels, testMatrix, testLabels,
                  algo_type, learningRate, initialWcondition, margin=None):
    """Train a linear learner for several epochs and keep the best epoch.

    Epoch 0 trains on the data in the given order, starting from
    ``initialWcondition``.  Every later epoch reshuffles the previous
    epoch's ordering with ``shuffle(..., random_state=0)`` and continues
    from the weights of the previous epoch.  After each epoch the weights
    are scored on the unshuffled train set and on the test set; the epoch
    with the highest training F1 wins, and ties go to the later epoch.

    Args:
        numEpochs: Number of passes over the training data (at least 1).
        trainMatrix: Training examples.
        trainLabels: Labels aligned with ``trainMatrix``.
        testMatrix: Test examples.
        testLabels: Labels aligned with ``testMatrix``.
        algo_type: One of ``'simple'``, ``'margin'``, ``'aggressive'``,
            ``'weightedAggressive'``, ``'weightedSimple'``, ``'winnow'``.
        learningRate: Forwarded to the perceptron variants.
        initialWcondition: Weight initialisation passed to the first epoch.
        margin: Forwarded to the margin-based variants.

    Returns:
        ``[shuffle_type, epoch_index, acc_train, f1_train, acc_test,
        f1_test, weights]`` for the winning epoch.  ``epoch_index`` is
        zero-based.

    Raises:
        ValueError: If ``algo_type`` is not recognised or ``numEpochs`` is
            smaller than 1.  Both cases used to end in an UnboundLocalError.
    """
    known_algos = ('simple', 'margin', 'aggressive', 'weightedAggressive',
                   'weightedSimple', 'winnow')
    if algo_type not in known_algos:
        raise ValueError('unknown algo_type %r; expected one of %s'
                         % (algo_type, ', '.join(known_algos)))
    if numEpochs < 1:
        raise ValueError('numEpochs must be at least 1, got %r' % (numEpochs,))

    if algo_type == 'winnow':
        predict = projectFunctions.prediction_Winnow
    else:
        predict = projectFunctions.prediction_Perceptron

    # Private copies that carry the cumulative shuffle from epoch to epoch;
    # the caller's arrays are never rebound.
    shuffledMatrix = copy.deepcopy(trainMatrix)
    shuffledLabels = copy.deepcopy(trainLabels)

    w = 0
    best_LIST = None
    for e_ind in range(numEpochs):
        if e_ind == 0:
            shuffleTYPE = 'no shuffle'
            trainingExMat, trainExLabels = trainMatrix, trainLabels
            weightCondition = initialWcondition
        else:
            shuffleTYPE = 'with shuffle'
            shuffledMatrix, shuffledLabels = shuffle(
                shuffledMatrix, shuffledLabels, random_state=0)
            trainingExMat, trainExLabels = shuffledMatrix, shuffledLabels
            weightCondition = w

        w = _train_one_epoch(algo_type, trainingExMat, trainExLabels,
                             weightCondition, learningRate, margin)

        # Always score against the original, unshuffled training data.
        acc_train, f1_train = _accuracy_and_f1(
            predict, trainMatrix, trainLabels, w)
        acc_test, f1_test = _accuracy_and_f1(
            predict, testMatrix, testLabels, w)

        # ">=" keeps the original tie-break: the later epoch wins.
        if best_LIST is None or f1_train >= best_LIST[3]:
            # Snapshot the weights: w is fed back into the next epoch, and
            # a learner that updates it in place would otherwise silently
            # rewrite the stored best.
            best_LIST = [shuffleTYPE, e_ind, acc_train, f1_train,
                         acc_test, f1_test, copy.deepcopy(w)]

    return best_LIST
# Driver fragment for the perceptron leaderboard run.  The statements were
# collapsed onto one physical line; that line is kept verbatim below.
# Statement order as written:
#   1. (commented out) projectFunctions.featureTransformation call that would
#      define trainMatrix, testMatrix and evalMatrix.
#   2. learningRate = 0.675, margin = 0, maxEpochs = 1.
#   3. bestLIST = percepFunctions.algoFORepochs(...) with ten positional
#      arguments.
#   4. Prints bestLIST[1] .. bestLIST[6] and bestLIST[8] .. bestLIST[13].
#   5. Asks for a file stem, takes bestLIST[7] as the weight vector, predicts
#      labels for eval_matrix and writes them with
#      projectFunctions.write_solutions.
#   6. A triple-quoted (disabled) snippet that would dump simple_weighted_info
#      to an infoARRAY csv file.
# NOTE(review): the algoFORepochs defined in this file takes at most nine
# arguments (no eval matrix/labels) and returns a 7-element list, so the call
# and the indices 7..13 presumably target an older version -- confirm before
# re-enabling.
# NOTE(review): the prompt mentions ./solutions_log/x__x_solutions.csv but the
# file is written to ./solutions_log/solutions/<name>.solutions.csv.
# NOTE(review): the training call uses trainMatrix/evalMatrix while the
# prediction uses eval_matrix; the line that would define the former is
# commented out.
# NOTE(review): the final ''' has no closing partner on this line; presumably
# it opens a disabled region that continues after this fragment.
#trainMatrix, testMatrix, evalMatrix = projectFunctions.featureTransformation(train_matrix, test_matrix, eval_matrix,1) #Simple-Weighted Perceptron w Epochs learningRate = 0.675 margin = 0 maxEpochs = 1 bestLIST = percepFunctions.algoFORepochs(maxEpochs, trainMatrix, train_actual_labels, testMatrix, test_actual_labels, evalMatrix, eval_actual_labels, 'simple', learningRate, margin) for i in range(1,7): print bestLIST[i] for i in range(8,14): print bestLIST[i] #Write leaderboard file solution_filename = raw_input('Enter x__x in ./solutions_log/x__x_solutions.csv: ') w_val = bestLIST[7] eval_pred_labels = projectFunctions.prediction_Perceptron(eval_matrix, eval_actual_labels, w_val) projectFunctions.write_solutions('perceptron',eval_pred_labels,'./solutions_log/solutions/'+solution_filename+'.solutions.csv') ''' filepath = './solutions_log/simple_50e_infolist.txt' thefile = open('./solutions_log/infoARRAY/'+solution_filename+'.infoARRAY.csv', 'w') for item in simple_weighted_info: thefile.write("%s\n" % item) thefile.close() ''' #end_time = timeit.default_timer() #Simple or Weighted Perceptron '''
def simplePerceptron_cvWeight(train_matrix, train_actual_labels, weightRange,
                              num_folds):
    """Cross-validate the simple perceptron's initial weight and plot it.

    For every candidate in ``weightRange`` the training data is cut into
    ``num_folds`` contiguous folds (the last fold also takes the remainder
    rows).  A simple perceptron is trained with that initial-weight setting
    and a learning rate of 1 on all folds but one, scored on the held-out
    fold, and the fold accuracies are averaged.  The mean accuracies are
    then plotted against ``weightRange`` with matplotlib.

    Args:
        train_matrix: Training examples; must provide ``.tolist()``.
        train_actual_labels: Labels aligned with ``train_matrix``; must
            provide ``.tolist()``.
        weightRange: Indexable sequence of initial-weight settings to sweep.
        num_folds: Number of cross-validation folds.

    Returns:
        ``(best_accuracy, best_weight, av_accuracy_list)``: the highest mean
        accuracy, the first candidate that reached it, and the mean accuracy
        of every candidate in sweep order.
    """
    av_accuracy_list = []
    for weight in weightRange:
        # One pre-formatted argument prints the same text as the old
        # "print 'weight: ', weight" statement and is valid on Python 3.
        print('weight:  %s' % (weight,))

        # tolist() already builds fresh nested lists, so no deepcopy is
        # needed.
        training_mat = train_matrix.tolist()
        training_lab = train_actual_labels.tolist()
        # Floor division: the fold size is used as a slice bound and must
        # stay an integer even when "/" means true division.
        subset_size = len(training_mat) // num_folds

        accuracy_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # The last fold absorbs the rows left over by the division.
                # It used to be split and then skipped by a stray
                # "continue", so it never contributed to the average; it is
                # now scored like the other folds, as in the sibling
                # cross-validation routines.
                stop = len(training_mat)
            else:
                stop = start + subset_size

            temp_test_Matrix = training_mat[start:stop]
            temp_test_Labels = training_lab[start:stop]
            temp_train_Matrix = training_mat[:start] + training_mat[stop:]
            temp_train_Labels = training_lab[:start] + training_lab[stop:]

            w_train, _ = percepFunctions.simplePerceptron(
                temp_train_Matrix, temp_train_Labels, weight, 1)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            accuracy_list.append(projectFunctions.accuracyMETRIC(
                temp_test_Labels, predicted_labels))

        av_accuracy = np.mean(accuracy_list)
        av_accuracy_list.append(av_accuracy)
        print(av_accuracy)

    best_accuracy = max(av_accuracy_list)
    # index() returns the first maximum, so ties go to the earliest candidate.
    best_weight = weightRange[av_accuracy_list.index(best_accuracy)]

    plt.plot(weightRange, av_accuracy_list)
    plt.xlabel('initialization weight')
    plt.ylabel('Accuracy')
    plt.title('Accuracy v/s Learning weight, for Simple perceptron')
    plt.grid(True)
    plt.show()
    # The original returned the loop variable "weight", i.e. whichever
    # candidate was swept last, instead of the winner computed above.
    return best_accuracy, best_weight, av_accuracy_list
def simplePerceptron_cvRate(train_matrix, train_actual_labels, rateRange,
                            num_folds):
    """Cross-validate the weighted perceptron's learning rate by F1 score.

    For every rate in ``rateRange`` the training data is cut into
    ``num_folds`` contiguous folds (the last fold also takes the remainder
    rows).  A weighted perceptron with zero initial weights is trained on
    all folds but one, the F1 score on the held-out fold is recorded, and
    the fold scores are averaged.  The mean scores are then plotted against
    ``rateRange`` with matplotlib.

    Args:
        train_matrix: Training examples; must provide ``.tolist()``.
        train_actual_labels: Labels aligned with ``train_matrix``; must
            provide ``.tolist()``.
        rateRange: Indexable sequence of learning rates to sweep.
        num_folds: Number of cross-validation folds.

    Returns:
        ``(best_f1, best_rate, av_f1_list)``: the highest mean F1, the first
        rate that reached it, and the mean F1 of every rate in sweep order.
    """
    av_f1_list = []
    for rate in rateRange:
        # One pre-formatted argument prints the same text as the old
        # "print 'rate: ', rate" statement and is also valid on Python 3.
        print('rate:  %s' % (rate,))

        # tolist() already builds fresh nested lists, so no deepcopy is
        # needed.
        training_mat = train_matrix.tolist()
        training_lab = train_actual_labels.tolist()
        # Floor division: the fold size is used as a slice bound and must
        # stay an integer even when "/" means true division.
        subset_size = len(training_mat) // num_folds

        f1_list = []
        for i in range(num_folds):
            start = i * subset_size
            if i == num_folds - 1:
                # The last fold absorbs the rows left over by the division.
                stop = len(training_mat)
            else:
                stop = start + subset_size

            temp_test_Matrix = training_mat[start:stop]
            temp_test_Labels = training_lab[start:stop]
            temp_train_Matrix = training_mat[:start] + training_mat[stop:]
            temp_train_Labels = training_lab[:start] + training_lab[stop:]

            w_train, _ = percepFunctions.weightedPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)

            # f1METRIC is unpacked as a 3-tuple everywhere else in this file
            # (two leading values, then F1).  The whole tuple used to be
            # stored here, so the "mean F1" averaged all three numbers.
            _, _, test_f1 = projectFunctions.f1METRIC(
                temp_test_Labels, predicted_labels)
            f1_list.append(test_f1)

        av_f1 = np.mean(f1_list)
        av_f1_list.append(av_f1)
        print(av_f1)

    best_f1 = max(av_f1_list)
    # index() returns the first maximum, so ties go to the earliest rate.
    best_rate = rateRange[av_f1_list.index(best_f1)]

    plt.plot(rateRange, av_f1_list)
    plt.xlabel('Learning Rate')
    plt.ylabel('F1 score')
    plt.title('F1 score v/s Learning Rate, for Simple Weighted perceptron')
    plt.grid(True)
    plt.show()
    return best_f1, best_rate, av_f1_list
# Tail of the SVM driver script.  The statements were collapsed onto one
# physical line; that line is kept verbatim below.
# Part 1 (up to the '''): stores the current epochsCV / gamma_ / C_ / accuracy
# as the best values, retrains with svmFunctions.SVM using them, scores the
# train and test sets (accuracy via svmFunctions, F1 via projectFunctions),
# and prints the scores and the elapsed time since startTime.
# Part 2 (after the '''): trains a final SVM with fixed settings
# (epochsFINAL = 20, CFINAL = 11000, gammaFINAL = 1e-06), predicts with
# projectFunctions.prediction_Perceptron, and prints accuracy and F1 for the
# train and test sets.
# NOTE(review): the first statements presumably sit inside a cross-validation
# loop/condition that starts above this fragment -- confirm the nesting.
# NOTE(review): the lone ''' presumably closes a triple-quoted region opened
# above this fragment; if so, part 1 is disabled text, not live code.
# NOTE(review): part 1 predicts with svmFunctions.prediction while part 2 uses
# projectFunctions.prediction_Perceptron -- confirm both are meant.
# NOTE(review): the trailing bare 1e-06 looks like leftover output or a cut-off
# statement.
bestEpochs = epochsCV ; bestGamma = gamma_; bestC = C_; bestAccuracy = accuracy wCV = svmFunctions.SVM(train_matrix, train_labels, epochsCV, bestC, bestGamma) pred_train_list = svmFunctions.prediction(train_matrix, train_labels, wCV) pred_test_list = svmFunctions.prediction(test_matrix, test_labels, wCV) acc_train = svmFunctions.accuracyMETRIC(train_labels, pred_train_list) acc_test = svmFunctions.accuracyMETRIC(test_labels, pred_test_list) dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list) dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list) print 'Best gamma(0) : ', bestGamma,' Best C: ', bestC,' Best epochs: ', bestEpochs,' Train Acc:', acc_train,' Train f1:', f1_train,' Test Acc', acc_test,'% Test f1:', f1_test endTime = timeit.default_timer(); print 'Total run time: ', endTime - startTime,'secs' ''' print 'Final Support Vector Machines:' epochsFINAL = 20 CFINAL = 11000 gammaFINAL = 1e-06 wFINAL = svmFunctions.SVM(train_matrix, train_labels, epochsFINAL, CFINAL, gammaFINAL) pred_train_list = projectFunctions.prediction_Perceptron( train_matrix, train_labels, wFINAL) pred_test_list = projectFunctions.prediction_Perceptron( test_matrix, test_labels, wFINAL) acc_train = projectFunctions.accuracyMETRIC(train_labels, pred_train_list) acc_test = projectFunctions.accuracyMETRIC(test_labels, pred_test_list) dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list) dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list) print '\tBest gamma(0) : ', gammaFINAL, ' Best C: ', CFINAL, ' Train Acc:', acc_train, ' Train f1:', f1_train, ' Test Acc', acc_test, '% Test f1:', f1_test 1e-06