def eliminateBADfeaturesWITHperceptron(finalFeatureCASE, labelCASE,
                                       thresholdBIAS):
    """Leave-one-column-out feature elimination driven by perceptron F1.

    For every feature column c, a weighted perceptron is trained on the
    training matrix with column c removed and the resulting training F1 is
    recorded.  Columns whose leave-one-out F1 stays below
    (mean F1 + thresholdBIAS) are then deleted from the train/test/eval
    matrices.  The surviving features are re-encoded per finalFeatureCASE:
    '01' binarizes every positive entry to 1, 'normalize' divides each
    column by its (max - min) range.

    Returns (train_matrix, train_labels, test_matrix, test_labels,
             eval_matrix, eval_labels).
    """
    featureCASE = 'w'  # weighted ('w') features requested from eliminateZEROfetures
    tempList = []
    delLIST = []
    train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = eliminateZEROfetures(
        featureCASE, labelCASE)
    for c in range(len(train_matrix[0])):
        # np.delete returns a new array, so train_matrix is untouched
        # (the deepcopy the original code made here was redundant).
        temp_train_matrix = np.delete(train_matrix, c, axis=1)
        w, dummy = percepFunctions.weightedPerceptron(temp_train_matrix,
                                                      train_labels, 'zeros',
                                                      0.675)
        train_pred_labels = projectFunctions.prediction_Perceptron(
            temp_train_matrix, train_labels, w)
        acc_train = projectFunctions.accuracyMETRIC(train_labels,
                                                    train_pred_labels)
        dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
            train_labels, train_pred_labels)
        tempList.append([c, acc_train, f1_train])
    columnSUM = np.sum(tempList, axis=0)
    meanF1 = columnSUM[2] / len(tempList)
    # Mark columns whose removal keeps F1 under the biased mean: deleting
    # them did not hurt, so they carry little signal.
    for c in range(len(train_matrix[0])):
        if tempList[c][2] < meanF1 + thresholdBIAS:
            delLIST.append(c)
    train_matrix = np.delete(train_matrix, delLIST, axis=1)
    test_matrix = np.delete(test_matrix, delLIST, axis=1)
    eval_matrix = np.delete(eval_matrix, delLIST, axis=1)
    #
    if finalFeatureCASE == '01':
        # Binarize: any positive entry becomes 1 (vectorized; identical
        # result to the original row-by-row loops).
        train_matrix[train_matrix > 0] = 1
        test_matrix[test_matrix > 0] = 1
        eval_matrix[eval_matrix > 0] = 1
    elif finalFeatureCASE == 'normalize':
        colRangeLIST = train_matrix.max(axis=0) - train_matrix.min(axis=0)
        # BUG FIX: guard constant columns (range 0) against division by
        # zero, exactly as the test/eval branches below already did.
        colRangeLIST[colRangeLIST == 0] = 1
        for i in range(len(train_matrix)):
            for j in range(len(train_matrix[0])):
                train_matrix[i][j] = train_matrix[i][j] / colRangeLIST[j]
        colRangeLIST = test_matrix.max(axis=0) - test_matrix.min(axis=0)
        colRangeLIST[colRangeLIST == 0] = 1
        for i in range(len(test_matrix)):
            for j in range(len(test_matrix[0])):
                test_matrix[i][j] = (test_matrix[i][j]) / colRangeLIST[j]
        colRangeLIST = eval_matrix.max(axis=0) - eval_matrix.min(axis=0)
        colRangeLIST[colRangeLIST == 0] = 1
        for i in range(len(eval_matrix)):
            for j in range(len(eval_matrix[0])):
                eval_matrix[i][j] = (eval_matrix[i][j]) / colRangeLIST[j]
    return train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels
Example #2
0
import numpy as np
import scipy
import operator
import featureTRANSFORM

train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = featureTRANSFORM.eliminateZEROfetures(
    '01', -1)

print '\n------------------ Winnow: ------------------'
w_train, numUpdates = winnowFunctions.simpleWinnow(train_matrix, train_labels,
                                                   'ones', 2)
train_pred_labels = projectFunctions.prediction_Winnow(train_matrix,
                                                       train_labels, w_train)
train_accuracy = projectFunctions.accuracyMETRIC(train_labels,
                                                 train_pred_labels)
dummy, fummy, train_f1_score = projectFunctions.f1METRIC(
    train_labels, train_pred_labels)
print 'Accuracy : train: ', train_accuracy
#Prediction on test
test_pred_labels = projectFunctions.prediction_Winnow(test_matrix, test_labels,
                                                      w_train)
test_accuracy = projectFunctions.accuracyMETRIC(test_labels, test_pred_labels)
test_f1_score = projectFunctions.f1METRIC(test_labels, test_pred_labels)
print 'Accuracy : test: ', test_accuracy
#Prediction on eval.anon
eval_pred_labels = projectFunctions.prediction_Winnow(eval_matrix, eval_labels,
                                                      w_train)
eval_accuracy = projectFunctions.accuracyMETRIC(eval_labels, eval_pred_labels)
eval_f1_score = projectFunctions.f1METRIC(eval_labels, eval_pred_labels)
print 'Accuracy : eval.anon: ', eval_accuracy
#Write leaderboard file
#projectFunctions.write_solutions(eval_pred_labels,"simpleWinnow.csv")
Example #3
0
# k-nearest-neighbours run: load the '01'-encoded feature split, score kNN
# on the test split, then predict the eval split and write a solutions file.
import bewNeighborFunctions as kNN
import featureTRANSFORM
import projectFunctions as pF

train_matrix, train_labels, test_matrix, test_labels, eval_matrix, eval_labels = featureTRANSFORM.eliminateZEROfetures(
    '01', -1)
k = 3  # number of neighbours
p = 2  # Minkowski order passed to kneighbors (presumably p=2 -> Euclidean; confirm in bewNeighborFunctions)
'''
pred_train_labels = kNN.kneighbors(train_matrix,train_labels,train_matrix,k,p)
train_accuracy = pF.accuracyMETRIC(train_labels, pred_train_labels)

dum,pum,train_f1 = pF.f1METRIC(train_labels,pred_train_labels)
print 'train:   Accuracy:',train_accuracy,'%    F1:',train_f1
'''
pred_test_labels = kNN.kneighbors(train_matrix, train_labels, test_matrix, k,
                                  p)
test_accuracy = pF.accuracyMETRIC(test_labels, pred_test_labels)
# f1METRIC returns (precision, recall, f1); only the f1 scalar is kept.
dum, pum, test_f1 = pF.f1METRIC(test_labels, pred_test_labels)
print 'test:   Accuracy:', test_accuracy, '%    F1:', test_f1

# Predict the unlabeled eval split and dump it for the leaderboard.
eval_pred_labels = kNN.kneighbors(train_matrix, train_labels, eval_matrix, k,
                                  p)
solution_filename = raw_input(
    'Enter x__x in ./solutions_log/x__x_solutions.csv: ')
# NOTE(review): the label written is 'perceptron' although this script runs
# kNN, and the output path './<name>.solutions.csv' does not match the
# './solutions_log/x__x_solutions.csv' pattern in the prompt — confirm both.
pF.write_solutions('perceptron', eval_pred_labels,
                   './' + solution_filename + '.solutions.csv')
def algoFORepochs(numEpochs,
                  trainMatrix,
                  trainLabels,
                  testMatrix,
                  testLabels,
                  algo_type,
                  learningRate,
                  initialWcondition,
                  margin=None):
    """Train an online learner for several epochs and track the best epoch.

    Epoch 0 uses the training data in its given order; every later epoch
    reshuffles the data and warm-starts from the previous epoch's weights
    (except 'winnow', which always restarts from 'ones').  After each epoch
    the current weights are evaluated on both the train and test splits and
    the epoch with the highest training F1 is remembered.

    Returns [shuffle_type, epoch_index, acc_train, f1_train, acc_test,
             f1_test, w] for that best epoch.
    """
    # Per-epoch bookkeeping rows (header row first); kept for debugging.
    infoList = [[
        'shuffleTYPE', 'num of Epochs', 'acc_train', 'f1_train', 'acc_test',
        'f1_test', 'weights'
    ]]

    tempTrainMatrix = copy.deepcopy(trainMatrix)
    tempTrainLabels = copy.deepcopy(trainLabels)

    best_f1_train = 0
    total_numUpdates = 0
    w = 0
    for e_ind in range(numEpochs):
        # --- shuffling: the first epoch keeps the original order ---
        # (the original code re-checked "e_ind == 0" inside this else
        # branch, which was unreachable and has been removed)
        if e_ind == 0:
            shuffleTYPE = 'no shuffle'
            trainingExMat = trainMatrix
            trainExLabels = trainLabels
        else:
            shuffleTYPE = 'with shuffle'
            trainingExMat, trainExLabels = shuffle(tempTrainMatrix,
                                                   tempTrainLabels,
                                                   random_state=0)
            tempTrainMatrix = trainingExMat
            tempTrainLabels = trainExLabels
        # --- warm start: later epochs continue from the last weights ---
        if e_ind == 0:
            weightCondition = initialWcondition
        else:
            weightCondition = w
        if algo_type == 'simple':
            w, numUpdates = simplePerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate)
        elif algo_type == 'margin':
            w, numUpdates = marginPerceptron(trainingExMat, trainExLabels,
                                             weightCondition, learningRate,
                                             margin)
        elif algo_type == 'aggressive':
            w, numUpdates = agrresiveMarginPerceptron(trainingExMat,
                                                      trainExLabels,
                                                      weightCondition,
                                                      learningRate, margin)
        elif algo_type == 'weightedAggressive':
            w, numUpdates = WeightedAgrresiveMarginPerceptron(
                trainingExMat, trainExLabels, weightCondition, learningRate,
                margin)
        elif algo_type == 'weightedSimple':
            w, numUpdates = weightedPerceptron(trainingExMat, trainExLabels,
                                               weightCondition, learningRate)
        elif algo_type == 'winnow':
            # NOTE: winnow ignores weightCondition and restarts from 'ones'
            # every epoch (original behavior, preserved).
            w, numUpdates = winnowFunctions.simpleWinnow(
                trainingExMat, trainExLabels, 'ones', 2)
        total_numUpdates += numUpdates

        # --- evaluate this epoch's weights; only the predictor differs ---
        if algo_type == 'winnow':
            predictFunc = projectFunctions.prediction_Winnow
        else:
            predictFunc = projectFunctions.prediction_Perceptron
        train_pred_labels = predictFunc(trainMatrix, trainLabels, w)
        acc_train = projectFunctions.accuracyMETRIC(trainLabels,
                                                    train_pred_labels)
        test_pred_labels = predictFunc(testMatrix, testLabels, w)
        acc_test = projectFunctions.accuracyMETRIC(testLabels,
                                                   test_pred_labels)
        dummyP, dummyR, f1_train = projectFunctions.f1METRIC(
            trainLabels, train_pred_labels)
        dummyP, dummyR, f1_test = projectFunctions.f1METRIC(
            testLabels, test_pred_labels)

        infoList.append([
            shuffleTYPE, e_ind + 1, acc_train, f1_train, acc_test, f1_test, w
        ])

        # ">=" means ties favor the latest epoch, and the first epoch
        # always initializes the best_* names (f1 is never negative).
        if f1_train >= best_f1_train:
            best_shuffle_type = shuffleTYPE
            best_epoch_index = e_ind
            best_acc_train = acc_train
            best_f1_train = f1_train
            best_acc_test = acc_test
            best_f1_test = f1_test
            best_w = w

    best_LIST = [
        best_shuffle_type, best_epoch_index, best_acc_train, best_f1_train,
        best_acc_test, best_f1_test, best_w
    ]
    return best_LIST
# Decision-tree evaluation: predict all three splits with the trained tree,
# score them, and write the eval predictions to a leaderboard file.
max_depth = max(depth_list)

list_of_labels = list(Counter(train_actual_labels))
predicted_train_list = treeFunctions.predictFUNCTION(tree, train_matrix,
                                                     list_of_labels,
                                                     major_trainLABEL)
predicted_test_list = treeFunctions.predictFUNCTION(tree, test_matrix,
                                                    list_of_labels,
                                                    major_trainLABEL)
predicted_eval_list = treeFunctions.predictFUNCTION(tree, eval_matrix,
                                                    list_of_labels,
                                                    major_trainLABEL)
#
train_accuracy = projectFunctions.accuracyMETRIC(train_actual_labels,
                                                 predicted_train_list)
# BUG FIX: f1METRIC returns (precision, recall, f1) -- every other call
# site in this file unpacks it.  Keep only the scalar f1 in *_f1_score.
dummyP, dummyR, train_f1_score = projectFunctions.f1METRIC(
    train_actual_labels, predicted_train_list)
test_accuracy = projectFunctions.accuracyMETRIC(test_actual_labels,
                                                predicted_test_list)
dummyP, dummyR, test_f1_score = projectFunctions.f1METRIC(
    test_actual_labels, predicted_test_list)
eval_accuracy = projectFunctions.accuracyMETRIC(eval_actual_labels,
                                                predicted_eval_list)
dummyP, dummyR, eval_f1_score = projectFunctions.f1METRIC(
    eval_actual_labels, predicted_eval_list)

solution_filename = raw_input(
    'Enter x__x in ./solutions_log/x__x_solutions.csv: ')
projectFunctions.write_solutions(
    'decision tree', predicted_eval_list,
    './solutions_log/solutions/' + solution_filename + '.solutions.csv')
#np.save('trees.npy', tree)
Example #6
0
# Decision-tree evaluation (train/test splits only): predict, score, print
# the metrics, and report elapsed time.  Assumes tree, depth_list,
# major_trainLABEL, the data splits, and startTime were built earlier.
max_depth = max(depth_list)

# Candidate class labels are the distinct training labels.
list_of_labels = list(Counter(train_labels))
predicted_train_list = treeFunctions.predictFUNCTION(tree, train_matrix,
                                                     list_of_labels,
                                                     major_trainLABEL)
predicted_test_list = treeFunctions.predictFUNCTION(tree, test_matrix,
                                                    list_of_labels,
                                                    major_trainLABEL)
predicted_eval_list = treeFunctions.predictFUNCTION(tree, eval_matrix,
                                                    list_of_labels,
                                                    major_trainLABEL)
#
train_accuracy = projectFunctions.accuracyMETRIC(train_labels,
                                                 predicted_train_list)
# f1METRIC returns (precision, recall, f1); only the f1 scalar is kept.
dunn, gi, train_f1_score = projectFunctions.f1METRIC(train_labels,
                                                     predicted_train_list)
test_accuracy = projectFunctions.accuracyMETRIC(test_labels,
                                                predicted_test_list)
f, j, test_f1_score = projectFunctions.f1METRIC(test_labels,
                                                predicted_test_list)
print 'Train f1: '
print train_f1_score
print 'Train Accuracy: '
print train_accuracy
print 'Test f1: '
print test_f1_score
print 'Test Accuracy: '
print test_accuracy

stopTime = timeit.default_timer()
print 'time: ', stopTime - startTime
Example #7
0
def simplePerceptron_cvRate(train_matrix, train_actual_labels, rateRange,
                            num_folds):
    av_accuracy_list = []
    av_f1_list = []
    for rate in rateRange:
        print 'rate: ', rate
        '''
        ###############
        kf = KFold(n_splits=5)
        temp_train_Matrix = copy.deepcopy(train_matrix)
        temp_train_Labels = copy.deepcopy(train_actual_labels)
        count = 0
        accuracy_list = []
        for train_indices, test_indices in kf.split(temp_train_Matrix):
            temp_test_Matrix = []
            temp_test_Labels = []
            for test_indx in test_indices:
                count +=1
                #print 'count: ', count
                temp_test_Matrix.append(temp_train_Matrix[test_indx-count])
                temp_test_Labels.append(temp_train_Labels[test_indx-count])
                np.delete(temp_train_Matrix, test_indx-count, 0)
                np.delete(temp_train_Labels, test_indx-count, 0)

        ################
        '''
        accuracy_list = []
        f1_list = []
        training_lab = copy.deepcopy(train_actual_labels)
        training_lab = training_lab.tolist()
        training_mat = copy.deepcopy(train_matrix)
        training_mat = training_mat.tolist()
        subset_size = len(training_mat) / num_folds
        for i in range(num_folds):
            #
            if i == num_folds - 1:
                temp_test_Matrix = training_mat[(i) * subset_size:]
                temp_train_Matrix = training_mat[0:][:i * subset_size]
                #
                temp_test_Labels = training_lab[(i) * subset_size:]
                temp_train_Labels = training_lab[0:][:i * subset_size]
            else:
                temp_test_Matrix = training_mat[i * subset_size:][:subset_size]
                #temp_train_Matrix =np.append(training_mat[:i * subset_size],training_mat[(i + 1) * subset_size:])
                temp_train_Matrix = training_mat[:i *
                                                 subset_size] + training_mat[
                                                     (i + 1) * subset_size:]
                #
                temp_test_Labels = training_lab[i * subset_size:][:subset_size]
                temp_train_Labels = training_lab[:i *
                                                 subset_size] + training_lab[
                                                     (i + 1) * subset_size:]
            # Training
            w_train, numUpdates_simpleP = percepFunctions.weightedPerceptron(
                temp_train_Matrix, temp_train_Labels, 'zeros', rate)
            predicted_labels = projectFunctions.prediction_Perceptron(
                temp_test_Matrix, temp_test_Labels, w_train)
            # Metric
            #test_accuracy = projectFunctions.accuracyMETRIC(temp_test_Labels, predicted_labels)
            test_f1 = projectFunctions.f1METRIC(temp_test_Labels,
                                                predicted_labels)
            #accuracy_list.append(test_accuracy)
            f1_list.append(test_f1)
        #av_accuracy = np.mean(accuracy_list)
        av_f1 = np.mean(f1_list)
        #av_accuracy_list.append(av_accuracy)
        av_f1_list.append(av_f1)

        #print av_accuracy
        print av_f1
    #best_accuracy = max(av_accuracy_list)
    best_f1 = max(av_f1_list)
    #max_index = av_accuracy_list.index(best_accuracy)
    max_index = av_f1_list.index(best_f1)
    best_rate = rateRange[max_index]
    #
    #line, = plt.plot(rateRange, av_accuracy_list)
    line, = plt.plot(rateRange, av_f1_list)
    plt.xlabel('Learning Rate')
    #plt.ylabel('Accuracy')
    #plt.title('Accuracy v/s Learning Rate, for Simple perceptron')
    plt.ylabel('F1 score')
    plt.title('F1 score v/s Learning Rate, for Simple Weighted perceptron')
    plt.grid(True)
    plt.show()
    #return best_accuracy, best_rate, av_accuracy_list
    return best_f1, best_rate, av_f1_list
Example #8
0
        bestEpochs = epochsCV ; bestGamma = gamma_; bestC = C_; bestAccuracy = accuracy
wCV = svmFunctions.SVM(train_matrix, train_labels, epochsCV, bestC, bestGamma)
pred_train_list = svmFunctions.prediction(train_matrix, train_labels, wCV)
pred_test_list = svmFunctions.prediction(test_matrix, test_labels, wCV)
acc_train = svmFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = svmFunctions.accuracyMETRIC(test_labels, pred_test_list)
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels, pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels, pred_test_list)
print 'Best gamma(0) : ', bestGamma,'    Best C: ', bestC,'    Best epochs: ', bestEpochs,'   Train Acc:', acc_train,'   Train f1:', f1_train,'    Test Acc', acc_test,'%   Test f1:', f1_test
endTime = timeit.default_timer(); print 'Total run time: ', endTime - startTime,'secs'
'''

# Final SVM run with the chosen hyper-parameters, scored on train and test.
print 'Final Support Vector Machines:'
epochsFINAL = 20  # number of training epochs
CFINAL = 11000  # presumably the SVM tradeoff constant C -- confirm in svmFunctions.SVM
gammaFINAL = 1e-06  # presumably the initial learning rate gamma(0) -- see the print below
wFINAL = svmFunctions.SVM(train_matrix, train_labels, epochsFINAL, CFINAL,
                          gammaFINAL)
# The learned weights are used with the perceptron predictor (both are
# linear classifiers, so prediction is the same sign-of-dot-product).
pred_train_list = projectFunctions.prediction_Perceptron(
    train_matrix, train_labels, wFINAL)
pred_test_list = projectFunctions.prediction_Perceptron(
    test_matrix, test_labels, wFINAL)
acc_train = projectFunctions.accuracyMETRIC(train_labels, pred_train_list)
acc_test = projectFunctions.accuracyMETRIC(test_labels, pred_test_list)
# f1METRIC returns (precision, recall, f1); only the f1 scalar is kept.
dummyP, dummyR, f1_train = projectFunctions.f1METRIC(train_labels,
                                                     pred_train_list)
dummyP, dummyR, f1_test = projectFunctions.f1METRIC(test_labels,
                                                    pred_test_list)
print '\tBest gamma(0) : ', gammaFINAL, '    Best C: ', CFINAL, '   Train Acc:', acc_train, '   Train f1:', f1_train, '    Test Acc', acc_test, '%   Test f1:', f1_test

1e-06