Example #1
0
    def getClassifier(self):
        """Return the classifier to use, loading a saved one when possible.

        A persisted model is loaded when a file exists at the expected path
        under ``self.output_dir`` and ``self.retrain`` is falsy. Otherwise a
        new ``OneVsRestClassifier`` wrapping ``qda(store_covariances=True)``
        is constructed.

        Returns:
            The loaded or newly constructed classifier.

        Note:
            Any exception is logged and then the process is terminated
            through ``exit()``.
        """
        classifier = None
        class_path = os.path.join(self.output_dir, self.get_file_path(CLASS_FILE))

        try:
            if os.path.isfile(class_path) and not self.retrain:
                log.info("Loading classifier from %s" % class_path)
                # joblib reads a persisted object with ``load(path)``; it has
                # no ``loads``, so the old call always failed and exited.
                # joblib files are pickle-based: only load trusted files.
                classifier = joblib.load(class_path)
            else:
                # NOTE(review): recent scikit-learn releases spell this
                # keyword ``store_covariance`` - confirm against the
                # installed version.
                classifier = OneVsRestClassifier(qda(store_covariances=True))
        except Exception as e:
            log.error(e)
            exit()

        return classifier
Example #2
0
    def getClassifier(self):
        """Return the classifier to use, loading a saved one when possible.

        A persisted model is loaded when a file exists at the expected path
        under ``self.output_dir`` and ``self.retrain`` is falsy. Otherwise a
        new ``OneVsRestClassifier`` wrapping ``qda(store_covariances=True)``
        is constructed.

        Returns:
            The loaded or newly constructed classifier.

        Note:
            Any exception is logged and then the process is terminated
            through ``exit()``.
        """
        classifier = None
        class_path = os.path.join(self.output_dir,
                                  self.get_file_path(CLASS_FILE))

        try:
            if os.path.isfile(class_path) and not self.retrain:
                log.info("Loading classifier from %s" % class_path)
                # joblib reads a persisted object with ``load(path)``; it has
                # no ``loads``, so the old call always failed and exited.
                # joblib files are pickle-based: only load trusted files.
                classifier = joblib.load(class_path)
            else:
                # NOTE(review): recent scikit-learn releases spell this
                # keyword ``store_covariance`` - confirm against the
                # installed version.
                classifier = OneVsRestClassifier(qda(store_covariances=True))
        except Exception as e:
            log.error(e)
            exit()

        return classifier
Example #3
0
def StratifiedShuffleSplit_cross_validate_func_QDA(X, y,partitioner) -> (np.array, np.array,np.array):
    """Cross-validate QDA four times and plot error rate and accuracy.

    Each repetition runs ``cross_validate`` with ``scoring="accuracy"`` and
    ``cv=partitioner``, averages the test scores, and prints that mean
    accuracy together with its complement (the error rate). After the
    last repetition the per-run error rates and then the per-run
    accuracies are each plotted and shown.

    Note: despite the return annotation, nothing is returned.
    """
    n_repeats = 4
    mean_scores = np.empty([n_repeats])
    accuracies = []
    error_rates = []

    for rep in range(n_repeats):
        cv_out = cross_validate(qda(), X, y, scoring="accuracy", cv=partitioner)
        mean_scores[rep] = np.mean(cv_out["test_score"])
        accuracy = mean_scores[rep]
        error = 1 - accuracy

        # Label first, then value, for each of the two metrics.
        for label, value in (("QDA[i]", accuracy), ("error_rate_qda", error)):
            print(label)
            print(value)

        accuracies.append(accuracy)
        error_rates.append(error)

    # One figure per series: error rates first, accuracies second.
    for series in (error_rates, accuracies):
        plt.plot(series)
        plt.show()
	# NOTE(review): the statement that opens this tab-indented body is not
	# visible here; presumably a loop that provides x_k / y_k - confirm.
	# Number of rows of x_k drawn for the training arrays.
	train_count=22
	select_index=random.sample(range(x_k.shape[0]),train_count)# draw train_count distinct row indices of x_k at random
	# print(select_index)
	# Append the sampled rows and their labels to the training arrays.
	X=np.append(X,x_k[select_index,:],axis=0)
	y=np.append(y,y_k[select_index],axis=0)
	test_index=list(set(range(x_k.shape[0])).difference(set(select_index)))# every row index of x_k that was not sampled
	# The rows that were not sampled, and their labels, go to the test arrays.
	X_test=np.append(X_test,x_k[test_index,:],axis=0)
	y_test=np.append(y_test,y_k[test_index],axis=0)

# Dump the assembled training and test arrays with their shapes.
print('X:\n{}\nshape:{}'.format(X,X.shape))
print('y:\n{}\nshape:{}'.format(y,y.shape))

print('X_test:\n{}\nshape:{}'.format(X_test,X_test.shape))
print('y_test:\n{}\nshape:{}'.format(y_test,y_test.shape))

clf=qda()

clf.fit(X,y)


#on TRAINING data
train_prediction=clf.predict(X)
train_score = clf.score(X,y)

print('prediction on training set:\n{}'.format(train_prediction))
# Turn both vectors into columns so they can be shown side by side.
# Note that y stays a column vector after this point.
train_prediction = np.expand_dims(train_prediction,axis=1)
y= np.expand_dims(y,axis=1)
# The stacked array holds training predictions against training labels; the
# label previously said "TEST data", which was wrong for this section.
print('prediction - truth  array for TRAINING data: \n{}'.format(np.hstack((train_prediction,y))))
print('score on training set: {}'.format(train_score))

#on TEST data
def qdaclustering():
    """Fit QDA on two per-sample features and scatter-plot both classes.

    Walks the files in the ``svd2`` feature directory, keeps those whose
    name contains ``'orig'`` or ``'_momentum_'``, and pairs each one with
    the matching file in the ``svd`` feature directory. Samples without a
    matching file are skipped. A QDA classifier is then fitted and scored
    on the same data, summary numbers are printed, and the samples are
    plotted with label 0 in the default colour and all others in red.
    """
    feature0 = './curves/curves_classify_svd2_500'  # _svd2.npy feature
    feature1 = './curves/curves_classify'  # _svd.npy feature

    # Samples are collected in lists and converted once at the end. This
    # avoids seeding the arrays with an uninitialised placeholder row and
    # re-allocating them on every append.
    rows = []    # one [svd2, svd] pair per usable sample
    labels = []
    label0_svd2, label0_svd = [], []
    other_svd2, other_svd = [], []

    for fname in os.listdir(feature0):
        if fname.startswith('.'):
            continue
        # Other name patterns ('_steptarget_', '_fgsm_') are currently
        # left out of the selection.
        if 'orig' not in fname and '_momentum_' not in fname:
            continue

        sample_y = nbtype(fname)
        sample_x1 = last_trans_rank(os.path.join(
            feature0, fname))  # first feature 'svd2'
        # Name of the companion file: the part before the first dot with
        # its last character dropped, plus '.npy'.
        fname_x2 = fname.split('.')[0][:-1] + '.npy'
        path_x2 = os.path.join(feature1, fname_x2)
        if not os.path.exists(path_x2):
            continue

        sample_x2 = last_trans_rank(path_x2)
        rows.append([sample_x1, sample_x2])
        labels.append(sample_y)

        if sample_y == 0:
            label0_svd2.append(sample_x1)
            label0_svd.append(sample_x2)
        else:
            other_svd2.append(sample_x1)
            other_svd.append(sample_x2)

    # float dtype and the (n, 2) shape match what appending to a float
    # placeholder array produced before, including when no sample is found.
    X = np.array(rows, dtype=float).reshape(-1, 2)
    y = np.array(labels, dtype=float)
    print("dimensions of data:")
    print(X.shape)
    print(y.shape)

    clf = qda()
    clf.fit(X, y)

    # Predictions are made on the training data itself.
    pred = clf.predict(X)

    print("Classifier Score:")
    print(clf.score(X, y))
    print("Error:")
    print(sum(abs(pred - y)))
    print("Ordinary images trained:")
    print(len(label0_svd2))
    print("")
    print(len(other_svd2))

    plt.scatter(label0_svd2, label0_svd, alpha=0.5)
    plt.scatter(other_svd2, other_svd, color='r', alpha=0.5)
    plt.title('Example - 2D distribution of original and adversarial images')
    plt.xlabel('SVD2')
    plt.ylabel('SVD')
    plt.legend(['orig', 'adv'], prop={'size': 12})
    plt.show()
def _classifier_factory(method, best_parameter_pair):
    """Return a zero-argument callable that builds a new estimator.

    Args:
        method: One of 'SVM', 'LinearSVM', 'RF', 'KNN', 'AdaBoost', 'NB',
            'LDA' or 'QDA'.
        best_parameter_pair: Mapping with the tuned parameters; the keys
            read are 'cost' and 'gamma' (SVM), 'cost' (LinearSVM),
            'tree' (RF) and 'ngb' (KNN).

    Raises:
        ValueError: If ``method`` is not a supported name.
    """
    # Lambdas defer both the parameter lookup and the estimator
    # construction until a classifier is actually requested.
    factories = {
        'SVM': lambda: svm.SVC(C=2**best_parameter_pair['cost'],
                               gamma=2**best_parameter_pair['gamma'],
                               probability=True),
        'LinearSVM': lambda: svm.SVC(C=2**best_parameter_pair['cost'],
                                     kernel="linear",
                                     probability=True),
        'RF': lambda: RandomForestClassifier(
            random_state=42, n_estimators=best_parameter_pair['tree']),
        'KNN': lambda: KNeighborsClassifier(
            n_neighbors=best_parameter_pair['ngb']),
        'AdaBoost': lambda: AdaBoostClassifier(),
        'NB': lambda: GaussianNB(),
        'LDA': lambda: lda(),
        'QDA': lambda: qda(),
    }
    if method not in factories:
        raise ValueError(
            'Unsupported method %r; expected one of: %s' %
            (method, ', '.join(factories)))
    return factories[method]


def performance_evaluation(args, output_array, folds, label_list,
                           best_parameter_pair):
    """Evaluate the selected classifier over the folds and save predictions.

    For every ``(train, test)`` index pair in ``folds`` a new classifier of
    type ``args.method`` is fitted on the training part and evaluated on
    the test part. ROC and PR curves are plotted from the collected
    per-fold probabilities, the mean of the per-fold ``evaluation`` results
    is printed, and predictions for all of ``output_array`` are written to
    ``args.result_dir + 'prediction result'``.

    Args:
        args: Object providing ``method`` and ``result_dir``.
        output_array: Feature array, indexed with the fold index arrays.
        folds: Iterable of ``(train, test)`` index pairs.
        label_list: Labels, indexed like ``output_array``.
        best_parameter_pair: Tuned parameters for the chosen method.

    Raises:
        ValueError: If ``args.method`` is not supported or ``folds``
            yields no split. Both cases previously surfaced as an
            unbound-variable error at the final prediction step.
    """
    make_classifier = _classifier_factory(args.method, best_parameter_pair)

    results = []
    true_labels = []
    predict_probability = []
    classification = None
    for train, test in folds:
        x_train, x_test = output_array[train], output_array[test]
        y_train, y_test = label_list[train], label_list[test]

        classification = make_classifier()
        classification.fit(x_train, y_train)
        y_test_predict = classification.predict(x_test)
        # Second column of predict_proba; presumably the positive-class
        # probability, which assumes binary labels - confirm.
        y_test_prob_predict = classification.predict_proba(x_test)[:, 1]

        results.append(evaluation(y_test, y_test_predict))
        true_labels.append(y_test)
        predict_probability.append(y_test_prob_predict)

    if classification is None:
        raise ValueError('folds yielded no (train, test) split')

    plot_roc_curve(true_labels, predict_probability, args.result_dir)
    plot_pr_curve(true_labels, predict_probability, args.result_dir)
    final_result = np.array(results).mean(axis=0)
    result_print(final_result)

    # The classifier used here is the one fitted on the LAST fold, so the
    # saved predictions include samples it was trained on.
    all_predict = classification.predict(output_array)
    with open(args.result_dir + 'prediction result', 'w') as f:
        space = '          '
        lines = ['No.' + space + 'True Label' + space + 'Predict Label\n']
        lines.extend(
            str(i) + space + str(label_list[i]) + space + str(predicted) + '\n'
            for i, predicted in enumerate(all_predict))
        f.writelines(lines)
Example #7
0
from sklearn.datasets import fetch_mldata

if __name__ == '__main__':
    # Project-local loaders, imported only when the file is run as a script.
    from data.data_reader import get_training_data
    from data.data_combinator import get_full_combinations

    # Load the training and validation sets, then pass both feature sets
    # through get_full_combinations.
    x_train, y_train, x_val, y_val = get_training_data(validation=True)
    x_train = get_full_combinations(x_train)
    x_val = get_full_combinations(x_val)

    # Each model below is fitted on the training data and then asked for
    # class probabilities on the validation features.
    LDA = lda()
    LDA.fit(x_train, y_train)
    LDA_prob = LDA.predict_proba(x_val)
    # Bare expression: has no effect when run as a script (presumably left
    # over from an interactive session).
    LDA_prob

    QDA = qda()
    QDA.fit(x_train, y_train)
    QDA_prob = QDA.predict_proba(x_val)
    QDA_prob  # no effect, as above

    GNB = GaussianNB()
    GNB.fit(x_train, y_train)
    GaussianNB_prob = GNB.predict_proba(x_val)
    GaussianNB_prob  # no effect, as above

    # alpha = 1.0
    # Logistic regression is only fitted here; no probabilities are
    # computed for it in the visible code.
    LOG = LogisticRegression()
    LOG.fit(x_train, y_train)
    # RIDGE = Ridge(alpha=alpha)
    # RIDGE.fit(x_train, y_train)
    # LASSO = Lasso(alpha=alpha)
Example #8
0
# Grid of C / gamma values searched for the RBF-kernel SVM.
tuning_param = [{
    'C': [0.01, 0.1, 1, 5, 10, 100],
    'gamma': [0.01, 0.1, 1, 5, 10, 100]
}]

svm_fit = GridSearchCV(SVC(kernel='rbf'), tuning_param, cv=10)
svm_fit.fit(data_x, data_y)

svm_fit.best_params_
#{'C': 0.01, 'gamma': 0.1}

#Fit the model using the parameters found
svm_best_fit = SVC(kernel='rbf', C=0.01, gamma=0.1)
svm_best_fit.fit(x_clas_train, y_clas_train)
# Misclassification rate on the held-out set. The earlier expression took
# the mean of (prediction - truth), a signed quantity in which errors of
# opposite sign cancel, so it could under-report the error.
np.mean(svm_best_fit.predict(x_clas_cv) != y_clas_cv)
#0.5833333333333334  (value recorded with the earlier signed-difference expression)

#LDA
lda_fit = lda()
lda_fit.fit(x_clas_train, y_clas_train)

np.mean(lda_fit.predict(x_clas_cv) != y_clas_cv)
#0.20833333333333334  (value recorded with the earlier signed-difference expression)

#QDA
qda_fit = qda()
qda_fit.fit(x_clas_train, y_clas_train)

np.mean(qda_fit.predict(x_clas_cv) != y_clas_cv)
#0.4583333333333333  (value recorded with the earlier signed-difference expression)