def getClassifier(self):
    """Return the classifier to use: a persisted one if available, else a new one.

    A previously saved classifier is loaded from
    ``os.path.join(self.output_dir, self.get_file_path(CLASS_FILE))`` when that
    file exists and ``self.retrain`` is falsy.  Otherwise a fresh, unfitted
    ``OneVsRestClassifier`` wrapping ``qda(store_covariances=True)`` is built.

    Returns:
        The loaded or newly constructed classifier.

    Raises:
        SystemExit: with status 1 if loading or construction fails; the
            underlying error is logged first.
    """
    classifier = None
    class_path = os.path.join(self.output_dir, self.get_file_path(CLASS_FILE))
    try:
        if os.path.isfile(class_path) and not self.retrain:
            log.info("Loading classifier from %s" % class_path)
            # joblib exposes `load(filename)`; there is no `joblib.loads`, so
            # the previous call always raised and ended in the except branch.
            classifier = joblib.load(class_path)
        else:
            # NOTE(review): newer scikit-learn releases spell this keyword
            # `store_covariance` -- confirm against the pinned version.
            classifier = OneVsRestClassifier(qda(store_covariances=True))
    except Exception as e:
        log.error(e)
        # Exit with a non-zero status so callers/shells can see the failure;
        # raising SystemExit directly also avoids relying on the `site`
        # module's interactive `exit` helper.
        raise SystemExit(1)
    return classifier
def StratifiedShuffleSplit_cross_validate_func_QDA(X, y,partitioner) -> (np.array, np.array,np.array):
    """Cross-validate a default ``qda()`` model four times and plot the results.

    Each run calls ``cross_validate`` with ``scoring="accuracy"`` and
    ``cv=partitioner``, averages the per-split test scores, prints the mean
    accuracy and the corresponding error rate (``1 - accuracy``), and records
    both.  After the last run the error rates and then the accuracies are
    plotted, each in its own figure.

    Args:
        X: feature data handed to ``cross_validate``.
        y: target labels handed to ``cross_validate``.
        partitioner: value passed through as ``cv``.

    NOTE(review): the annotation advertises a tuple of three arrays, but the
    visible body has no ``return`` statement, so the call evaluates to
    ``None`` -- confirm what callers expect.
    """
    runs = 4
    QDA = np.empty([runs])
    error_rate_list = []
    accuracy_list = []

    for run_idx in range(runs):
        scores = cross_validate(qda(), X, y, scoring="accuracy", cv=partitioner)
        QDA[run_idx] = np.mean(scores["test_score"])
        run_accuracy = QDA[run_idx]
        error_rate_qda = 1 - run_accuracy

        print("QDA[i]")
        print(run_accuracy)
        print("error_rate_qda")
        print(error_rate_qda)

        accuracy_list.append(run_accuracy)
        error_rate_list.append(error_rate_qda)

    # One figure per metric, error rate first.
    for series in (error_rate_list, accuracy_list):
        plt.plot(series)
        plt.show()
# Split the current group (x_k, y_k) into a random training part, appended to
# X / y, and the complementary test part, appended to X_test / y_test.  Then
# fit a default qda() model on the accumulated training data and report how
# well it reproduces the training labels.
train_count = 22
# Randomly select `train_count` (22) distinct row indices for training.
select_index = random.sample(range(x_k.shape[0]), train_count)
# print(select_index)
X = np.append(X, x_k[select_index, :], axis=0)
y = np.append(y, y_k[select_index], axis=0)

# Every row that was not selected for training goes to the test set.
test_index = list(set(range(x_k.shape[0])).difference(set(select_index)))
X_test = np.append(X_test, x_k[test_index, :], axis=0)
y_test = np.append(y_test, y_k[test_index], axis=0)

print('X:\n{}\nshape:{}'.format(X, X.shape))
print('y:\n{}\nshape:{}'.format(y, y.shape))
print('X_test:\n{}\nshape:{}'.format(X_test, X_test.shape))
print('y_test:\n{}\nshape:{}'.format(y_test, y_test.shape))

clf = qda()
clf.fit(X, y)

# on TRAINING data
train_prediction = clf.predict(X)
train_score = clf.score(X, y)
print('prediction on training set:\n{}'.format(train_prediction))

# Turn both vectors into columns so they can be shown side by side.
# NOTE(review): this rebinds `y` to a column-shaped array for all later code;
# confirm nothing downstream appends 1-D labels to it afterwards.
train_prediction = np.expand_dims(train_prediction, axis=1)
y = np.expand_dims(y, axis=1)
# The arrays compared here are the training predictions and training labels,
# so the label says TRAINING (it previously said TEST).
print('prediction - truth array for TRAINING data: \n{}'.format(np.hstack((train_prediction, y))))
print('score on training set: {}'.format(train_score))
# on TEST data
def qdaclustering(feature0='./curves/curves_classify_svd2_500',
                  feature1='./curves/curves_classify'):
    """Fit QDA on two per-file features and scatter-plot the two classes.

    For every file in ``feature0`` whose name does not start with ``'.'`` and
    contains ``'orig'`` or ``'_momentum_'``:

    * the label comes from ``nbtype(fname)``;
    * the first feature is ``last_trans_rank`` of the file in ``feature0``;
    * the second feature is ``last_trans_rank`` of the counterpart file in
      ``feature1``.  The counterpart name is the text before the first
      ``'.'`` with its last character dropped, plus ``'.npy'``.  Files
      without a counterpart are skipped.

    A default ``qda()`` model is fitted on all collected samples and scored
    on that same data.  The score, the summed absolute prediction error and
    the per-class sample counts are printed, then both classes are shown in
    one scatter plot (label ``0`` in the default colour, all others in red).

    Args:
        feature0: directory holding the first ('svd2') feature files.
        feature1: directory holding the second ('svd') feature files.
    """
    # Collect rows in plain lists and convert once at the end; this replaces
    # an uninitialised placeholder row plus repeated np.append copies.
    samples = []
    labels = []
    orig_x, orig_y = [], []
    adv_x, adv_y = [], []

    for fname in os.listdir(feature0):
        if fname.startswith('.'):
            continue
        if 'orig' not in fname and '_momentum_' not in fname:
            # or ('_steptarget_' in fname) or ('_fgsm_' in fname)
            continue

        sample_y = nbtype(fname)
        sample_x1 = last_trans_rank(os.path.join(feature0, fname))  # first feature 'svd2'

        fname_x2 = fname.split('.')[0][:-1] + '.npy'
        path_x2 = os.path.join(feature1, fname_x2)
        if not os.path.exists(path_x2):
            continue
        sample_x2 = last_trans_rank(path_x2)  # second feature 'svd'

        samples.append([sample_x1, sample_x2])
        labels.append(sample_y)
        if sample_y == 0:
            orig_x.append(sample_x1)
            orig_y.append(sample_x2)
        else:
            adv_x.append(sample_x1)
            adv_y.append(sample_x2)

    # float dtype and the (n, 2) shape match what the np.append-based
    # accumulation produced, including the n == 0 case.
    X = np.array(samples, dtype=float).reshape(-1, 2)
    y = np.array(labels, dtype=float)

    print("dimensions of data:")
    print(X.shape)
    print(y.shape)

    clf = qda()
    clf.fit(X, y)
    pred = clf.predict(X)

    print("Classifier Score:")
    print(clf.score(X, y))
    print("Error:")
    print(sum(abs(pred - y)))
    print("Ordinary images trained:")
    print(len(orig_x))
    print("")
    print(len(adv_x))

    plt.scatter(orig_x, orig_y, alpha=0.5)
    plt.scatter(adv_x, adv_y, color='r', alpha=0.5)
    plt.title('Example - 2D distribution of original and adversarial images')
    plt.xlabel('SVD2')
    plt.ylabel('SVD')
    plt.legend(['orig', 'adv'], prop={'size': 12})
    plt.show()
def _make_evaluation_classifier(method, best_parameter_pair):
    """Return a new, unfitted classifier for ``method``.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    if method == 'SVM':
        return svm.SVC(C=2**best_parameter_pair['cost'],
                       gamma=2**best_parameter_pair['gamma'],
                       probability=True)
    if method == 'LinearSVM':
        return svm.SVC(C=2**best_parameter_pair['cost'],
                       kernel="linear",
                       probability=True)
    if method == 'RF':
        return RandomForestClassifier(
            random_state=42, n_estimators=best_parameter_pair['tree'])
    if method == 'KNN':
        return KNeighborsClassifier(n_neighbors=best_parameter_pair['ngb'])
    if method == 'AdaBoost':
        return AdaBoostClassifier()
    if method == 'NB':
        return GaussianNB()
    if method == 'LDA':
        return lda()
    if method == 'QDA':
        return qda()
    raise ValueError('Unsupported method: {!r}'.format(method))


def performance_evaluation(args, output_array, folds, label_list,
                           best_parameter_pair):
    """Cross-validate the selected classifier and write its predictions.

    For every ``(train, test)`` index pair in ``folds`` a fresh classifier for
    ``args.method`` is fitted on the training rows and evaluated on the test
    rows with ``evaluation``.  The per-fold true labels and the second column
    of ``predict_proba`` are passed to ``plot_roc_curve`` and
    ``plot_pr_curve``, and the column-wise mean of the per-fold results goes
    to ``result_print``.  Finally the classifier from the last fold predicts
    every row of ``output_array`` and a table is written to
    ``args.result_dir + 'prediction result'``.

    Args:
        args: object providing ``method`` (one of 'SVM', 'LinearSVM', 'RF',
            'KNN', 'AdaBoost', 'NB', 'LDA', 'QDA') and ``result_dir``.
        output_array: feature matrix, indexed with each fold's index arrays.
        folds: iterable of ``(train, test)`` index pairs.
        label_list: labels, indexed with each fold's index arrays.
        best_parameter_pair: mapping with the tuned parameters; the keys read
            are 'cost' and 'gamma' (SVM), 'cost' (LinearSVM), 'tree' (RF) and
            'ngb' (KNN).  It is not read for the other methods.

    Raises:
        ValueError: if ``args.method`` is unsupported or ``folds`` is empty.
            Both cases previously surfaced as an unbound-variable error at the
            final prediction step.
    """
    results = []
    true_labels = []
    predict_probability = []
    classification = None

    for train, test in folds:
        x_test = output_array[test]
        y_test = label_list[test]

        # A new model per fold, so no fold sees another fold's fit.
        classification = _make_evaluation_classifier(args.method,
                                                     best_parameter_pair)
        classification.fit(output_array[train], label_list[train])

        y_test_predict = classification.predict(x_test)
        # Column 1 of predict_proba is the score handed to the curve plots.
        y_test_prob_predict = classification.predict_proba(x_test)[:, 1]

        results.append(evaluation(y_test, y_test_predict))
        true_labels.append(y_test)
        predict_probability.append(y_test_prob_predict)

    if classification is None:
        # No fold was processed, so there is no fitted model to report on.
        # Validate the method anyway so the more specific error wins.
        _make_evaluation_classifier(args.method, best_parameter_pair)
        raise ValueError('folds is empty; nothing to evaluate')

    plot_roc_curve(true_labels, predict_probability, args.result_dir)
    plot_pr_curve(true_labels, predict_probability, args.result_dir)

    final_result = np.array(results).mean(axis=0)
    result_print(final_result)

    # NOTE(review): the model used here was fitted on the last fold's training
    # split only, not on the full data set -- confirm this is intended.
    all_predict = classification.predict(output_array)
    with open(args.result_dir + 'prediction result', 'w') as f:
        space = ' '
        f.write(space.join(('No.', 'True Label', 'Predict Label\n')))
        for i, predicted in enumerate(all_predict):
            f.write(space.join((str(i), str(label_list[i]), str(predicted))))
            f.write('\n')
# NOTE(review): `fetch_mldata` is not used in the visible code, and newer
# scikit-learn releases no longer provide it (see `fetch_openml`) -- confirm
# whether this import can be dropped.
from sklearn.datasets import fetch_mldata

if __name__ == '__main__':
    # Script entry point: load the training/validation split, expand the
    # features, and fit several classifiers on the same training data.
    from data.data_reader import get_training_data
    from data.data_combinator import get_full_combinations

    x_train, y_train, x_val, y_val = get_training_data(validation=True)
    x_train = get_full_combinations(x_train)
    x_val = get_full_combinations(x_val)

    # The bare `LDA_prob` / `QDA_prob` / `GaussianNB_prob` expression
    # statements that used to follow each block were removed: outside an
    # interactive session they evaluate the name and discard it.
    LDA = lda()
    LDA.fit(x_train, y_train)
    LDA_prob = LDA.predict_proba(x_val)

    QDA = qda()
    QDA.fit(x_train, y_train)
    QDA_prob = QDA.predict_proba(x_val)

    GNB = GaussianNB()
    GNB.fit(x_train, y_train)
    GaussianNB_prob = GNB.predict_proba(x_val)

    # alpha = 1.0
    LOG = LogisticRegression()
    LOG.fit(x_train, y_train)
    # RIDGE = Ridge(alpha=alpha)
    # RIDGE.fit(x_train, y_train)
    # LASSO = Lasso(alpha=alpha)
# --- RBF-kernel SVM -------------------------------------------------------
# Grid-search C and gamma with 10-fold cross-validation on (data_x, data_y).
tuning_param = [
    {
        'C': [0.01, 0.1, 1, 5, 10, 100],
        'gamma': [0.01, 0.1, 1, 5, 10, 100],
    },
]
svm_fit = GridSearchCV(SVC(kernel='rbf'), tuning_param, cv=10)
svm_fit.fit(data_x, data_y)
# Recorded output of a previous run: {'C': 0.01, 'gamma': 0.1}
svm_fit.best_params_

# Fit the model using the parameters found.
# NOTE(review): the values are hard-coded from the recorded run rather than
# read from `svm_fit.best_params_` -- confirm they are still the best ones.
svm_best_fit = SVC(C=0.01, gamma=0.1, kernel='rbf')
svm_best_fit.fit(x_clas_train, y_clas_train)
# NOTE(review): this is the mean *signed* difference between predictions and
# labels, so errors in opposite directions cancel -- confirm this is the
# intended metric.  Recorded value: 0.5833333333333334
np.mean(svm_best_fit.predict(x_clas_cv) - y_clas_cv)

# --- LDA --------------------------------------------------------------------
lda_fit = lda()
lda_fit.fit(x_clas_train, y_clas_train)
# Recorded value: 0.20833333333333334
np.mean(lda_fit.predict(x_clas_cv) - y_clas_cv)

# --- QDA --------------------------------------------------------------------
qda_fit = qda()
qda_fit.fit(x_clas_train, y_clas_train)
# Recorded value: 0.4583333333333333
np.mean(qda_fit.predict(x_clas_cv) - y_clas_cv)