def make_data_for_prepro():
    """Compare preprocessing variants by neural-net validation accuracy.

    Loads the data through ``LoadData('labeled_images.mat', True, True)``,
    builds three train/validation input pairs (untouched, ``standard_data``
    output, ``fix_pixels`` output), trains a network with ``net_class`` on
    each pair, scores its validation activations with ``get_acc`` and then
    writes the scores to 'barplot_pre_accuracy.csv' and passes them to
    ``barplot_preprocess`` together with a fresh matplotlib axes.

    Earlier revisions also ran KNN, logistic regression, SVM and a Gabor
    filter variant here; only the neural-net run is active.

    NOTE(review): this function name is defined more than once in the file;
    Python keeps only the last definition it executes.
    """
    train_x, train_y, valid_x, valid_y = LoadData(
        'labeled_images.mat', True, True)

    # Each entry pairs a training input set with its matching validation set.
    # The helper calls run in the same order as before: both standard_data
    # calls first, then both fix_pixels calls.
    variants = [
        (train_x, valid_x),
        (standard_data(train_x), standard_data(valid_x)),
        (fix_pixels(train_x), fix_pixels(valid_x)),
    ]

    scores = []
    for train_inputs, valid_inputs in variants:
        network = net_class(train_inputs, train_y, valid_inputs, valid_y,
                            False)
        activations = [network.activate(sample) for sample in valid_inputs]
        scores.append(get_acc(activations, valid_y, True))
        # Single-argument parenthesised print emits the same text as the
        # Python 2 print statement.
        print("done iter")

    create_csv(scores, 'barplot_pre_accuracy.csv')
    axes = plt.figure().add_subplot(111)
    barplot_preprocess(axes, scores)
def make_data_for_prepro():
    """Score a neural net on raw, standardised and pixel-fixed inputs.

    Steps, in order:
      1. ``LoadData('labeled_images.mat', True, True)`` supplies training
         inputs/labels and validation inputs/labels.
      2. ``standard_data`` and ``fix_pixels`` each produce an alternative
         version of both input sets.
      3. For every (train, validation) input pair a network is built with
         ``net_class``; its ``activate`` output for each validation sample
         is collected and scored with ``get_acc(..., True)``.
      4. The scores go to 'barplot_pre_accuracy.csv' via ``create_csv`` and
         to ``barplot_preprocess`` along with a new matplotlib axes.

    KNN, logistic-regression, SVM and Gabor-filter runs that used to live
    here are disabled; only the neural-net path executes.

    NOTE(review): the file defines this function name more than once; the
    last definition executed is the one callers get.
    """
    train_x, train_y, valid_x, valid_y = LoadData(
        'labeled_images.mat', True, True)

    # Helper call order matches the original: standard_data on train and
    # validation first, then fix_pixels on train and validation.
    variants = [
        (train_x, valid_x),
        (standard_data(train_x), standard_data(valid_x)),
        (fix_pixels(train_x), fix_pixels(valid_x)),
    ]

    scores = []
    for train_inputs, valid_inputs in variants:
        network = net_class(train_inputs, train_y, valid_inputs, valid_y,
                            False)
        activations = [network.activate(sample) for sample in valid_inputs]
        scores.append(get_acc(activations, valid_y, True))
        # Same output as the Python 2 statement form for a single string.
        print("done iter")

    create_csv(scores, 'barplot_pre_accuracy.csv')
    axes = plt.figure().add_subplot(111)
    barplot_preprocess(axes, scores)
def run_my_votin(training_set, train_set_labels, validation_set=None,
                 validation_set_labels=None, train=True):
    """Label ``validation_set`` by a weighted vote of four models.

    Three estimators wrapped by ``run_bagging`` (KNN, logistic regression,
    RBF SVM) contribute ``predict_proba`` rows with weights 0.1, 0.26 and
    0.34; a network from ``net_class`` contributes its ``activate`` output
    with weight 0.30.  The per-sample weighted sum is arg-maxed and shifted
    by one, so labels are 1-based.

    Side effects: writes the labels to 'test_csv.csv' through ``create_csv``
    and, when ``train`` is true, pickles the four models to 'bg1knn.dump',
    'bg2lr.dump', 'bg3svm.dump' and 'net.dump' in the working directory.

    Args:
        training_set: training inputs; only read when ``train`` is true.
        train_set_labels: training labels passed to ``run_bagging`` and
            ``net_class``; only read when ``train`` is true.
        validation_set: inputs to label.  It is passed to ``standard_data``
            and ``fix_pixels`` unconditionally, so the ``None`` default is
            presumably not usable in practice -- TODO confirm.
        validation_set_labels: optional true labels.  When given and
            non-empty, the accuracy from ``get_acc`` is printed.
        train: fit and pickle the models when true; otherwise unpickle them
            from the dump files listed above.

    Returns:
        A list with one 7-element list of 0/1 ints per validation sample,
        holding a 1 at index ``label - 1``.

    NOTE(review): this function name is defined more than once in the file;
    Python keeps only the last definition it executes.
    """
    # Function-local imports kept from the original; neither name is used
    # below.  NOTE(review): confirm nothing relies on their import side
    # effects before deleting them.
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet

    n_classes = 7
    pred_weights = [0.1, 0.26, 0.34]
    net_weight = 0.30

    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation = standard_data(fixed_valid)

    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(
            penalty='l2', dual=False, tol=0.01, C=1.0, fit_intercept=True,
            intercept_scaling=1, class_weight=None, random_state=None,
            solver='newton-cg', max_iter=200, multi_class='ovr', verbose=0,
            warm_start=False, n_jobs=2)
        svm_class = svm.SVC(kernel='rbf', C=50, shrinking=False,
                            decision_function_shape='ovr', tol=0.001,
                            max_iter=-1)

        # Pickle streams are bytes: open the dumps in binary mode, and use
        # ``with`` so the handle is closed even if dumping raises.
        print("train knn")
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class,
                          None, None, False)
        with open('bg1knn.dump', 'wb') as res_f:
            pickle.dump(bg1, res_f)
        print("Knn done")

        print("train Logistic Regression")
        bg2 = run_bagging(standard_train_inputs, train_set_labels,
                          logistic_regression_solver, None, None, False)
        with open('bg2lr.dump', 'wb') as res_f:
            pickle.dump(bg2, res_f)
        print("done bg LR")

        print("train SVM")
        bg3 = run_bagging(equalize_and_standard, train_set_labels, svm_class,
                          None, None, False)
        with open('bg3svm.dump', 'wb') as res_f:
            pickle.dump(bg3, res_f)
        print("done bg svm")

        print("train Neural-Nets")
        net_clf = net_class(standard_train_inputs, train_set_labels,
                            None, None, False)
        with open('net.dump', 'wb') as res_f:
            pickle.dump(net_clf, res_f)
        print("nets done")
    else:
        # SECURITY: pickle.load runs code embedded in the file.  Only load
        # dumps produced by the training branch of this function.
        print("Load knn")
        with open('bg1knn.dump', 'rb') as res_f:
            bg1 = pickle.load(res_f)
        print("knn done")

        print("Load LR")
        with open('bg2lr.dump', 'rb') as res_f:
            bg2 = pickle.load(res_f)
        print("LR done")

        print("Load SVM")
        with open('bg3svm.dump', 'rb') as res_f:
            bg3 = pickle.load(res_f)
        print("svm done")

        print("Load Neural-nets")
        with open('net.dump', 'rb') as res_f:
            net_clf = pickle.load(res_f)
        print("net done")

    # Each model sees the preprocessing variant it was trained on.
    preds_arr = [
        bg1.predict_proba(fixed_valid),
        bg2.predict_proba(standard_valid_inputs),
        bg3.predict_proba(equalize_and_standard_validation),
    ]
    net_preds = [net_clf.activate(in_data)
                 for in_data in standard_valid_inputs]

    fin_pred = []
    for i, net_out in enumerate(net_preds):
        combined = np.zeros(n_classes)
        for weight, proba in zip(pred_weights, preds_arr):
            combined += proba[i] * weight
        combined += net_out * net_weight
        fin_pred.append(combined)

    # Class indices are 0-based; the labels written out are 1-based.
    fin_labels = [(np.argmax(scores, axis=0) + 1) for scores in fin_pred]
    create_csv(fin_labels, 'test_csv.csv')

    # ``if validation_set_labels:`` raises ValueError for a NumPy array with
    # more than one element, so test for presence and length explicitly.
    if validation_set_labels is not None and len(validation_set_labels) > 0:
        fin_acc, _err = get_acc(fin_labels, validation_set_labels)
        print('The final accuracy after bagging and votig is : %s'
              % (fin_acc,))

    return [[int(k == label - 1) for k in range(n_classes)]
            for label in fin_labels]
def run_my_votin(training_set, train_set_labels, validation_set=None,
                 validation_set_labels=None, train=True):
    """Label ``validation_set`` by a weighted vote of four models.

    Three estimators wrapped by ``run_bagging`` (KNN, logistic regression,
    RBF SVM) contribute ``predict_proba`` rows with weights 0.1, 0.26 and
    0.34; a network from ``net_class`` contributes its ``activate`` output
    with weight 0.30.  The per-sample weighted sum is arg-maxed and shifted
    by one, so labels are 1-based.

    Side effects: writes the labels to 'test_csv.csv' through ``create_csv``
    and, when ``train`` is true, pickles the four models to 'bg1knn.dump',
    'bg2lr.dump', 'bg3svm.dump' and 'net.dump' in the working directory.

    Args:
        training_set: training inputs; only read when ``train`` is true.
        train_set_labels: training labels passed to ``run_bagging`` and
            ``net_class``; only read when ``train`` is true.
        validation_set: inputs to label.  It is passed to ``standard_data``
            and ``fix_pixels`` unconditionally, so the ``None`` default is
            presumably not usable in practice -- TODO confirm.
        validation_set_labels: optional true labels.  When given and
            non-empty, the accuracy from ``get_acc`` is printed.
        train: fit and pickle the models when true; otherwise unpickle them
            from the dump files listed above.

    Returns:
        A list with one 7-element list of 0/1 ints per validation sample,
        holding a 1 at index ``label - 1``.

    NOTE(review): this function name is defined more than once in the file;
    Python keeps only the last definition it executes.
    """
    # Function-local imports kept from the original; neither name is used
    # below.  NOTE(review): confirm nothing relies on their import side
    # effects before deleting them.
    from sklearn.ensemble import VotingClassifier
    from pybrain.datasets import ClassificationDataSet

    n_classes = 7
    pred_weights = [0.1, 0.26, 0.34]
    net_weight = 0.30

    standard_valid_inputs = standard_data(validation_set)
    fixed_valid = fix_pixels(validation_set)
    equalize_and_standard_validation = standard_data(fixed_valid)

    if train:
        standard_train_inputs = standard_data(training_set)
        fixed_train_set = fix_pixels(training_set)
        equalize_and_standard = standard_data(fixed_train_set)

        kknn_class = KNeighborsClassifier(weights='distance', n_neighbors=11)
        logistic_regression_solver = sklearn.linear_model.LogisticRegression(
            penalty='l2', dual=False, tol=0.01, C=1.0, fit_intercept=True,
            intercept_scaling=1, class_weight=None, random_state=None,
            solver='newton-cg', max_iter=200, multi_class='ovr', verbose=0,
            warm_start=False, n_jobs=2)
        svm_class = svm.SVC(kernel='rbf', C=50, shrinking=False,
                            decision_function_shape='ovr', tol=0.001,
                            max_iter=-1)

        # Pickle streams are bytes: open the dumps in binary mode, and use
        # ``with`` so the handle is closed even if dumping raises.
        print("train knn")
        bg1 = run_bagging(fixed_train_set, train_set_labels, kknn_class,
                          None, None, False)
        with open('bg1knn.dump', 'wb') as res_f:
            pickle.dump(bg1, res_f)
        print("Knn done")

        print("train Logistic Regression")
        bg2 = run_bagging(standard_train_inputs, train_set_labels,
                          logistic_regression_solver, None, None, False)
        with open('bg2lr.dump', 'wb') as res_f:
            pickle.dump(bg2, res_f)
        print("done bg LR")

        print("train SVM")
        bg3 = run_bagging(equalize_and_standard, train_set_labels, svm_class,
                          None, None, False)
        with open('bg3svm.dump', 'wb') as res_f:
            pickle.dump(bg3, res_f)
        print("done bg svm")

        print("train Neural-Nets")
        net_clf = net_class(standard_train_inputs, train_set_labels,
                            None, None, False)
        with open('net.dump', 'wb') as res_f:
            pickle.dump(net_clf, res_f)
        print("nets done")
    else:
        # SECURITY: pickle.load runs code embedded in the file.  Only load
        # dumps produced by the training branch of this function.
        print("Load knn")
        with open('bg1knn.dump', 'rb') as res_f:
            bg1 = pickle.load(res_f)
        print("knn done")

        print("Load LR")
        with open('bg2lr.dump', 'rb') as res_f:
            bg2 = pickle.load(res_f)
        print("LR done")

        print("Load SVM")
        with open('bg3svm.dump', 'rb') as res_f:
            bg3 = pickle.load(res_f)
        print("svm done")

        print("Load Neural-nets")
        with open('net.dump', 'rb') as res_f:
            net_clf = pickle.load(res_f)
        print("net done")

    # Each model sees the preprocessing variant it was trained on.
    preds_arr = [
        bg1.predict_proba(fixed_valid),
        bg2.predict_proba(standard_valid_inputs),
        bg3.predict_proba(equalize_and_standard_validation),
    ]
    net_preds = [net_clf.activate(in_data)
                 for in_data in standard_valid_inputs]

    fin_pred = []
    for i, net_out in enumerate(net_preds):
        combined = np.zeros(n_classes)
        for weight, proba in zip(pred_weights, preds_arr):
            combined += proba[i] * weight
        combined += net_out * net_weight
        fin_pred.append(combined)

    # Class indices are 0-based; the labels written out are 1-based.
    fin_labels = [(np.argmax(scores, axis=0) + 1) for scores in fin_pred]
    create_csv(fin_labels, 'test_csv.csv')

    # ``if validation_set_labels:`` raises ValueError for a NumPy array with
    # more than one element, so test for presence and length explicitly.
    if validation_set_labels is not None and len(validation_set_labels) > 0:
        fin_acc, _err = get_acc(fin_labels, validation_set_labels)
        print('The final accuracy after bagging and votig is : %s'
              % (fin_acc,))

    return [[int(k == label - 1) for k in range(n_classes)]
            for label in fin_labels]