def test_predict_consistent():
    """Binary predictions must match thresholding the class-1 probability at 0.5.

    Runs the check once for every kernel in the module-level ``kernels``.
    """
    for candidate_kernel in kernels:
        model = GaussianProcessClassifier(kernel=candidate_kernel)
        model.fit(X, y)
        hard_labels = model.predict(X)
        positive_proba = model.predict_proba(X)[:, 1]
        assert_array_equal(hard_labels, positive_proba >= 0.5)
def test_multi_class(kernel):
    """Exercise GPC on a multi-class problem.

    The per-sample class probabilities must sum to one, and the predicted
    label must be the index of the most probable class.
    """
    classifier = GaussianProcessClassifier(kernel=kernel)
    classifier.fit(X, y_mc)

    class_proba = classifier.predict_proba(X2)
    assert_almost_equal(class_proba.sum(1), 1)

    predicted_labels = classifier.predict(X2)
    most_probable = np.argmax(class_proba, 1)
    assert_array_equal(most_probable, predicted_labels)
b_list.append(b_i) mol_frac_list = np.column_stack((a_list, b_list)) return mol_frac_list, a_list, b_list plotlist, a_frac, b_frac = mol_frac_gen(a_start=0.1, a_stop=0.8, b_start=0.1, b_stop=0.45, a_number=400, b_number=400) expected = y_test # correct ans predicted = classifier.predict(X_test) plotted = classifier.predict(plotlist) prediction_df_dict = {"predict": plotted, "A_RAW": a_frac, "B_RAW": b_frac} prediction_df = pd.DataFrame(prediction_df_dict) color_list = [ "lightblue", "lightcoral", "navajowhite", "plum", "lightpink", "violet", "lightgreen", "mediumseagreen", "lightyellow", "mediumpurple" ] test_color_list = ["blue", "red", "tan", "indigo", "deeppink"] plt.rc('font', family='serif') plt.rc('xtick', labelsize='medium')
# Specify Gaussian Processes with fixed and optimized hyperparameters gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0), optimizer=None) gp_fix.fit(X[:train_size], y[:train_size]) gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)) gp_opt.fit(X[:train_size], y[:train_size]) print("Log Marginal Likelihood (initial): %.3f" % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta)) print("Log Marginal Likelihood (optimized): %.3f" % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta)) print("Accuracy: %.3f (initial) %.3f (optimized)" % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])), accuracy_score(y[:train_size], gp_opt.predict(X[:train_size])))) print("Log-loss: %.3f (initial) %.3f (optimized)" % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]), log_loss(y[:train_size], gp_opt.predict_proba(X[:train_size])[:, 1]))) # Plot posteriors plt.figure() plt.scatter(X[:train_size, 0], y[:train_size], c='k', label="Train data", edgecolors=(0, 0, 0)) plt.scatter(X[train_size:, 0],
# Fit a Gaussian process classifier with a scaled RBF kernel on the training data.
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0)
gpc.fit(X_train, Y_train)

# Persist the fitted model so it can be reloaded without retraining.
pkl_fileName = "Guassian_model.pkl"
with open(pkl_fileName, 'wb') as file:
    pickle.dump(gpc, file)

# All figures below are measured on the data the model was trained on.
acc = gpc.score(X_train, Y_train)
#acc=gpc.score(X_test,Y_test)
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

gaussianNB_pred = gpc.predict(X_train)
precision = precision_score(Y_train, gaussianNB_pred, average='binary')
recall = recall_score(Y_train, gaussianNB_pred, average='binary')

# F1 is the harmonic mean of precision and recall. When both are zero the
# formula divides by zero, so report 0.0 for that degenerate case instead.
if precision + recall:
    F1 = (2 * precision * recall) / (precision + recall)
else:
    F1 = 0.0

print("---------------ON TRAIN DATA-----------------")
print(" Using Guassian Classifier\n precision=" + str(precision * 100) +
      "\nRecall=" + str(recall * 100))
print("Accuracy ==" + str(acc * 100))
print("F1 is=" + str(F1))
print("---------------ON TRAIN DATA--------------")
###########################################
from sklearn.model_selection import KFold
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
def GPC2(X, Y, Z):
    """Fit a GP classifier with a scaled RBF kernel on (X, Y) and label Z.

    Y is squeezed first, so singleton dimensions are dropped before fitting.
    Returns the predictions of the fitted classifier for Z.
    """
    labels = np.squeeze(Y)
    rbf_kernel = 1.0 * RBF(length_scale=1.0)
    model = GaussianProcessClassifier(kernel=rbf_kernel, warm_start=True)
    model.fit(X, labels)
    return model.predict(Z)
def test_predict_consistent(kernel):
    """Binary predictions must match thresholding the class-1 probability at 0.5."""
    model = GaussianProcessClassifier(kernel=kernel)
    model.fit(X, y)
    hard_labels = model.predict(X)
    positive_proba = model.predict_proba(X)[:, 1]
    assert_array_equal(hard_labels, positive_proba >= 0.5)
XGBClassifier = XGBClassifier() XGBClassifier.fit(X, y) y_pred = XGBClassifier.predict(X_test) XGBClassifier_accy = round(accuracy_score(y_pred, y_test), 3) print(XGBClassifier_accy) from sklearn.ensemble import ExtraTreesClassifier ExtraTreesClassifier = ExtraTreesClassifier() ExtraTreesClassifier.fit(X, y) y_pred = ExtraTreesClassifier.predict(X_test) extraTree_accy = round(accuracy_score(y_pred, y_test), 3) print(extraTree_accy) from sklearn.gaussian_process import GaussianProcessClassifier GaussianProcessClassifier = GaussianProcessClassifier() GaussianProcessClassifier.fit(X, y) y_pred = GaussianProcessClassifier.predict(X_test) gau_pro_accy = round(accuracy_score(y_pred, y_test), 3) print(gau_pro_accy) from sklearn.ensemble import VotingClassifier voting_classifier = VotingClassifier(estimators=[ ('lr_grid', logreg_grid), ('svc', svm_grid), ('random_forest', rf_grid), ('gradient_boosting', gradient_boost), ('decision_tree_grid',dectree_grid), ('knn_classifier', knn_grid), ('XGB_Classifier', XGBClassifier), ('bagging_classifier', bagging_grid), ('adaBoost_classifier',adaBoost_grid), ('ExtraTrees_Classifier', ExtraTreesClassifier),
[190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42], [181, 85, 43]] Y = ['male', 'male', 'female', 'female', 'male', 'male', 'female', 'female', 'female', 'male', 'male'] # train them on our data clf = clf.fit(X, Y) clf1 = clf1.fit(X, Y) clf2 = clf2.fit(X, Y) clf3 =clf3.fit(X, Y) prediction = clf.predict([[190, 70, 43]]) prediction1 = clf1.predict([[120, 78, 49]]) prediction2 = clf2.predict([[130,89,33]]) prediction3 = clf3.predict([[150,99,55]]) print(accuracy_score(prediction,prediction3)); print('DecisionTreeClassifier') print(prediction) print('KNeighborsClassifier') print(prediction1) print('RandomForestClassifier') print(prediction2) print('GaussianProcessClassifier') print(prediction3)
# Target column.
y = df['category']

# In[11]:

# Gaussian process classifier with a scaled RBF kernel, scored on its own
# training data.
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0)
gpc.fit(x, y)
gpc.score(x, y)

# In[12]:

# Hard class predictions for the training inputs.
x_predict = gpc.predict(x)

# In[13]:

# Per-class probabilities for the training inputs.
x_predict_prob = gpc.predict_proba(x)

# In[14]:

# Multi-class (multinomial) logistic regression on the same data.
mul_lr = linear_model.LogisticRegression(
    multi_class='multinomial',
    solver='newton-cg',
)
mul_lr.fit(x, y)
x_predict_log = mul_lr.predict(x)
# Each cell predicts on X with an already-fitted classifier and reports the
# accuracy against Y as a percentage.

# Nearest Neighbors
pred_KNN = clf_KNN.predict(X)
acc_KNN = 100 * accuracy_score(Y, pred_KNN)
print('Accuracy for KNN: {}'.format(acc_KNN))

# In[9]:

# RBF SVM
pred_svm_RBF = clf_svm_RBF.predict(X)
acc_svm_RBF = 100 * accuracy_score(Y, pred_svm_RBF)
print('Accuracy for RBF SVM: {}'.format(acc_svm_RBF))

# In[10]:

# Gaussian Process
pred_GPC = clf_GPC.predict(X)
acc_GPC = 100 * accuracy_score(Y, pred_GPC)
print('Accuracy for GPC: {}'.format(acc_GPC))

# In[11]:

# Random Forest
pred_RFC = clf_RFC.predict(X)
acc_RFC = 100 * accuracy_score(Y, pred_RFC)
print('Accuracy for RFC: {}'.format(acc_RFC))

# In[12]:

# Neural Net
pred_NN = clf_NN.predict(X)
acc_NN = 100 * accuracy_score(Y, pred_NN)
# Evaluation set: element 0 holds the features, element 1 the labels.
a = process_eval_depression()
x_eval, y_eval = a[0], a[1]

# Test set, same layout.
a = process_test_depression()
x_test, y_test = a[0], a[1]
print(x_test)

import numpy as np
from sklearn.gaussian_process import GaussianProcessClassifier

# Fit with the library's default kernel, then predict both held-out sets.
gp = GaussianProcessClassifier()
gp.fit(x_train, y_train)
y_pred = gp.predict(x_eval)
y_pred2 = gp.predict(x_test)

# Spot-check a single hand-written sample.
print(gp.predict([[0, 3, 1, 1]]))

from sklearn import metrics

eval_precision = metrics.precision_score(y_eval, y_pred)
print("Precision for evaluation set:", eval_precision)
test_precision = metrics.precision_score(y_test, y_pred2)
print("Precision for test set:", test_precision)
eval_recall = metrics.recall_score(y_eval, y_pred)
print("recall for evaluation set:", eval_recall)
test_recall = metrics.recall_score(y_test, y_pred2)
print("recall for test set:", test_recall)
class WindowClassifier():
    """Train, cross-validate and query several scikit-learn classifiers.

    The constructor keeps the full dataset and a 60/40 train/test split.
    ``handleEvaluate`` fits one model on the training split and stores its
    cross-validated metrics; ``handlePredict`` queries a model that has
    already been fitted that way.
    """

    # Scorers computed by every cross-validation helper; these are also the
    # keys of every ``*_metrics`` dictionary.
    _METRIC_NAMES = ('accuracy', 'precision', 'recall')

    # modelName -> (estimator attribute, metrics attribute,
    #               cross-validation method, prediction method)
    _MODELS = {
        'nn': ('_knn', 'knn_metrics', 'knn', 'predict_knn'),
        'svm': ('_svm', 'svm_metrics', 'svm', 'predict_svm'),
        'rf': ('_rff', 'rff_metrics', 'random_forest', 'predict_forest'),
        'lr': ('_lr', 'lr_metrics', 'lr', 'predict_lr'),
        'mlp': ('_mlp', 'mlp_metrics', 'mlp', 'predict_mlp'),
        'gpc': ('_gaupc', 'gaupc_metrics', 'gaupc', 'predict_gaupc'),
        'dtc': ('_detc', 'detc_metrics', 'detc', 'predict_detc'),
        'ada': ('_adab', 'adab_metrics', 'adab', 'predict_adab'),
        'gnb': ('_ganb', 'ganb_metrics', 'ganb', 'predict_ganb'),
        'qd': ('_qud', 'qud_metrics', 'qud', 'predict_qud'),
    }

    def __init__(self, X, Y):
        # X refers to the features, Y the labels
        self._X = X
        self._Y = Y
        # Test 40% Train 60%
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self._X, self._Y, test_size=0.4)

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------
    def handleEvaluate(self, modelName, extraParameter=None):
        """Fit the model named ``modelName`` and return its CV accuracy.

        The estimator is fitted on the training split and stored on the
        instance so ``handlePredict`` can use it. Its metrics come from a
        5-fold cross-validation over the full dataset.

        ``extraParameter`` is the number of neighbours for "nn", ``C`` for
        "svm" and the number of trees for "rf"; other models ignore it.
        When it is None the library default is used.

        Returns None for an unknown ``modelName``.
        """
        spec = self._MODELS.get(modelName)
        if spec is None:
            return None
        estimator_attr, metrics_attr, cv_method, _ = spec

        # Reset the metrics first so a failed fit leaves no stale results.
        setattr(self, metrics_attr, {})
        estimator = self._build_estimator(modelName, extraParameter)
        setattr(self, estimator_attr, estimator)
        estimator.fit(self.X_train, self.y_train)

        getattr(self, cv_method)(5)
        return getattr(self, metrics_attr)['accuracy']

    def handlePredict(self, X, modelName):
        """Predict labels for ``X`` with the fitted model named ``modelName``.

        Returns None for an unknown ``modelName``. The model must have been
        fitted through ``handleEvaluate`` beforehand.
        """
        # X -> features to predict
        print("WindowClassifier => handlePredict()")
        spec = self._MODELS.get(modelName)
        if spec is None:
            return None
        return getattr(self, spec[3])(X)

    # Add new classifers here

    def logistic(self, CV, n_estimate):
        """Cross-validate a k-nearest-neighbours model with ``n_estimate`` neighbours.

        NOTE(review): despite its name this method builds a KNN classifier,
        not a logistic regression. The behaviour is kept as-is; confirm intent.
        The result is returned but not stored on the instance.
        """
        clf = neighbors.KNeighborsClassifier(n_neighbors=n_estimate)
        return self._cross_validate(clf, CV)

    def X(self):
        """Return the full feature set given to the constructor."""
        return self._X

    def Y(self):
        """Return the full label set given to the constructor."""
        return self._Y

    def Y_test(self):
        """Return the labels of the held-out test split."""
        return self.y_test

    # ------------------------------------------------------------------
    # Accuracy getters. Until the webpage is developed these will be
    # redefined again.
    # ------------------------------------------------------------------
    def knn_accuracy(self):
        """Return the stored KNN accuracy; raise ValueError if not evaluated."""
        return self._stored_accuracy('knn_metrics', 'Knn')

    def rff_accuracy(self):
        """Return the stored random-forest accuracy; raise ValueError if not evaluated."""
        return self._stored_accuracy('rff_metrics', 'Random Forest')

    def svm_accuracy(self):
        """Return the stored SVM accuracy; raise ValueError if not evaluated."""
        return self._stored_accuracy('svm_metrics', 'SVM')

    # ------------------------------------------------------------------
    # All classifiers along with metrics of interest and other predictions
    # ------------------------------------------------------------------
    def knn(self, CV):
        """Cross-validate the stored KNN model and record its metrics."""
        self.knn_metrics = self._cross_validate(self._knn, CV)
        return self.knn_metrics

    def predict_knn(self, matrix=None):
        """Predict with KNN; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictKNN()")
        return self._predict_with(self._knn, matrix)

    def svm(self, CV):
        """Cross-validate the stored SVM model and record its metrics."""
        self.svm_metrics = self._cross_validate(self._svm, CV)
        return self.svm_metrics

    def predict_svm(self, matrix=None):
        """Predict with the SVM; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictSVM()")
        return self._predict_with(self._svm, matrix)

    def random_forest(self, CV):
        """Cross-validate the stored random forest and record its metrics."""
        self.rff_metrics = self._cross_validate(self._rff, CV)
        return self.rff_metrics

    def predict_forest(self, matrix=None):
        """Predict with the random forest; uses the test split when ``matrix`` is None."""
        return self._predict_with(self._rff, matrix)

    def lr(self, CV):
        """Cross-validate the stored logistic regression and record its metrics."""
        self.lr_metrics = self._cross_validate(self._lr, CV)
        return self.lr_metrics

    def predict_lr(self, matrix=None):
        """Predict with logistic regression; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictLR()")
        return self._predict_with(self._lr, matrix)

    def mlp(self, CV):
        """Cross-validate the stored MLP and record its metrics."""
        self.mlp_metrics = self._cross_validate(self._mlp, CV)
        return self.mlp_metrics

    def predict_mlp(self, matrix=None):
        """Predict with the MLP; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictMLP()")
        return self._predict_with(self._mlp, matrix)

    def gaupc(self, CV):
        """Cross-validate the stored Gaussian process classifier and record its metrics."""
        self.gaupc_metrics = self._cross_validate(self._gaupc, CV)
        return self.gaupc_metrics

    def predict_gaupc(self, matrix=None):
        """Predict with the Gaussian process; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictGPC()")
        return self._predict_with(self._gaupc, matrix)

    def detc(self, CV):
        """Cross-validate the stored decision tree and record its metrics."""
        self.detc_metrics = self._cross_validate(self._detc, CV)
        return self.detc_metrics

    def predict_detc(self, matrix=None):
        """Predict with the decision tree; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictDT()")
        return self._predict_with(self._detc, matrix)

    def adab(self, CV):
        """Cross-validate the stored AdaBoost model and record its metrics."""
        self.adab_metrics = self._cross_validate(self._adab, CV)
        return self.adab_metrics

    def predict_adab(self, matrix=None):
        """Predict with AdaBoost; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictADA()")
        return self._predict_with(self._adab, matrix)

    def ganb(self, CV):
        """Cross-validate the stored Gaussian naive Bayes model and record its metrics."""
        self.ganb_metrics = self._cross_validate(self._ganb, CV)
        return self.ganb_metrics

    def predict_ganb(self, matrix=None):
        """Predict with Gaussian naive Bayes; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictGNB()")
        return self._predict_with(self._ganb, matrix)

    def qud(self, CV):
        """Cross-validate the stored QDA model and record its metrics."""
        self.qud_metrics = self._cross_validate(self._qud, CV)
        return self.qud_metrics

    def predict_qud(self, matrix=None):
        """Predict with QDA; uses the test split when ``matrix`` is None."""
        print("WindowClassifier => predictQUD()")
        return self._predict_with(self._qud, matrix)

    # ------------------------------------------------------------------
    # Tuning
    # ------------------------------------------------------------------
    def pick_the_best_knn(self):
        """Grid-search ``n_neighbors`` in 1..24 with 5-fold CV; return the best parameters."""
        knn2 = neighbors.KNeighborsClassifier()
        param_grid = {"n_neighbors": np.arange(1, 25)}
        knn_gscv = GridSearchCV(knn2, param_grid, cv=5)
        knn_gscv.fit(self._X, self._Y)
        return knn_gscv.best_params_

    def pick_the_best_random_forest(self):
        """Randomised search over random-forest hyper-parameters.

        Prints the best parameter set and also returns it, matching
        ``pick_the_best_knn``.
        """
        # Tune the classifier that handleEvaluate("rf") actually trains; a
        # regressor would be scored on a different objective than the labels
        # used everywhere else in this class.
        rff = RandomForestClassifier()
        n_estimators = [
            int(x) for x in np.linspace(start=200, stop=2000, num=10)
        ]
        # Number of features to consider at every split
        # NOTE(review): 'auto' is rejected by recent scikit-learn releases --
        # confirm against the pinned version before relying on it.
        max_features = ['auto', 'sqrt']
        max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
        max_depth.append(None)
        random_grid = {
            'n_estimators': n_estimators,
            'max_features': max_features,
            'max_depth': max_depth,
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'bootstrap': [True, False],
        }
        rf_random = RandomizedSearchCV(estimator=rff,
                                       param_distributions=random_grid,
                                       n_iter=100,
                                       cv=3,
                                       verbose=2,
                                       random_state=42,
                                       n_jobs=-1)
        rf_random.fit(self._X, self._Y)
        print(rf_random.best_params_)
        return rf_random.best_params_

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _build_estimator(modelName, extraParameter=None):
        """Create an unfitted estimator for ``modelName``.

        ``extraParameter`` is forwarded only to the models that accept one,
        and only when it is not None, so the default call does not hand
        ``None`` to a required numeric hyper-parameter.
        """
        if modelName == "nn":
            if extraParameter is None:
                return neighbors.KNeighborsClassifier()
            return neighbors.KNeighborsClassifier(n_neighbors=extraParameter)
        if modelName == "svm":
            if extraParameter is None:
                return svm.SVC()
            return svm.SVC(C=extraParameter)
        if modelName == "rf":
            if extraParameter is None:
                return RandomForestClassifier()
            return RandomForestClassifier(n_estimators=extraParameter)
        # The factory is looked up only for the requested model, so names
        # belonging to other models are never evaluated.
        factories = {
            "lr": lambda: LogisticRegression(),
            "mlp": lambda: MLPClassifier(),
            "gpc": lambda: GaussianProcessClassifier(),
            "dtc": lambda: DecisionTreeClassifier(),
            "ada": lambda: AdaBoostClassifier(),
            "gnb": lambda: GaussianNB(),
            "qd": lambda: QuadraticDiscriminantAnalysis(),
        }
        return factories[modelName]()

    def _cross_validate(self, estimator, CV):
        """Return mean accuracy, precision and recall of ``estimator`` over ``CV`` folds.

        Scores are computed on the full dataset, one ``cross_val_score``
        run per metric.
        """
        return {
            metric: np.mean(
                cross_val_score(estimator,
                                self._X,
                                self._Y,
                                cv=CV,
                                scoring=metric))
            for metric in self._METRIC_NAMES
        }

    def _predict_with(self, estimator, matrix=None):
        """Predict with ``estimator``; fall back to the test split when ``matrix`` is None."""
        return estimator.predict(self.X_test if matrix is None else matrix)

    def _stored_accuracy(self, metrics_attr, label):
        """Return ``accuracy`` from the named metrics dict, or raise ValueError.

        A missing attribute is treated the same as an empty dict, so the
        caller always sees the "has not been evaluated" error.
        """
        metrics = getattr(self, metrics_attr, None)
        if metrics:
            return metrics['accuracy']
        raise ValueError(label + ' has not been evaluated')
# k-nearest-neighbours on the full training frame.
neigh = KNeighborsClassifier(n_neighbors=nneighbors)
neigh.fit(df[features].values, df['Survived'].values.ravel())
pre = neigh.predict(test[features].values)
print('accuracy on training set\n')
# Print the score explicitly: a bare expression is discarded when the file is
# run as a script, so the announced accuracy was never shown.
print(neigh.score(df[features].values, df['Survived'].values.ravel()))

#%% gaussian process classifier
gp = GaussianProcessClassifier(n_jobs=-1)
gp.fit(x_train, y_train)
pre = gp.predict(x_val)
print('accuracy on training set\n')
print(gp.score(x_train, y_train))
print('accuracy on validation set\n')
print(gp.score(x_val, y_val))

#%% logistic regressor
logreg = linear_model.LogisticRegression(solver='lbfgs',
                                         penalty='l2',
                                         C=1.0e2,
                                         max_iter=1000,
                                         warm_start=True)
def _report_classifier(clf, title, X, y, X_test, k_fold):
    """Fit ``clf``, print its train/test statistics and CV scores.

    Returns the training-set predictions followed by the test-set
    predictions as one concatenated array.
    """
    start_time = time.time()
    clf.fit(X, y)
    runtime = str(time.time() - start_time)

    y_train = clf.predict(X)
    y_test = clf.predict(X_test)
    print_classification_stats(title, y, y_train, y_test, runtime)

    # scikit-learn names this scorer 'neg_mean_squared_error'; the bare
    # 'mean_squared_error' spelling is no longer accepted.
    cv = cross_val_score(clf, X, y, cv=k_fold,
                         scoring='neg_mean_squared_error')
    # Single-argument print(...) runs the same on Python 2 and Python 3.
    print("CV Score: " + str(cv))
    print("CV Average: " + str(sum(cv) / float(len(cv))))
    print_line()
    return np.concatenate((y_train, y_test))


def run_classifications(X, y, X_test, labelname, k, features, headers):
    """Train the enabled classifiers and collect their predictions.

    When ``features`` is not -1, both the chi2 and the f_classif feature
    selections are applied, and their selected columns are concatenated for
    ``X`` and for ``X_test``. Each enabled classifier is then fitted on
    ``X``/``y``, reported, and cross-validated with a shuffled ``k``-fold
    split.

    Returns a dict mapping a short classifier key to its training
    predictions followed by its test predictions.
    """
    ret_predictions = {}

    if features != -1:
        X1, X_test1, _ = feature_selection_chi2(X, X_test, y, features,
                                                headers)
        X2, X_test2, _ = feature_selection_f_classif(X, X_test, y, features,
                                                     headers)
        X = np.concatenate((X1, X2), axis=1)
        X_test = np.concatenate((X_test1, X_test2), axis=1)
        # X, X_test, n = feature_selection_f_classif(X, X_test, y, features, headers)

    print('{} : {}'.format("Feature Selected X", X.shape))
    print('{} : {}'.format("Feature Selected X_test", X_test.shape))
    print_line()
    # return

    k_fold = KFold(n_splits=k, shuffle=True, random_state=0)

    # (result key, report title prefix, estimator). Commented rows are
    # classifiers that are currently switched off; uncomment to re-enable.
    classifiers = [
        # ('lr2', "L2 Logistic Regression ", LogisticRegression()),
        # ('lr1', "L1 Logistic Regression ", LogisticRegression(penalty='l1')),
        ('rf', "Random Forest ", RandomForestClassifier()),
        # ('knn', "KNN ", KNeighborsClassifier(4)),
        # ('svc', "Linear SVM ", SVC(kernel='linear', C=0.025)),
        # ('rbf', "RBF SVM ", SVC(gamma=2, C=1)),
        ('gp', "Gaussian Process ",
         GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)),
        # ('dt', "Decision Tree ", DecisionTreeClassifier()),
        # ('mlp', "Neural Net ", MLPClassifier()),
        # ('ab', "AdaBoost ", AdaBoostClassifier()),
        # ('gnb', "Naive Bayes ", GaussianNB()),
        # ('qda', "QDA ", QuadraticDiscriminantAnalysis()),
    ]

    for key, title, clf in classifiers:
        ret_predictions[key] = _report_classifier(clf, title + labelname, X,
                                                  y, X_test, k_fold)
        # lo,hi = prediction_Error_Bootstrap(clf, X, y)
        # print(".95 Confidence Interval: " + str(lo) + " - " + str(hi))

    return ret_predictions
def main():
    """Train nine scikit-learn classifiers on song features and print accuracy.

    The data is hard-coded: 45 training titles and 45 test titles, five per
    genre for nine genres.  Each title is described by four previously
    normalized features: note density (average number of notes per second),
    initial tempo, bass register importance and amount of arpeggiation.

    Every classifier is fitted on ``trainList`` and evaluated on ``testList``.
    The same label vector ``Y`` is used for both sets, i.e. the test songs are
    listed in the same genre order as the training songs.  One accuracy line
    per classifier is printed; nothing is returned.
    """
    # Training data -- 45 titles, rows grouped five per genre.
    trainList = [
        [-0.766678522671417, -1.4134894457127, 0.0122606256864645, 0.43151719310051],
        [-1.25246164516747, -1.0661631348805, 1.66340797727991, 1.21419919717718],
        [-1.40405821644502, -1.0661631348805, 0.403817590836005, -1.09638443503824],
        [-1.3577680200252, 0.971484555335123, 0.649660918402909, -0.553046197585348],
        [-1.12520463926371, -1.11247330965812, 2.62171635167137, 0.652347745189381],
        [2.43551771310301, 0.369452283225963, 0.434310848102859, -0.134141978619881],
        [0.477945144638293, -0.232579988883197, 0.0594181118767843, -0.0658562454473348],
        [0.870921477457971, -0.394665600604894, -0.400726511974132, 0.479514784984597],
        [1.45707350937701, -0.440975775382522, -0.308871070886021, 1.06130302355337],
        [0.317025254101156, 0.276831933670708, 0.0302952889775195, -0.313278979682085],
        [0.344564567366374, -0.811457173603543, 0.0058742100751454, -0.772179500134836],
        [-1.43344191386319, -0.950387697936426, 0.189561226701406, -1.63399440732091],
        [-0.509888362597354, 0.624158244502915, -1.64915124629092, -2.57380317251345],
        [-0.688984501422195, -0.440975775382522, -0.573876378367778, -0.911725215749629],
        [1.0931214550796, 0.415762458003591, -0.239951378380251, -0.647473539619574],
        [0.305762422927743, 0.0915912345601968, 0.515319672402441, 0.0357254444966523],
        [0.0624347875482635, -0.37151051321608, -0.0786751179102309, -0.50150779894908],
        [-1.15105910134567, -1.25140383399101, 0.486438451417823, -1.57481116359108],
        [-0.705237572473972, 1.89768805088768, 1.29234485530951, 0.154049060043043],
        [-0.044785112264018, -0.440975775382522, 0.14184574295472, 0.528489953548707],
        [0.890374866596739, 0.23052175889308, -0.728919490615075, -0.395906062340086],
        [0.338609204986195, 0.0915912345601968, 0.00240196126908647, 0.675915446802265],
        [0.702159802983765, 0.0452810597825691, 1.56736511491168, -0.454043070733069],
        [1.57109652803916, 0.161056496726638, -1.09636595497638, 1.14348078111971],
        [-0.37196172410778, 0.554692982336474, -0.368561234334016, 0.907415824160339],
        [0.600507499201812, 1.4345863031114, 1.00839825378751, 0.955005214604942],
        [1.10244499892636, 2.17554909955344, 1.81454080734429, 0.363035159499528],
        [0.153359658817574, -1.18193857182456, 1.6354924054874, -0.00454647255469575],
        [0.916855697221869, 1.24934560400089, 1.93419178349293, 0.244811833289804],
        [-0.0918102995735832, -0.927232610547612, 0.127335162294101, 0.0839276733003873],
        [1.37119861987878, -0.811457173603543, -1.52856876509287, 0.67106206208497],
        [0.93323082818666, -0.603061387104218, -0.400227918868385, 1.8010528221493],
        [1.07383037479694, -0.950387697936426, -0.573990235637437, 1.29493370762047],
        [-0.583104951959534, -0.834612260992357, -1.43566596814499, 1.40195387262669],
        [-1.19986159572819, -1.29771400876863, -0.12304791014598, 1.49648769523065],
        [-0.372297246211269, 1.66613717699954, -1.11177695290247, -0.910589434995976],
        [0.399464128365263, -0.834612260992357, 0.160241864875368, -0.855590862944964],
        [-1.14389931582131, -0.649371561881846, -1.38617972788128, -2.7441128533059],
        [-0.771977276287568, 0.253676846281894, -0.83487270827995, 0.255986573229018],
        [-1.36950601865057, -1.64504031960084, 0.0851051444620558, -2.01914851836497],
        [-0.233730884591378, -0.927232610547612, 0.400787709093632, 0.196017786581242],
        [0.613634713378625, -0.209424901494383, 0.171104729047728, -0.688776703429684],
        [-0.964928464375092, 1.48089647788903, -1.63760172273251, -1.13536431429426],
        [0.262997873689561, -0.880922435769985, 0.45336491398644, 0.160631572105278],
        [2.46876985514246, -1.11247330965812, -1.8129457869236, 1.20203427847653],
    ]
    # Test data -- 45 further titles in the same genre order.
    testList = [
        [-1.42550367089293, -0.741991911437102, 1.03907139337304, -0.524103659257335],
        [-1.4713199318185, -0.834612260992357, 0.610848921601832, -1.07153109849005],
        [0.0483469943547123, 1.06410490489038, 1.41483496062355, -0.736087518180374],
        [-0.973697981011053, 0.693623506669357, -0.180733411152442, -1.16782656220974],
        [-1.0181376270085, -1.0661631348805, -0.664143752552901, 0.270549075112729],
        [0.0740472545469077, -0.255735076272011, 0.582212294684255, -0.128785564777901],
        [0.71758688500379, 0.554692982336474, 0.133199237498739, 0.581912344088212],
        [-0.137417909220219, 1.71244735177717, 0.60518579551654, -0.154413314075152],
        [0.271454224529076, -1.25140383399101, 0.613087382121505, -0.772701130690326],
        [0.864020291135146, -0.209424901494383, -0.191872343613909, 0.217888209405153],
        [-0.810894246904719, 1.15672525444563, -0.172523673097193, -1.04070772100896],
        [3.20706768407871, 1.71244735177717, -0.643132149404525, -0.357730150445054],
        [-0.250691095660947, -0.533596124937777, -1.30676064627265, -2.08822794623238],
        [-0.795521491862785, 0.137901409337824, 0.150083129455051, -1.40763364196608],
        [0.372076487690884, -0.487285950160149, -0.849538792435035, -1.26744421775474],
        [-0.65660384990208, -1.29771400876863, -0.180568359917071, 1.42353891497765],
        [-0.77988391630925, -0.37151051321608, 1.21149957238249, -0.365286553173383],
        [-1.30604172925065, -0.139959639327942, -0.229290080643909, -1.25699057202116],
        [-1.04110227888866, -1.01985296010287, -0.330316732364811, 1.54321130870156],
        [0.61373886561153, -0.00102911499505855, -0.2808348691897, 0.53675115196638],
        [0.49875146997193, -0.0936494645503139, -0.832360238451796, 0.651893184309962],
        [2.82271114057672, -0.139959639327942, 0.0440903522786204, 0.402991197507736],
        [0.0463327895684618, 0.137901409337824, -0.523226280915796, 1.40875258840225],
        [-0.0296713299119058, 0.323142108448335, -1.02406080673175, 0.319768352634448],
        [0.781983086248411, 1.9439982256653, 0.181776380407751, 1.84028755471701],
        [0.782278454549117, -0.533596124937777, -0.276684483711903, -0.534461148257612],
        [-0.0876172574377816, 0.554692982336474, 1.72524522236284, 0.0156455721085213],
        [0.203902110434939, 1.24934560400089, 1.79628504094392, 0.437922107143642],
        [1.32483987974607, 2.63865084732972, 1.71067873124182, 0.660006834959962],
        [0.851227474200866, 2.17554909955344, 1.95118989770912, 0.772316815484984],
        [-0.403488104826915, -0.139959639327942, -2.05730647943417, 1.34945888234721],
        [-1.15063158429676, -0.950387697936426, 0.336974769326176, -1.58007717324087],
        [-0.532153920088487, 1.01779473011275, -1.56126596914715, -0.00905096105451718],
        [-0.67501661729555, -0.139959639327942, -0.952051167678154, -0.0422238467014761],
        [-0.310849370568786, 0.0915912345601968, -0.471342095626615, 1.41103820392797],
        [-0.208964531651521, 0.323142108448335, -0.361903894340191, -0.275842503944826],
        [-1.04654416417428, -0.464130862771335, -0.949945286117449, 0.840457233938729],
        [-0.951584146504103, 1.29565577877852, 0.323269958277516, -0.313860836548317],
        [0.184463802998577, -0.186269814105569, -0.672037942893625, 0.240047252985395],
        [-0.0698549841159306, 2.54603049777446, -0.956718963479093, -0.654542118681162],
        [0.191338979070559, 0.253676846281894, -0.723803925093155, -0.137064058860264],
        [0.358325281596958, -0.139959639327942, -0.277426681630464, -0.0153182994681455],
        [-1.43216073444622, -0.163114726716755, 1.53782098070242, 1.1224554353098],
        [-0.536067966587311, -0.139959639327942, 1.0180011125623, 1.47349893131731],
        [0.666671687756801, -1.4134894457127, -1.89213174857686, 0.956901667933616],
    ]

    # Genre labels, five consecutive rows per genre:
    # 0 Blues, 1 Classic Rock, 2 Classical, 3 Country, 4 Electronic,
    # 5 Metal, 6 Hip-Hop, 7 Jazz, 8 Pop.
    Y = [
        0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
    ]

    # (printed prefix, estimator) pairs, in the order results are reported.
    classifiers = [
        # With 10 neighbors, accuracy rate jumps from 0.28 to 0.35
        ("KNN Accuracy rate: ", KNeighborsClassifier(n_neighbors=10)),
        ("SVC Accuracy rate: ",
         SVC(random_state=2, decision_function_shape='ovr')),
        ("MLP Accuracy Rate: ", MLPClassifier(alpha=1)),
        ("Gaussian Accuracy Rate: ",
         GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)),
        ("Decision Tree Accuracy Rate: ", DecisionTreeClassifier()),
        ("Random Forest Accuracy Rate: ", RandomForestClassifier()),
        ("Naive Bayes Accuracy Rate: ", GaussianNB()),
        ("AdaBoost Accuracy Rate: ", AdaBoostClassifier()),
        ("Quadratic Discriminant Analysis Accuracy Rate: ",
         QuadraticDiscriminantAnalysis()),
    ]

    # Fit everything first so the fitting order (and therefore the use of the
    # shared random state by the unseeded estimators) stays as before.
    for _, estimator in classifiers:
        estimator.fit(trainList, Y)

    # Predict the whole test set in one call: estimators expect a 2-D
    # (n_samples, n_features) array, not one 1-D sample at a time.
    # Each line reports its own classifier's accuracy; pairing the label with
    # the estimator keeps the QDA line from printing the MLP score.
    for label, estimator in classifiers:
        accuracy = sklearn.metrics.accuracy_score(Y, estimator.predict(testList))
        print(label + str(accuracy) + "\n")
# Train the remaining classifiers, announcing each hand-over with `pattern`
# (defined earlier in the script, outside this excerpt).
print(pattern % ("Process", "Tree"))
tree.fit(X_train, y_train)
print(pattern % ("Tree", "Bayes"))
bayes.fit(X_train, y_train)
print("Finish training Bayes.")

# Make predictions.
# NOTE(review): `data` and `target` are not read in the visible part of the
# script; they are kept in case later code uses them.
data = X_test
target = y_test
print("All classifiers trained. Start making predictions")
pattern = "Making predictions for %s"
print(pattern % "Forest")
forest_results = forest.predict(X_test)
print(pattern % "KNN")
knn_results = knn.predict(X_test)
print(pattern % "Process")
process_results = process.predict(X_test)
print(pattern % "Tree")
tree_results = tree.predict(X_test)
print(pattern % "Bayes")
bayes_results = bayes.predict(X_test)

# Print metrics.  Single-argument print(...) calls behave the same on
# Python 2 and Python 3, matching the call form used above.
from sklearn.metrics import classification_report
print("Forest results: ")
print(classification_report(y_true=y_test, y_pred=forest_results))
print("KNN results: ")
print(classification_report(y_true=y_test, y_pred=knn_results))
print("Gaussian process results: ")
print(classification_report(y_true=y_test, y_pred=process_results))
print("Decision Tree results: ")
print(classification_report(y_true=y_test, y_pred=tree_results))
print("Bayes results: ")
def trainPredict(subjectid, makeplot=False):
    """Train a Gaussian process classifier for one subject and summarise it.

    The classifier is fitted on ``csvdata/<subjectid>_sim.csv`` and applied to
    ``csvdata/<subjectid>_rival.csv``.  Both files are passed through
    ``cleandata`` (downsample factor 20 for training, 4 for testing).

    Parameters:
        subjectid: string used to build the two CSV file names.
        makeplot: when true, ``summaryplot`` is called with the predictions.

    Returns:
        A 6-tuple of dicts: ``meanprediction``, ``predictiondev``,
        ``predictionaccuracy``, ``predictioncourse``, ``nextcourse`` and
        ``afterdominant``.  All values are summaries computed from column 1 of
        ``predict_proba`` except ``predictionaccuracy``, which compares
        predicted and reported labels.

    NOTE(review): ``perceptindices.iteritems()`` is Python 2 only.
    """
    print("testing participant " + subjectid)
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid + '_sim.csv',
                              delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    # Clean + downsample this data
    trainx, trainy = cleandata(traindata, downsamplefactor=20)
    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    # Discard all training data to preserve memory
    # (only trainx/trainy are released here; `traindata` stays bound)
    trainx = trainy = None
    # load test data
    testdata = np.genfromtxt('csvdata/' + subjectid + '_rival.csv',
                             delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data
    testdata = None  # clear from memory
    # work out percentage in percept for each data point:
    percentages, nextpercept = assign_percentage(testy)
    # get a prediction for all points in the test data:
    predicty = gp.predict(testx)
    proby = gp.predict_proba(testx)
    if makeplot:
        # NOTE(review): `participant` is not defined in this function; it is
        # either a module-level name or was meant to be `subjectid` -- confirm.
        summaryplot(participant, testx, testy, predicty, proby, gp)
    # Summarise prediction by reported percept
    # (mean / standard deviation of probability column 1 over the samples
    # whose reported label equals each percept's index)
    meanprediction = {'mean' + percept: proby[testy == value, 1].mean()
                      for percept, value in perceptindices.iteritems()}
    predictiondev = {'stdev' + percept: proby[testy == value, 1].std()
                     for percept, value in perceptindices.iteritems()}
    # Fraction of samples of each reported percept that were predicted as it.
    predictionaccuracy = {'acc' + percept:
                          (predicty[testy == value] == testy[testy == value]).mean()
                          for percept, value in perceptindices.iteritems()}
    # Summarise prediction by percentage in percept
    # Ten bins: each keeps samples with cutoff - 0.1 < percentage < cutoff
    # (both bounds exclusive).
    predictioncourse = {'timecourse' + percept + str(cutoff):
                        proby[(testy == value) &
                              (percentages < cutoff) &
                              (percentages > cutoff - 0.1), 1].mean()
                        for percept, value in perceptindices.iteritems()
                        for cutoff in np.linspace(0.1, 1, 10)}
    # Summarise mixed percept time courses by the next percept
    # (presumably label 0 is the 'mixed' percept -- verify against
    # perceptindices)
    nextcourse = {'nextcourse' + percept + str(cutoff):
                  proby[(testy == 0) &
                        (percentages < cutoff) &
                        (percentages > cutoff - 0.1) &
                        (nextpercept == perceptindices[percept]), 1].mean()
                  for percept in ['highfreq', 'lowfreq']
                  for cutoff in np.linspace(0.1, 1, 10)}
    # Same binning for the dominant percepts, split by which percept follows.
    afterdominant = {'after' + percept + "_" + after + "_" + str(cutoff):
                     proby[(testy == perceptindices[percept]) &
                           (percentages < cutoff) &
                           (percentages > cutoff - 0.1) &
                           (nextpercept == perceptindices[after]), 1].mean()
                     for percept, after in [('highfreq', 'mixed'),
                                            ('highfreq', 'lowfreq'),
                                            ('lowfreq', 'mixed'),
                                            ('lowfreq', 'highfreq')]
                     for cutoff in np.linspace(0.1, 1, 10)}
    # Only return the summarised data
    return meanprediction, predictiondev, predictionaccuracy, \
        predictioncourse, nextcourse, afterdominant
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score

# Load the data set, indexed by (Ticker, Fiscal Year, Fiscal Period).
data = pd.read_csv('finance_data.csv',
                   index_col=['Ticker', 'Fiscal Year', 'Fiscal Period'])
print(data.columns)
# Target is the 'pos_neg' column; the features are every remaining column
# except 'shifted_chg' and 'report_date'.
Y = data.loc[:, 'pos_neg']
X = data.drop(columns=['pos_neg', 'shifted_chg', 'report_date'])
X = scale(X.values)
# shuffle=False keeps the rows in file order: the last 20% form the test set.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=.2,
                                                    shuffle=False)
h = .02  # step size in the mesh (not used in the visible part of the script)
kernal = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernal)
gpc.fit(X_train, y_train)
Z = gpc.predict(X_test)
acc = accuracy_score(y_test, Z)
print(acc)
# Show the first ten true labels next to the first ten predictions.
print(y_test[0:10])
print(Z[0:10])
[181, 85, 43]] Y = [ 'male', 'male', 'female', 'female', 'male', 'male', 'female', 'female', 'female', 'male', 'male' ] clf = clf.fit(X, Y) clf1 = clf1.fit(X, Y) clf2 = clf2.fit(X, Y) clf3 = clf3.fit(X, Y) clf4 = clf4.fit(X, Y) # clf5=clf5.fit(X,Y) prediction = clf.predict([[190, 70, 43]]) prediction1 = clf1.predict([[190, 70, 43]]) prediction2 = clf2.predict([[190, 70, 43]]) prediction3 = clf3.predict([[190, 70, 43]]) prediction4 = clf4.predict([[190, 70, 43]]) # prediction5=clf5.predict([[190,70,43]]) # # print(prediction) # print(prediction1) # print(prediction2) # print(prediction3) # print(prediction4) max_acc = max(prediction, prediction1, prediction2, prediction3, prediction4) if max_acc == prediction: print("SVM") elif max_acc == prediction1: print("LogisticRegression") elif max_acc == prediction2: print("DecisionTreeClassifier") elif max_acc == prediction3: print("GaussianNB")
#for st in range(mikos): # print('The gene',name[st],'is according to our algorithm',tava[st]) teliko=pd.DataFrame(list(zip(name,tava)),columns=['GENES','TARGET VALIDATION ']) teliko.to_excel('output_svc.xlsx', engine='xlsxwriter') if classifiers == 6 or classifiers == 0: #------------------------------------------------------------------------ #--------------------------PREDICTION PART RF--------------------------------- #------------------------------------------------------------------------- pred=[] for st in range(mikos): t=tixera[st] ti=[int(s) for s in t.split(',')] ni=gausian.predict([ti]) pred.append(ni[0]) #------------------------------------------------------------------------ #--------------------------Create final matrix --------------------------------- #------------------------------------------------------------------------- # given that in training 1 was as good target and as a bad one tava=[] for pr in pred : if pr > 0.95 : t=' GOOD potential target' tava.append(t) else : t=' BAD potential target' tava.append(t) name=[] for st in range(mikos):
ax.text(1.0, 2.5, r"$\Delta t_2 = %.1f$"%i[1], fontsize=18)
#pl.draw()
#nsnIa = np.array([len(tmp2[i-2]) for i in range(4)]).sum()
#label = np.zeros(len(color))
#label[:nsnIa] = 1

# Either refit the classifier and save it next to this script, or load the
# previously saved model.  The files are opened in `with` blocks so the
# handles are closed (and the dump flushed) deterministically.
if REFIT:
    clf.fit(X, y)
    with open(thisdir+"/GPmodel.pkl", 'wb') as _model_file:
        pickle.dump(clf, _model_file)
else:
    # load the model from disk
    # NOTE(review): unpickling executes arbitrary code; only load a
    # GPmodel.pkl that this script wrote itself.
    with open(thisdir+"/GPmodel.pkl", 'rb') as _model_file:
        clf = pickle.load(_model_file)
y_pred = clf.predict(X)
#in sample accuracy
accuracy = accuracy_score(y, y_pred)
print("Accuracy (train) for %0.1f%% " % (accuracy * 100))

#calculate probability everywhere in the phase space
''' xx = np.linspace(phasespace_complete[:,0].min()-0.1, phasespace_complete[:,0].max()+0.1, 100) yy = np.linspace(phasespace_complete[:,1].min()-0.1, phasespace_complete[:,1].max()+0.1, 100).T '''
# Regular resolution x resolution grid over [xmin, xmax] x [ymin, ymax].
resolution = 50
xx = np.linspace(xmin, xmax, resolution)
yy = np.linspace(ymin, ymax, resolution)
xx, yy = np.meshgrid(xx, yy)
class Classifier(object):
    """Train, persist and apply a benign/malicious sample classifier.

    Label 1 is reported as ``"ben"`` and label 0 as ``"mal"``.  Results of the
    long-running operations are accumulated in ``self.files``,
    ``self.status`` and ``self.result`` and written out by ``dump_json``;
    log lines are accumulated in ``self.global_log``.
    """

    def __init__(self, sample_dir, crossval=False, action="", name="", model=""):
        """Initialise empty state; load the ground truth when ``name == "tra"``.

        ``action`` is accepted for backward compatibility but not stored.
        """
        self.sample_dir = sample_dir
        self.ben_train_pd = None
        self.mal_train_pd = None
        self.all_train_pd = None
        self.groundtruth_tr_pd = None
        self.clf = None
        self.two_gram = None
        self.crossval = crossval
        #self.action = action
        self.name = name
        self.model = model
        self.outdict = {}
        self.outdict["name"] = name
        #self.outdict["action"] = action
        self.result = []
        self.status = []
        self.files = []
        self.num_files = 0
        self.global_log = ""
        if self.name == "tra":
            self.load_groundtruth()

    @staticmethod
    def _label_to_tag(label):
        """Map a predicted label to "ben" (1) or "mal" (0); None otherwise."""
        if label == 1:
            return "ben"
        if label == 0:
            return "mal"
        return None

    def log(self, msg):
        """Print ``msg`` with a timestamp and append it to the global log."""
        time_str = time.strftime('%X %x %Z')
        log_msg = "[%s] %s\n" % (time_str, msg)
        # Single-argument call form prints the same on Python 2 and 3.
        print(log_msg)
        self.global_log += log_msg

    def dump_log(self, out_pn):
        """Append the accumulated log text to the file ``out_pn``."""
        with open(out_pn, 'a') as f:
            f.write(self.global_log)

    def classify_one(self, in_pn):
        """Classify the header-less CSV ``in_pn``.

        Returns ``(tag, dataframe)`` where tag is "ben" or "mal", or ``None``
        when the classifier returns a label other than 0 or 1.
        """
        #current_sample_pd = pd.DataFrame.from_csv(in_pn, engine='python')
        current_sample_pd = pd.read_csv(in_pn, header=None, engine='python')
        tag = self._label_to_tag(self.clf.predict(current_sample_pd)[0])
        if tag is None:
            return None
        return tag, current_sample_pd

    def dump_json(self, out_pn, input_pn, null_file=False):
        """Write the accumulated results to ``out_pn`` as JSON.

        With ``null_file`` set, one ``None`` entry is emitted per file listed
        in ``input_pn`` and the first recorded result/status is repeated for
        each of them; otherwise the recorded lists are written as they are.
        """
        with open(input_pn, 'r') as fh:
            input_dict = json.load(fh)
        fls = [None] * len(input_dict["files"])
        if null_file:
            self.outdict["files"] = fls
            self.outdict["num_files"] = len(fls)
            # self.result[0] / self.status[0] are only read when there is at
            # least one file, exactly as in a per-file loop.
            self.outdict["status"] = [self.status[0] for _ in fls]
            self.outdict["result"] = [self.result[0] for _ in fls]
        else:
            self.outdict["result"] = self.result
            self.outdict["files"] = self.files
            self.outdict["status"] = self.status
            self.outdict["num_files"] = len(self.files)
        self.outdict["model"] = self.model
        with open(out_pn, 'w') as f:
            f.write(json.dumps(self.outdict, indent=4, separators=(',', ': ')))

    # should read json file to know the groundtruth
    def classify_all(self, json_file):
        """Classify every file listed in ``json_file`` and record the outcome.

        The ground truth of file ``i`` is ``tags[i]["tag_b"]``; its features
        are read from ``/tmp/output/<groundtruth>csv/<file>.csv``.

        Raises:
            ValueError: if the classifier returns a label other than 0 or 1.
        """
        basedir = "/tmp/output"
        with open(json_file, 'r') as fh:
            input_dict = json.load(fh)
        filelist = input_dict["files"]
        tag = input_dict["tags"]
        for i in range(len(filelist)):
            groundtruth = tag[i]["tag_b"]
            #in_pn = os.path.join(basedir, groundtruth, filelist[i])
            in_pn = os.path.join(basedir, "%scsv" % groundtruth,
                                 filelist[i] + ".csv")
            current_sample_pd = pd.read_csv(in_pn, header=None, engine='python')
            result_num = self.clf.predict(current_sample_pd)[0]
            result = self._label_to_tag(result_num)
            if result is None:
                # Without this check the previous file's verdict (or an
                # unbound name on the first file) would be reported.
                raise ValueError("unexpected label %r for %s" %
                                 (result_num, filelist[i]))
            self.log("** Classifying %s:" % filelist[i])
            status = "correct" if result == groundtruth else "incorrect"
            tag_dict = {"tag_a": groundtruth, "tag_b": result}
            msg = "GroundTruth: %s, Predict: %s, result: %s" % \
                (groundtruth, result, status.upper())
            self.dump_indv_result(filelist[i], msg)
            self.log(" >> classified as %s. This is %s result" %
                     (result.upper(), status.upper()))
            self.files.append(filelist[i])
            self.status.append(status)
            self.result.append(tag_dict)

    def classify_one_array(self, in_array):
        """Classify ``in_array``; same return convention as ``classify_one``."""
        tag = self._label_to_tag(self.clf.predict(in_array)[0])
        if tag is None:
            return None
        return tag, in_array

    def dump_indv_result(self, filename, msg):
        """Write ``msg`` to ``/mnt/output/<filename>.log.txt``."""
        out_dir = os.path.join("/mnt", "output")
        out_pn = os.path.join(out_dir, filename + ".log.txt")
        with open(out_pn, 'w') as f:
            f.write(msg + "\n")

    def perturb_candidate(self, json_file):
        """Generate a patched ("_pert") copy of every non-benign listed file.

        For each file the closest benign training sample is found, the 100
        features returned by ``get_highest_negdiff_feature`` are mapped back
        to their two-grams, and the call addresses extracted from the binary
        are handed to ``patch_bin``.  One files/status/result entry is
        recorded per patched file.
        """
        #os.system("ls -al /tmp/output/malcsv")
        with open(json_file, 'r') as fh:
            input_dict = json.load(fh)
        filelist = input_dict["files"]
        ben_sample_pd = self.ben_train_pd
        # let's classify this sample
        for filename in filelist:
            pert_candidate = []
            self.log("[*] Extracting information from %s" % filename)
            csv_pn = os.path.join("/tmp", "output", "malcsv", filename + ".csv")
            result, current_array = self.classify_one(csv_pn)
            perturbed_array = current_array.copy().values[0]
            if result == "ben":
                self.log(" >>> doesn't necessary to perturb %s" % filename)
                continue
            closest_benign = find_closest_benign(ben_sample_pd, perturbed_array)
            #print closest_benign
            #target_perturb_idx, diff = get_highest_diff_feature(closest_benign, perturbed_array)
            #print target_perturb_idx, diff
            #comp_two_arrays(closest_benign, perturbed_array)
            sortedlist = get_highest_negdiff_feature(closest_benign,
                                                     perturbed_array, 100)
            self.log(" >>> should minimize this index")
            for item in sortedlist:
                two_gram = ret_twogram_from_idx(self.two_gram, item[0])
                self.log("idx %d, value %d, two-gram: %s" %
                         (item[0], item[1], two_gram))
                if two_gram is not None and "|" in two_gram:
                    gram1, gram2 = two_gram.split("|")
                    # Keep first-seen order and skip duplicates.
                    if gram1 not in pert_candidate:
                        pert_candidate.append(gram1)
                    if gram2 not in pert_candidate:
                        pert_candidate.append(gram2)
            patch_candidate, caller_callee = self.extract_addr(
                filename, pert_candidate)
            #dump_addr(patch_candidate, PATCH_ADDR_FILE)
            dump_addr(caller_callee, PATCH_ADDR_FILE)
            # nullify call by addr
            in_pn = os.path.join("/mnt", "input", filename)
            out_pn = os.path.join("/mnt", "output", filename + "_pert")
            self.log(" >>> generating perturbed file %s" % filename)
            patch_bin(in_pn, out_pn, PATCH_ADDR_FILE)
            # record it at the json
            out_size = os.path.getsize(out_pn)
            status = "success" if out_size > 0 else "fail"
            tag_dict = {"tag_a": filename, "tag_b": str(out_size)}
            self.dump_indv_result(filename, status)
            self.files.append(filename + "_pert")
            self.status.append(status)
            self.result.append(tag_dict)

    def extract_addr(self, filename, pert_candidate):
        """Return ``({address: call}, caller_callee)`` for ``/mnt/input/<filename>``.

        ``pert_candidate`` is accepted but not used by this method.
        """
        file_pn = os.path.join("/mnt", "input", filename)
        out = {}
        # find line with "call" command
        output = _objdump_extract_calls(file_pn)
        call_list, addr_list, caller_callee = extract_caller(output)
        #print caller_callee
        for i in range(len(call_list)):
            out[addr_list[i]] = call_list[i]
        return out, caller_callee

    # deprecated
    def gen_perturb_one(self, in_pn, out_pn):
        """Repeatedly perturb one sample until it is classified as benign.

        Gives up after more than 300 perturbation rounds.  ``out_pn`` is not
        used.
        """
        perturbation_count = 0
        result, current_array = self.classify_one(in_pn)
        self.log(" >>> current sample classified as %s" % result)
        if result == "ben":
            self.log(" >>> doesn't necessary to perturb")
            return
        ben_sample_pd = load_csv_files(BEN_SAMPLE_DIR, "ben", sample=True)
        perturbed_array = current_array.copy().values[0]
        closest_benign = find_closest_benign(ben_sample_pd, perturbed_array)
        while True:
            perturbation_count += 1
            #comp_two_arrays(closest_benign, perturbed_array)
            target_perturb_idx, diff = get_highest_diff_feature(
                closest_benign, perturbed_array)
            self.log(ret_distance(closest_benign, perturbed_array))
            # increase count
            perturbed_array[target_perturb_idx] += diff
            self.log(" >>> perturbing %dth index (current feature diff %d)" %
                     (target_perturb_idx, diff))
            result = self.classify_one_array([perturbed_array])[0]
            if result == "ben":
                self.log(" >>> found successful perturbation")
                break
            if perturbation_count > 300:
                self.log("count over")
                break

    def load_groundtruth(self):
        """Load the training CSVs; benign rows get label 1, malicious rows 0."""
        self.mal_train_pd = load_csv_files(self.sample_dir, "malcsv")
        self.ben_train_pd = load_csv_files(self.sample_dir, "bencsv")
        self.all_train_pd = pd.DataFrame(
            np.vstack([self.ben_train_pd, self.mal_train_pd]))
        self.all_train_pd = np.nan_to_num(self.all_train_pd)
        self.groundtruth_tr_pd = pd.Series([1] * len(self.ben_train_pd) +
                                           [0] * len(self.mal_train_pd))

    def train(self, model, argument=None):
        """Build the estimator named by ``model`` and fit it on the ground truth.

        ``argument`` is the neighbour count for "nn", ``(max_depth,
        random_state)`` for "rf", ``(solver, hidden_size, random_state)`` for
        "neural", ``(kernel_val, RBF_val)`` for "gaussian" and unused for
        "svm".

        Raises:
            ValueError: if ``model`` is unknown and no classifier is set yet.
        """
        if model == "nn":
            #assert (isinstance(argument, int), "Argument should be defined")
            n_neighbors = argument
            self.clf = neighbors.KNeighborsClassifier(n_neighbors)
        elif model == "rf":
            max_depth = argument[0]
            random_state = argument[1]
            self.clf = RandomForestClassifier(max_depth=max_depth,
                                              random_state=random_state)
        elif model == "neural":
            solver = argument[0]
            hidden_size = argument[1]
            random_state = argument[2]
            self.clf = MLPClassifier(solver=solver, alpha=1e-5,
                                     hidden_layer_sizes=hidden_size,
                                     random_state=random_state)
        elif model == 'gaussian':
            kernel_val = argument[0]
            RBF_val = argument[1]
            self.clf = GaussianProcessClassifier(
                kernel=kernel_val * RBF(length_scale=RBF_val), optimizer=None)
        elif model == "svm":
            self.clf = svm.SVC()
        if self.clf is None:
            # Fail with a clear message instead of "'NoneType' has no fit".
            raise ValueError("unknown model type: %r" % (model,))
        self.clf.fit(self.all_train_pd, y=self.groundtruth_tr_pd)

    def ret_report(self):
        """Log the confusion matrix and classification report on the training set.

        Uses 3-fold cross-validated predictions when ``self.crossval`` is set,
        plain predictions of the fitted classifier otherwise.
        """
        if self.crossval:
            self.log("Cross-validation result")
            predict_tr = cross_validation.cross_val_predict(
                self.clf, self.all_train_pd,
                y=self.groundtruth_tr_pd, cv=3, n_jobs=8)
        else:
            predict_tr = self.clf.predict(self.all_train_pd)
        cm = confusion_matrix(self.groundtruth_tr_pd, predict_tr)
        report = metrics.classification_report(self.groundtruth_tr_pd,
                                               predict_tr)
        # Printing the matrix is best effort; report why it failed instead of
        # swallowing the error silently.
        try:
            print_matrix(cm)
            self.log(cm)
        except Exception as exc:
            self.log("could not print confusion matrix: %s" % exc)
        self.log(report)
        #log(report)

    def load_model(self, in_pn):
        """Restore classifier, two-gram table and benign samples from ``in_pn``.

        NOTE(review): unpickling executes arbitrary code; ``in_pn`` must come
        from a trusted source.
        """
        with open(in_pn, 'rb') as f:
            pickle_obj = pickle.load(f)
        self.clf = pickle_obj["model"]
        self.two_gram = pickle_obj["two-gram"]
        self.ben_train_pd = pickle_obj["benign"]

    def save_model(self, out_pn):
        """Pickle classifier, two-gram table and benign samples to ``out_pn``.

        Also records a files/status/result entry for the generated model.
        """
        self.log("[*] Saving model now!")
        #assume that feature_mlsploit.py collectly generate two_gram_mini.pkl (/mnt/output/)
        # Pickle data is binary, so the file is opened in 'rb' mode.
        with open("/mnt/output/two_gram_mini.pkl", 'rb') as f_two:
            two_grams = pickle.load(f_two)
        model_name = out_pn
        out = {}
        out["model"] = self.clf
        out["two-gram"] = two_grams
        out["benign"] = self.ben_train_pd
        with open(model_name, 'wb') as f:
            pickle.dump(out, f)
        # take care of output.json file
        self.result.append({
            "tag_a": "%s classifier" % self.model,
            "tag_b": "NP"
        })
        self.files.append(os.path.basename(out_pn))
        if os.path.getsize(model_name) > 0:
            self.status.append("success")
            self.dump_indv_result(model_name, "model generation success!")
        else:
            self.status.append("fail")
            self.dump_indv_result(model_name, "model generation fail!")
N, d = X.shape
# Sample counts are plain Python ints (the `np.int` alias no longer exists in
# current NumPy releases).  N is pinned to 1797 as before.
N = 1797
Ntrain = 800
Ntest = 250
# Slice ends are exclusive: [0:Ntrain] selects exactly Ntrain training rows
# (the former `Ntrain - 1` end silently dropped one sample).
Xtrain = X[0:Ntrain, :]
ytrain = y[0:Ntrain]
# The last Ntest rows form the test set.
Xtest = X[N - Ntest:N, :]
ytest = y[N - Ntest:N]
#kernel = 1.0 * RBF([1.0]) #isotropic kernel
#kernel = DotProduct(1.0)
kernel = Matern(0.5)
# NOTE: the name `gpc_rbf` is historical; the active kernel is Matern.
gpc_rbf = GaussianProcessClassifier(kernel=kernel).fit(Xtrain, ytrain)
# Error rate = fraction of samples whose predicted label differs.
yp_train = gpc_rbf.predict(Xtrain)
train_error_rate = np.mean(np.not_equal(yp_train, ytrain))
yp_test = gpc_rbf.predict(Xtest)
test_error_rate = np.mean(np.not_equal(yp_test, ytest))
#print('Training error rate')
#print(train_error_rate)
print('Test error rate')
print(test_error_rate)
#testing set 100
# tsize = [500, 600, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500]
# radial = [90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90]
# dot = [7, 11, 8, 8, 7, 9, 8, 8, 7, 8, 7]
# matern = [32, 31, 27, 25, 22, 19, 19, 18, 17, 17, 17]
#
# plt.figure(2)
outputs = params.n_max_target_attr start_time = time.time() if n_problem_type == 'Classification': try: kernel = 1.0 * RBF(1.0) model = GaussianProcessClassifier(kernel=kernel, random_state=0) # Train model model.fit(X_train, y_train) #prediction #Time computation start start_timeFun = time.time() y_pred = model.predict(X_train) errorTrn = model.score(X_train, y_train) funcEvltime = (time.time() - start_timeFun) / len(X_test) #Report #y_pred = model.predict(X_test) errorTst = model.score(X_test, y_test) now = datetime.now() current_time = now.strftime("%H:%M:%S") print(current_time, ' ', exp_num, data_file, ' ', runOpt, ' test accuracy:', errorTst) #collection error = [errorTrn, errorTst, 'none', funcEvltime, 'empty'] data_results_coll.update({ str(data_file.split('.')[0]) + "_" + str(exp_num) + "_" + runOpt:
# NOTE(review): this excerpt indexes result arrays with `i`, so it presumably
# runs inside a loop whose header is outside the visible source; the
# "Average ..." lines at the end look like post-loop summaries -- confirm the
# intended nesting.
print(
    "Negative log predictive density of validation set with rbf kernel %.3f"
    % neg_lpd_rbf_v)
# Negative mean log of the probability assigned to each test sample's own
# label (assumes y_test holds integer column indices into predict_proba --
# TODO confirm).
neg_lpd_matern_v = -np.mean(
    np.log(
        gp_matern_fix.predict_proba(X_test)[np.arange(len(X_test)), y_test]))
print(
    "Negative log predictive density of validation set with matern kernel %.3f"
    % neg_lpd_matern_v)
# Store this run's four NLPD values at position i.
nlpd_rbf_t[i] = neg_lpd_rbf_t
nlpd_matern_t[i] = neg_lpd_matern_t
nlpd_rbf_v[i] = neg_lpd_rbf_v
nlpd_matern_v[i] = neg_lpd_matern_v
# Only the training accuracy is stored; the test accuracy is printed but not
# kept.
accuracy_rbf[i] = accuracy_score(y_train, gp_rbf_fix.predict(X_train))
print("Accuracy for X_train with rbf kernel: %.5f" % accuracy_rbf[i])
print("Accuracy for X_test with rbf kernel: %.5f" %
      accuracy_score(y_test, gp_rbf_fix.predict(X_test)))
accuracy_matern[i] = accuracy_score(y_train, gp_matern_fix.predict(X_train))
print("Accuracy for X_train with matern kernel: %.5f" % accuracy_matern[i])
print("Accuracy for X_test with matern kernel: %.5f\n" %
      accuracy_score(y_test, gp_matern_fix.predict(X_test)))
# Averages over the stored (training-set) accuracies and NLPD values.
print("Average accuracy with rbf kernel: %.5f" % np.mean(accuracy_rbf))
print("Average accuracy with matern kernel: %.5f" % np.mean(accuracy_matern))
print(
    "Average negative log predictive density of training set with rbf kernel: %.5f"
    % np.mean(nlpd_rbf_t))
# Alternative data split, kept disabled for reference:
#X_train = X
#Y_train = Y_enc
#X_test = data_test[:,1:6]
#Y_test = data_test[]

# Fit five different classifiers on the same training split and predict the
# same held-out split with each of them.

# 1) Decision tree with library defaults.
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, Y_train)
Y_pred = clf.predict(X_test)

# 2) Gradient-boosted trees.
clf2 = GradientBoostingClassifier(
    n_estimators=200,
    learning_rate=0.1,
    max_depth=3,
)
clf2.fit(X_train, Y_train)
Y_pred2 = clf2.predict(X_test)

# 3) Multinomial logistic regression with a cross-validated search over 30
#    values of C.
clf3 = LogisticRegressionCV(
    Cs=30,
    max_iter=500,
    multi_class='multinomial',
)
clf3.fit(X_train, Y_train)
Y_pred3 = clf3.predict(X_test)

# 4) Linear model trained with stochastic gradient descent.
clf4 = SGDClassifier(alpha=0.02)
clf4.fit(X_train, Y_train)
Y_pred4 = clf4.predict(X_test)

# 5) Gaussian process classifier with library defaults.
clf5 = GaussianProcessClassifier()
clf5.fit(X_train, Y_train)
Y_pred5 = clf5.predict(X_test)

# Only the gradient-boosting predictions are scored here; the other
# prediction arrays are computed but not evaluated in this section.
print(metrics.accuracy_score(Y_test, Y_pred2))

# clf5.fit(X,Y)
# Y_pred = clf5.predict()
# Collect the names of the columns kept by the selection mask.  ``mask`` is
# paired with every column of ``df`` except the first one.  Extending the
# existing list from a generator keeps ``new_features`` the same object and
# avoids binding a loop variable called ``bool``, which would shadow the
# builtin for every statement that follows in this scope.
new_features.extend(
    feature
    for keep, feature in zip(mask, df.columns[1:].tolist())
    if keep
)
stats.text = str(new_features)

# Hold out 25% of the reduced feature matrix for evaluation.  No random_state
# is passed, so the split differs from run to run.
x_train_original, x_test_original, y_train_original, y_test_original = (
    train_test_split(X_new, y, test_size=0.25)
)

# Gaussian process classifier with library defaults.
clf = GaussianProcessClassifier()
clf.fit(x_train_original, y_train_original)
predictions = clf.predict(x_test_original)

# NOTE(review): unpacking into exactly four values presumes a 2x2 confusion
# matrix, i.e. binary labels with both classes present -- confirm upstream.
tn, fp, fn, tp = confusion_matrix(y_test_original, predictions).ravel()

# Bar labels and their counts, listed in matching order.
fruits = ['True Positive', 'False Positive', 'True Negative', 'False Negative']
counts = [tp, fp, tn, fn]

source = ColumnDataSource(data=dict(fruits=fruits, counts=counts))
p = figure(x_range=fruits,
           plot_height=350,
           toolbar_location=None,
           title="Counts")
# Gaussian process classification: predictions are available as class
# probabilities; apart from that, usage closely mirrors Gaussian process
# regression.
rlf = GaussianProcessClassifier(kernel=None,
                                optimizer='fmin_l_bfgs_b',
                                n_restarts_optimizer=0,
                                max_iter_predict=100,
                                warm_start=False,
                                copy_X_train=True,
                                random_state=None,
                                multi_class='one_vs_rest',
                                n_jobs=1)
rlf.fit(trainX, trainY)
# The two bare expressions below only show a value when run interactively;
# in a plain script their results are discarded.
rlf.score(testX, testY)
preY = rlf.predict(testX)
rlf.log_marginal_likelihood_value_

# Parameter notes (see the scikit-learn GaussianProcessClassifier reference):
#   kernel                covariance function of the GP; None selects the
#                         library default kernel
#   optimizer             optimizer used to tune the kernel hyperparameters
#                         (not an argument passed to the kernel); None keeps
#                         them fixed
#   n_restarts_optimizer  number of additional optimizer runs started from
#                         hyperparameters sampled at random from the allowed
#                         range; 0 means a single run from the initial values
#   max_iter_predict      maximum number of Newton iterations used to
#                         approximate the posterior during prediction
#   warm_start            reuse the solution of the previous posterior-mode
#                         Newton iteration as the starting point of the next
#   copy_X_train          store a persistent copy of the training data in the
#                         object instead of only a reference
#   random_state          seed / generator controlling randomness
#   multi_class           multi-class strategy: 'one_vs_rest' or 'one_vs_one'
#   n_jobs                number of jobs used for the computation
print(test_data.info()) # In[57]: X_predict = scaler.transform(test_data[feature_names]) # In[58]: #using linear discriminant analysis 'svm' y_predict = svm.predict(X_predict) print(y_predict) # In[59]: y_pre_gpc = GPC.predict(X_predict) print(y_pre_gpc) # In[60]: y_pre_gpc_RBF = gpc_rbf.predict(X_predict) print(y_pre_gpc_RBF) # In[61]: y_pre_SVM3 = svm3.predict(X_predict) print(y_pre_SVM3) # In[62]: tdata_ori = pd.read_csv(
pca = PCA(n_components=different_components[i]) principal_component_train.append(pca.fit_transform(train_data)) principal_component_test.append(pca.transform(test_data)) #Then it is needed to train a Gaussian Classifier using the data in each subspace. from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF kernel = RBF(1.0) predict = [] score_train = [] score_test = [] for i in range(0, different_components.size): print(i) gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit( principal_component_train[i], train_labels) predict.append(gpc.predict(principal_component_test[i])) score_train.append(gpc.score(principal_component_train[i], train_labels)) score_test.append(gpc.score(principal_component_test[i], test_labels)) #1.4 #Plot classification error vs. the number of components used for each subspace, and discuss #your results. Compute the classification error for both the training set and the test set #(training is always done using the training set), and provide two plots. plt.pyplot.plot(different_components, score_train) plt.pyplot.show() plt.pyplot.plot(different_components, score_test) plt.pyplot.show()
gpc.fit(X, y)

# Cross-validated scores for every estimator.  The first three keep the
# per-fold arrays and are averaged only when printed; the remaining four are
# reduced to their mean right away.
scores = cross_val_score(dtc, X, y)
scores1 = cross_val_score(rfc, X, y)
scores2 = cross_val_score(etc, X, y)
scores3 = cross_val_score(abc, X, y).mean()
scores4 = cross_val_score(gbc, X, y).mean()
scores5 = cross_val_score(vcf, X, y).mean()
scores6 = cross_val_score(gpc, X, y).mean()

# Prediction check: the expected labels for these rows are 0, 1, 2, 1.
test = [
    [4.0, 3.1, 1.1, 0.1],
    [6.7, 3.1, 4.1, 1.4],
    [7.1, 3.2, 6.1, 1.9],
    [6.3, 2.9, 4.2, 1.4],
]

# One prediction array per fitted estimator, in the order they are reported.
y_pred0 = dtc.predict(test)
y_pred = rfc.predict(test)
y_pred1 = etc.predict(test)
y_pred2 = abc.predict(test)
y_pred3 = gbc.predict(test)
y_cvf = vcf.predict(test)
y_gpc = gpc.predict(test)

# Report: predictions first, then mean scores plus the feature importances
# of ``abc``, and finally the results for ``vcf`` and ``gpc``.
print(y_pred0, y_pred, y_pred1, y_pred2, y_pred3)
print(
    scores.mean(),
    scores1.mean(),
    scores2.mean(),
    scores3,
    scores4,
    abc.feature_importances_,
)
print(y_cvf, scores5, y_gpc, scores6)
# Two Gaussian process classifiers trained on the same leading slice of the
# data: ``gp_fix`` is built with optimizer=None, ``gp_opt`` uses the default
# optimizer.
X_fit = X[:train_size]
y_fit = y[:train_size]

gp_fix = GaussianProcessClassifier(
    kernel=1.0 * RBF(length_scale=1.0),
    optimizer=None,
)
gp_fix.fit(X_fit, y_fit)

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X_fit, y_fit)

# Log marginal likelihood evaluated at each model's fitted kernel parameters.
print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

# Both metrics are computed on the training slice, fixed model first.
train_accuracy = tuple(
    accuracy_score(y_fit, model.predict(X_fit))
    for model in (gp_fix, gp_opt)
)
print("Accuracy: %.3f (initial) %.3f (optimized)" % train_accuracy)

train_log_loss = tuple(
    log_loss(y_fit, model.predict_proba(X_fit)[:, 1])
    for model in (gp_fix, gp_opt)
)
print("Log-loss: %.3f (initial) %.3f (optimized)" % train_log_loss)

# Plot posteriors
plt.figure(0)
plt.scatter(X_fit[:, 0], y_fit,
            c='k', label="Train data", edgecolors=(0, 0, 0))
plt.scatter(X[train_size:, 0], y[train_size:],
            c='g', label="Test data", edgecolors=(0, 0, 0))

# Evaluate the second probability column of the fixed-kernel model on an
# evenly spaced 1-D grid.
X_ = np.linspace(0, 5, 100)
plt.plot(X_,
         gp_fix.predict_proba(X_[:, np.newaxis])[:, 1],
         'r',
         label="Initial kernel: %s" % gp_fix.kernel_)
def voting_svm(self):
    """Classify each test task by a vote over per-neuron classifiers.

    Despite the method name, the model fitted for every neuron is ``GPC``,
    not an SVM.  For each neuron index in ``self.features_index`` a separate
    classifier is trained on that neuron's single feature column of
    ``self.X_train`` (column ``self.epoch * self.neuron_num + neuron``) and
    used to predict every row of ``self.X_test``.  The label of a test task
    is ``mode`` of the labels predicted by the individual neurons.

    Progress and intermediate predictions are printed to stdout.

    Returns:
        The fraction of test tasks whose voted label equals the matching
        entry of ``self.y_test``.
    """
    # First feature column belonging to the current epoch.
    epoch_offset = self.epoch * self.neuron_num

    # Layout: per_neuron_prediction[neuron][0][task] is the label that one
    # neuron's classifier predicted for one test task.
    per_neuron_prediction = []

    print("test")
    for neuron in self.features_index:
        column = epoch_offset + neuron

        # Each classifier sees exactly one feature, hence the 1-element rows.
        # Rows are fetched by integer position, exactly as before, so the
        # containers only need to support len() and indexing.
        X_for_neuron = [
            [self.X_train[example][column]]
            for example in range(len(self.X_train))
        ]
        X_test = [
            [self.X_test[example][column]]
            for example in range(len(self.X_test))
        ]

        clf = GPC()
        clf.fit(X_for_neuron, self.y_train)
        per_neuron_prediction.append([clf.predict(X_test)])

    print(per_neuron_prediction[0])
    print(len(self.X_test))
    print(per_neuron_prediction)

    # Tally the votes task by task and report the running accuracy.
    correct = 0
    for test_task in range(len(self.X_test)):
        temp_task = [votes[0][test_task] for votes in per_neuron_prediction]
        vote_result = mode(temp_task)
        if vote_result == self.y_test[test_task]:
            correct += 1
        print("ACCURACY {}".format(correct/(test_task+1)))

    return correct/len(self.X_test)