def trainGaussianProcessClassifier(X, Y, verbose=False):
    if verbose:
        print("Training Gaussian Process Classifier")
    length_scale = [1 for i in range(len(X[0]))]
    clf = GaussianProcessClassifier(1.0 * RBF(length_scale),
                                    warm_start=True, random_state=42, n_jobs=-1)
    clf.fit(X, Y)
    return clf
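# The snippets in this collection use GaussianProcessClassifier and kernel
# classes without showing their imports. A minimal usage sketch for the helper
# above, with the assumed imports and toy data (not from the original code):
import numpy as np
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

X_toy = np.random.RandomState(0).uniform(0, 5, (30, 2))
y_toy = (X_toy.sum(axis=1) > 5).astype(int)
clf = trainGaussianProcessClassifier(X_toy, y_toy, verbose=True)
print(clf.score(X_toy, y_toy))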
def make_gaussianprocess(X_train, X_test, y_train, y_test):
    model = GaussianProcessClassifier()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    get_classification_metrics(y_pred, y_test)
    return model
def gpc(X_train, y_train, X_test, y_test, lime_flag=False,
        kernel=1.0 * RBF(1.0), optimizer='fmin_l_bfgs_b',
        n_restarts_optimizer=0, warm_start=False, random_state=42,
        n_jobs=-1, max_iter_predict=1000, copy_X_train=True):
    '''
    Parameters:
    X_train, y_train, X_test, y_test - learning set
    lime_flag - enable or disable LIME
    '''
    start_time = time.time()
    # create instance; pass the kernel parameter through
    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer,
                                    n_restarts_optimizer=n_restarts_optimizer,
                                    max_iter_predict=max_iter_predict,
                                    warm_start=warm_start,
                                    copy_X_train=copy_X_train,
                                    random_state=random_state,
                                    n_jobs=n_jobs)
    gpc.fit(X_train, y_train)
    # predict on test set
    y_pred = gpc.predict(X_test)
    # understand the model through LIME
    if lime_flag:
        lime_explainer(X_train, y_train, X_test, y_test, df_row=2,
                       model_predictor=gpc, alogorithm_name="gpc")
    time_end = time.time() - start_time
    # scores
    model_evaluation(X_train, y_train, X_test, y_test, y_pred, gpc,
                     time_end, alg_name='gpc')
    # return model object
    return gpc
def compute_per_gaussian(self, max_iter=100):
    """Compute a Gaussian process classifier per feature."""
    print(len(self.X_train))
    print(len(self.X_train[0]))
    # per feature
    for feature_index in range(int(len(self.X[0]))):
        X_train_mod = []  # training set restricted to one feature
        for example in range(len(self.X_train)):  # for each example (469)
            X_train_mod.append([self.X_train[example][self.counter]])
        X_test_mod = []  # test set restricted to one feature
        for example in range(len(self.X_test)):  # for each example (469)
            X_test_mod.append([self.X_test[example][self.counter]])
        clf = GPC(max_iter_predict=max_iter)  # GPC model
        clf.fit(X_train_mod, self.y_train)  # fit with only one feature
        score = clf.score(X_test_mod, self.y_test)
        self.features_accuracy.append(score)
        self.counter += 1
def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')
    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    clf = GaussianProcessClassifier(max_iter_predict=500, warm_start=True, n_jobs=-1)
    clf.fit(X_train, y_train)
    print("\n\n{}\n".format(clf.score(X_test, y_test)))
    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(y_test, y_predicted, ml_name='GP',
                                  classes=unique_labels,
                                  title='Confusion matrix for Gaussian Process evaluation')
    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(y_test, y_predicted, ml_name='GP',
                                       classes=unique_labels,
                                       title='Classification report for Gaussian Process evaluation')
def test_warning_bounds():
    kernel = RBF(length_scale_bounds=[1e-5, 1e-3])
    gpc = GaussianProcessClassifier(kernel=kernel)
    assert_warns_message(ConvergenceWarning,
                         "The optimal value found for "
                         "dimension 0 of parameter "
                         "length_scale is close to "
                         "the specified upper bound "
                         "0.001. Increasing the bound "
                         "and calling fit again may "
                         "find a better value.",
                         gpc.fit, X, y)

    kernel_sum = (WhiteKernel(noise_level_bounds=[1e-5, 1e-3]) +
                  RBF(length_scale_bounds=[1e3, 1e5]))
    gpc_sum = GaussianProcessClassifier(kernel=kernel_sum)
    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpc_sum.fit(X, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k1__noise_level is close to the "
                                         "specified upper bound 0.001. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")
    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k2__length_scale is close to the "
                                         "specified lower bound 1000.0. "
                                         "Decreasing the bound and calling "
                                         "fit again may find a better value.")

    X_tile = np.tile(X, 2)
    kernel_dims = RBF(length_scale=[1., 2.], length_scale_bounds=[1e1, 1e2])
    gpc_dims = GaussianProcessClassifier(kernel=kernel_dims)
    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpc_dims.fit(X_tile, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "length_scale is close to the "
                                         "specified upper bound 100.0. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")
    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 1 of parameter "
                                         "length_scale is close to the "
                                         "specified upper bound 100.0. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")
def compute_per_gaussian(self, max_iter=100):
    """Compute a Gaussian process classifier per feature."""
    # per feature
    for feature_index in range(int(len(self.X[0]) / 45)):
        X_train_mod = []  # training set restricted to one feature
        for example in range(len(self.X_train)):  # for each example (469)
            X_train_mod.append([self.X_train[example][self.epoch * self.neuron_num + self.counter]])
        X_test_mod = []  # test set restricted to one feature
        for example in range(len(self.X_test)):  # for each example (469)
            X_test_mod.append([self.X_test[example][self.epoch * self.neuron_num + self.counter]])
        clf = GPC(max_iter_predict=max_iter)  # GPC model
        clf.fit(X_train_mod, self.y_train)  # fit with only one feature
        score = clf.score(X_test_mod, self.y_test)
        self.features_accuracy.append(score)
        self.counter += 1
def job(i):
    results = pd.DataFrame()
    prediction_voting = pd.DataFrame()

    df_train = pd.read_csv("preprocessed.csv")
    df_train = df_train.drop(["ID"], axis=1)
    y = df_train["Class"]
    X = df_train.drop(['Class'], axis=1)

    df_test = pd.read_csv("data/amazon_test.csv")
    df_test = df_test.drop(["ID"], axis=1)
    X_p = df_test

    alphas = [0.4]
    for alpha in alphas:
        result_row = {}
        result_row["fold"] = i
        result_row["alpha"] = alpha

        clf = GaussianProcessClassifier(n_jobs=-1)
        clf.fit(X, y)
        predicted = clf.predict(X_p)
        predicted_df = pd.DataFrame(predicted)
        predicted_df.columns = ["Class_" + str(alpha)]
        prediction_voting = pd.concat([prediction_voting, predicted_df], axis=1)
        predicted_df.to_csv("predicted_amazon_gauss.csv", sep=",", index=False)
        #result_row["score"] = round(clf.score(X_p, df_test["Class"]), 4)
        #confusion = confusion_matrix(df_test["Class"], predicted)
        #conf = pd.DataFrame(confusion)
        #conf.to_csv(target_path+"confusion_"+str(i)+"_"+str(alpha)+".csv", index=False)
        # DataFrame.append was removed in pandas 2.0; use concat instead
        results = pd.concat([results, pd.DataFrame([result_row])], ignore_index=True)
    return prediction_voting
def build_classifier_gp(data, labels, **kwargs):
    linear_kernel = Sum(k1=Product(k1=DotProduct(sigma_0=0, sigma_0_bounds='fixed'),
                                   k2=ConstantKernel()),
                        k2=ConstantKernel())
    gp_clf = GaussianProcessClassifier(kernel=linear_kernel)
    gp_clf.fit(data, labels)
    id_pos_class = gp_clf.classes_ == labels.max()
    return gp_clf, gp_clf.predict_proba(data)[:, id_pos_class]
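# sklearn kernels overload "+" and "*", so the explicit Sum/Product composition
# above can be written more compactly. A minimal sketch of the equivalent
# kernel on toy data (not from the original code):
import numpy as np
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import ConstantKernel, DotProduct

equivalent_kernel = (DotProduct(sigma_0=0, sigma_0_bounds='fixed')
                     * ConstantKernel() + ConstantKernel())
X_toy = np.array([[0.0], [1.0], [2.0], [3.0]])
y_toy = np.array([0, 0, 1, 1])
clf = GaussianProcessClassifier(kernel=equivalent_kernel).fit(X_toy, y_toy)
print(clf.predict_proba(X_toy)[:, 1])  # probability of the positive class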
def predict_matches(preprocessed_matches, training_data):
    """Result: 2 - Home Team Wins, 1 - Draw, 0 - Away Team Wins"""
    X_cols = ["Overall Home", "rank Home", "Overall Away", "rank Away"]

    # Training algorithms
    X = training_data[X_cols]
    y_regr = training_data[["Goal Difference"]].values.ravel()
    y_class = training_data[["Simple Result"]].values.ravel()
    gpr = GaussianProcessRegressor(RationalQuadratic() + 10 * WhiteKernel(noise_level=10))
    gpc = GaussianProcessClassifier(RationalQuadratic() + 10 * WhiteKernel(noise_level=10))
    gpr.fit(X, y_regr)
    gpc.fit(X, y_class)
    print("Finished training")

    # Predicting new matches
    X_pred = preprocessed_matches[X_cols]
    y_regr_pred = gpr.predict(X_pred)
    y_class_pred = gpc.predict(X_pred)
    preprocessed_matches["Pred. Goal Difference"] = y_regr_pred
    preprocessed_matches["Pred. Result"] = y_class_pred
    predictions = preprocessed_matches[[
        "Date", "Home Team Name", "Away Team Name",
        "Pred. Goal Difference", "Pred. Result"
    ]]
    return predictions
def GPR(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    print("GPR training :")
    X_train_reduced = X_train
    X_validate_reduced = X_validate
    train_size = params['train_size']
    test_size = params['test_size']
    train = params['train']
    if train:
        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        kernel_rbf = 1.0 * RBF()
        clf = GaussianProcessClassifier(kernel=kernel_rbf, multi_class='one_vs_rest')
        clf.fit(X_train_reduced[:train_size, :], Y_train[:train_size])
        writeObj('gaussian_model.pkl', clf)
        print("training took", time.perf_counter() - start, "s")
        Y_pred = clf.predict(X_validate_reduced[:test_size])
        return Y_pred, clf
    else:
        clf = readObj('gaussian_model.pkl')
        Y_pred = clf.predict(X_validate_reduced[:test_size])
        return Y_pred, clf
def train_on_pool(choice_function, X, y, pool_idcs, train_idcs, test_idcs, name):
    Xtest, ytest = X[test_idcs], y[test_idcs]
    accuracies, balances, n_points = list(), list(), list()
    train_idcs, pool_idcs = copy(train_idcs), copy(pool_idcs)
    gp = GaussianProcessClassifier(n_restarts_optimizer=25, kernel=Matern(),
                                   n_jobs=-1, random_state=42)
    # Add initial points
    while pool_idcs:
        Xtrain, ytrain = X[train_idcs], y[train_idcs]
        gp.fit(Xtrain, ytrain)
        preds = gp.predict(Xtest)
        accuracies.append(accuracy_score(ytest, preds))
        n_points.append(len(train_idcs))
        train_classes = np.unique(y[train_idcs], return_counts=True)[1]
        balances.append(max(train_classes) / sum(train_classes))
        print(f"{len(train_idcs)}: {name}: {accuracies[-1]:.3}, "
              f"class balance: {balances[-1]:.3}")
        y_pool_p = gp.predict_proba(X[pool_idcs])
        chosen_idx = choice_function(y_pool_p)
        train_idcs.append(pool_idcs.pop(chosen_idx))
    return n_points, accuracies, balances
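# train_on_pool expects choice_function to map an (n_pool, n_classes) array of
# predicted probabilities to the index of the next pool point to label.
# A minimal uncertainty-sampling sketch (not from the original code):
import numpy as np

def least_confident_choice(y_pool_p):
    # Pick the pool point whose most probable class has the lowest probability,
    # i.e. the point the classifier is least confident about.
    return int(np.argmin(y_pool_p.max(axis=1)))

# Usage (assumed data arrays and index lists):
# n_points, accs, balances = train_on_pool(least_confident_choice, X, y,
#                                          pool_idcs, train_idcs, test_idcs,
#                                          name="least-confident")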
def task3(feature_sets, label_sets):
    sets = ["A", "B", "crashes", "diabetes", "ionosphere"]
    kernel = 1.0 * RBF(1.0)
    for i in range(5):
        n = len(label_sets[i])
        m = np.linspace(10, .6 * n, num=10, dtype=int)
        div = int(n * .4)
        x_train = feature_sets[i][div:]
        x_test = feature_sets[i][:div]
        y_train = label_sets[i][div:]
        y_test = label_sets[i][:div]
        gpc_errors = []
        for j in range(10):
            gpc = GPC(kernel=kernel, random_state=0)
            gpc.fit(x_train[:m[j] - 1], np.ravel(y_train[:m[j] - 1]))
            gpc_errors.append(1 - gpc.score(x_test, np.ravel(y_test)))
        # plot before calling legend() so the labelled line is registered
        plt.plot(m, gpc_errors, label="GPC")
        plt.legend()
        plt.ylabel("Error")
        plt.xlabel("M value")
        plt.title(sets[i])
        plt.show()
    return
def gaussian_process_classifier(X, y, X_train, y_train, X_test, y_test):
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    accuracy_gpc = cross_val_score(gpc, X, y).mean()
    print('Score: GaussianProcessClassifier {}'.format(accuracy_gpc))
    predictions = gpc.predict(X_test)
    print(confusion_matrix(y_test, predictions))
def GPC(train, target, test):
    kernel = 1.0 * RBF(1.0)
    gpc = GaussianProcessClassifier(kernel=kernel, random_state=0)
    gpc.fit(train, target)
    #print("Score:", gpc.score(train, target))
    prediction = gpc.predict_proba(test)[:, 1]
    return prediction
def get_new_labels_entropy(evaluated_set_X, evaluated_set_y, unevaluated_X,
                           number_of_new_labels, _KRIGING=0):
    """Get a set of parameter combinations according to their predicted label entropy."""
    if _KRIGING:
        clf = GaussianProcessClassifier()
        clf.fit(evaluated_set_X,
                calibration_condition(evaluated_set_y, calibration_threshold))
    else:
        clf = fit_entropy_classifier(evaluated_set_X, evaluated_set_y,
                                     surrogate_model, surrogate_parameter_space)

    y_hat_probability = clf.predict_proba(unevaluated_X)
    # map() returns an iterator in Python 3, so materialize it before np.array
    y_hat_entropy = np.array(list(map(entropy, y_hat_probability)))
    y_hat_entropy /= y_hat_entropy.sum()
    unevaluated_X_size = unevaluated_X.shape[0]

    selections = np.random.choice(a=unevaluated_X_size,
                                  size=number_of_new_labels,
                                  replace=False,
                                  p=y_hat_entropy)
    return selections
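# The entropy weighting above turns predictive uncertainty into a sampling
# distribution over the unevaluated pool. A self-contained sketch of the same
# idea on toy data (the helper functions used above are not reproduced):
import numpy as np
from scipy.stats import entropy
from sklearn.gaussian_process import GaussianProcessClassifier

rng = np.random.RandomState(0)
X_seen = rng.uniform(0, 5, (20, 1))
y_seen = (X_seen[:, 0] > 2.5).astype(int)
X_pool = rng.uniform(0, 5, (100, 1))

clf = GaussianProcessClassifier().fit(X_seen, y_seen)
proba = clf.predict_proba(X_pool)
weights = np.array([entropy(p) for p in proba])
weights /= weights.sum()
picked = rng.choice(len(X_pool), size=5, replace=False, p=weights)
print(picked)  # indices sampled in proportion to predictive entropy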
def gaussianProcess(X_train, y_train, X_test, y_test, iteration):
    print("************ Gaussian Process Classification **************\n")
    gp_rbf_fix = GaussianProcessClassifier(kernel=76.5**2 * RBF(length_scale=179),
                                           optimizer=None)
    start_train_gp = time.time()
    gp_rbf_fix.fit(X_train, y_train)
    end_train_gp = time.time()
    training_time_gp = end_train_gp - start_train_gp
    print("Training GP model_selection %d took %.5f\n" % (iteration, training_time_gp))
    predict_train_gp = gp_rbf_fix.predict(X_train)
    print("training accuracy")
    print(accuracy_score(y_train, predict_train_gp))
    print("\n")
    start_test_gp = time.time()
    predict_test_gp = gp_rbf_fix.predict(X_test)
    end_test_gp = time.time()
    testing_time_gp = end_test_gp - start_test_gp
    print("Testing GP model_selection %d took %.5f\n" % (iteration, testing_time_gp))
    print("testing accuracy")
    print(accuracy_score(y_test, predict_test_gp))
    print("\n")
    return training_time_gp, testing_time_gp
def GP_Classifier(i):
    x_data, y_data = data_select(i)
    gpc = GaussianProcessClassifier(random_state=53)

    # split validation
    X_train, X_test, Y_train, Y_test = train_test_split(x_data, y_data,
                                                        test_size=0.25,
                                                        random_state=53)
    gpc.fit(X_train, np.ravel(Y_train, order='C'))
    train_score = gpc.score(X_train, Y_train)
    test_score = gpc.score(X_test, Y_test)
    print('Train Acc: %.3f, Test Acc: %.3f' % (train_score, test_score))

    # K-fold validation
    kfold = model_selection.KFold(n_splits=10)
    results_kfold = model_selection.cross_val_score(gpc, x_data,
                                                    np.ravel(y_data, order='C'),
                                                    cv=kfold)
    print("Accuracy: %.2f%%" % (results_kfold.mean() * 100.0))

    # leave-one-out validation
    loocv = LeaveOneOut()
    results_loocv = model_selection.cross_val_score(gpc, x_data,
                                                    np.ravel(y_data, order='C'),
                                                    cv=loocv)
    print("Accuracy: %.2f%%" % (results_loocv.mean() * 100.0))
def test_custom_optimizer():
    """ Test that GPC can use externally defined optimizers. """
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    for kernel in kernels:
        if kernel == fixed_kernel:
            continue
        gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
        gpc.fit(X, y_mc)
        # Checks that optimizer improved marginal likelihood
        assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                       gpc.log_marginal_likelihood(kernel.theta))
def TrainMyClassifierGPR(X_train, y_train, **kwargs):
    if 'kernel' in kwargs:
        gpc = GPC(multi_class='one_vs_rest', **kwargs)
    else:
        kern = RBF(length_scale=0.4)
        gpc = GPC(kernel=kern, multi_class='one_vs_rest')
    gpc.fit(X_train, y_train)
    return gpc
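# Usage sketch for the helper above, exercising both branches (toy data;
# assumes GPC is sklearn's GaussianProcessClassifier, as in the snippet):
import numpy as np
from sklearn.gaussian_process.kernels import Matern

X_demo = np.random.RandomState(1).uniform(0, 5, (30, 2))
y_demo = (X_demo[:, 0] > 2.5).astype(int)

model_default = TrainMyClassifierGPR(X_demo, y_demo)  # falls back to RBF(0.4)
model_custom = TrainMyClassifierGPR(X_demo, y_demo,
                                    kernel=Matern(length_scale=1.0))  # kwargs branch
print(model_custom.kernel_)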
def gaussian_process_classifier(self, X, Y):
    from sklearn.gaussian_process import GaussianProcessClassifier
    clf = GaussianProcessClassifier()
    clf.fit(X, Y)
    return clf
def gaussian_process_classifier(self):
    """
    Gaussian process classification (GPC) based on Laplace approximation.
    Fits a model on the stored training split and keeps it on the instance.
    """
    model = GaussianProcessClassifier()
    model.fit(self.__x_train, self.__y_train)
    self.__model = model
def gpc_sklearn(ax, x, y, kernel, optimizer="fmin_l_bfgs_b"):
    """
    Implemented with GaussianProcessClassifier from sklearn.gaussian_process.
    The implementation is based on Algorithms 3.1, 3.2, and 5.1 of GPML.
    The Laplace approximation is used to approximate the non-Gaussian posterior
    by a Gaussian, and the implementation is restricted to the logistic link
    function.

    INPUT:
        ax: an Axes object
        x: (N,) np.array
        y: (N,) np.array
        kernel: sklearn.gaussian_process.kernels object, used to initialize
            GaussianProcessClassifier
        optimizer: string or callable. Either one of the internally supported
            optimizers for optimizing the kernel's parameters, specified by a
            string, or an externally defined optimizer passed as a callable.
            If None is passed, the kernel's parameters are kept fixed.

    OUTPUT:
        ax: an Axes object
    """
    # Fit the GaussianProcessClassifier model
    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(x[:, np.newaxis], y)
    print("\nLearned kernel: %s" % gpc.kernel_)

    y_ = gpc.predict_proba(x[:, np.newaxis])[:, 1]
    xs = np.linspace(np.min(x), np.max(x), 1000)
    ys = gpc.predict_proba(xs[:, np.newaxis])[:, 1]
    # lr = LinearRegression()
    # lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

    # Plot
    # ax.plot(x, y, 'r.', markersize=12, alpha=0.2)
    ax.plot(xs, ys, markersize=12, alpha=0.2)
    # ax.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
    # ax.set_xlim(-0.1, 1.1)
    # ax.set_ylim(-0.1, 1.1)

    # compute ECE and accuracy after calibration
    ece = EceEval(np.array([1 - y_, y_]).T, y, num_bins=100)
    y_predict = y_ > 0.5
    acc = (y_predict == y).mean()
    ax.text(0.05, 0.8, 'ECE=%.4f\nACC=%.4f' % (ece, acc),
            size=14, ha='left', va='center',
            bbox={'facecolor': 'green', 'alpha': 0.5, 'pad': 4})
    return ax
def trainGP(self, kernel="RBF", param=1.0):
    model = GaussianProcessClassifier(1.0 * RBF(1.0))
    model.fit(self.train, self.trainTgt)
    trainOut = model.predict(self.train)
    # fraction of matching labels is an accuracy, not an error rate
    trainAccuracy = np.mean(self.trainTgt.ravel() == trainOut.ravel()) * 100
    print("Training Accuracy: ", trainAccuracy)
    return model
def gaussian_process_models(x_train, y_train):
    from sklearn.gaussian_process import GaussianProcessClassifier
    classifier1 = GaussianProcessClassifier()
    classifier1.fit(x_train, y_train)
    print('GaussianProcessClassifier training accuracy: ',
          classifier1.score(x_train, y_train))
    return classifier1
def do_gpc(X_test, Y_test, X_train, Y_train):
    # creating a Gaussian process classifier with an RBF kernel
    clf = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
    print("starts fitting")
    print(clf.fit(X_train, Y_train))
    print("finished fitting, starts predictions")
    Y_pred = clf.predict(X_test)
    print("finished predictions")
    return Y_pred
def train_l2_gaussian(x_train, x_test, y_train, y_test):
    clf = GaussianProcessClassifier()
    clf.fit(x_train, y_train)
    if y_test is not None:
        print('GaussianProcessClassifier:', clf.score(x_test, y_test))
    else:
        print('GaussianProcessClassifier:', clf.score(x_train, y_train))
    return np.reshape(clf.predict(x_train), (-1, 1))
def estimate_depth(self):
    # length-scale bounds must be strictly positive: they are optimized in
    # log-space, so a lower bound of 0 is invalid
    kernel = 1.5 * kernels.RBF(length_scale=1.0, length_scale_bounds=(1e-5, 3.0))
    # n_restarts_optimizer has no effect while optimizer=None (fixed kernel)
    clf = GaussianProcessClassifier(optimizer=None, n_restarts_optimizer=9,
                                    kernel=kernel)
    input_data = np.hstack((self.le_centers, self.re_centers))
    clf.fit(input_data, self.ids.ravel())
    self.regressor = clf
def GPAL(X, Y, train_ind, candidate_ind, test_ind, sample='En', kernel='rbf',
         Niter=500, eta=10):
    ourRes = []
    train_index = train_ind.copy()
    test_index = test_ind.copy()
    candidate_index = candidate_ind.copy()
    varRes = []
    enRes = []
    for i in range(Niter):
        print(i)
        if kernel == 'linear':
            dotkernel = DotProduct(sigma_0=1)
            model = GPC(kernel=dotkernel)
        else:
            model = GPC()
        model.fit(X[train_index], Y[train_index])
        ourRes.append(model.score(X[test_index, :], Y[test_index]))
        print(ourRes[-1])
        if sample == 'rand':
            sampleIndex = np.random.randint(len(candidate_index))
        elif sample == 'En':
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            sampleScore = en
            sampleIndex = np.argmax(sampleScore)
        elif sample == 'var':
            model.predict_proba(X[candidate_index, :])
            meanVar = np.zeros(len(candidate_index))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar)
        elif sample == 'varEN':
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            meanVar = np.zeros(len(candidate_index))
            enRes.append(np.mean(en))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar / len(np.unique(Y)) * eta + en)
            varRes.append(np.mean(meanVar))
            print('max var %f----selected var %f-----selected en %f '
                  % (np.max(meanVar), meanVar[sampleIndex], en[sampleIndex]))
        sampleIndex = candidate_index[sampleIndex]
        train_index = train_index + [sampleIndex]
        candidate_index = [x for x in candidate_index if x not in [sampleIndex]]
    return [ourRes, varRes, enRes]
def test_multi_class(kernel):
    # Test GPC for multi-class classification problems.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    assert_almost_equal(y_prob.sum(1), 1)

    y_pred = gpc.predict(X2)
    assert_array_equal(np.argmax(y_prob, 1), y_pred)
def test_sklearn_40(self):
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    irisd['Species'] = iris.target

    target = 'Species'
    features = irisd.columns.drop('Species')
    model = GaussianProcessClassifier()
    model.fit(irisd[features], irisd[target])
    with self.assertRaises(TypeError):
        skl_to_pmml(model, features, target, "no_pipeline.pmml")
def test_multi_class_n_jobs(kernel):
    # Test that multi-class GPC produces identical results with n_jobs>1.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
    gpc_2.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    y_prob_2 = gpc_2.predict_proba(X2)
    assert_almost_equal(y_prob, y_prob_2)
def test_custom_optimizer(kernel):
    # Test that GPC can use externally defined optimizers.
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(X, y_mc)
    # Checks that optimizer improved marginal likelihood
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
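# Outside the test suite, the same custom-optimizer hook can be used directly:
# the callable receives obj_func (returning the objective and its gradient when
# eval_gradient=True), the initial theta, and log-space bounds, and must return
# (theta_opt, func_min). A minimal sketch on toy data using scipy's minimize:
import numpy as np
from scipy.optimize import minimize
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

def scipy_optimizer(obj_func, initial_theta, bounds):
    # jac=True because obj_func returns (value, gradient) by default
    res = minimize(obj_func, initial_theta, method="L-BFGS-B",
                   jac=True, bounds=bounds)
    return res.x, res.fun

X_toy = np.random.RandomState(0).uniform(0, 5, (40, 1))
y_toy = (X_toy[:, 0] > 2.5).astype(int)
clf = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                optimizer=scipy_optimizer).fit(X_toy, y_toy)
print(clf.kernel_)  # kernel with optimized hyperparameters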
def trainModel(subjectid):
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid + '_sim.csv',
                              delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # discard all training data to preserve memory

    # Load test data
    testdata = np.genfromtxt('csvdata/' + subjectid + '_rival.csv',
                             delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data
    return gp, testx, testy
def plot(df, options):
    UNIQ_GROUPS = df.group.unique()
    UNIQ_GROUPS.sort()

    sns.set_style("white")
    grppal = sns.color_palette("Set2", len(UNIQ_GROUPS))

    print('# UNIQ GROUPS', UNIQ_GROUPS)

    cent_stats = df.groupby(['position', 'group', 'side']).apply(stats_per_group)
    cent_stats.reset_index(inplace=True)

    import time
    from sklearn import preprocessing
    from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ExpSineSquared, ConstantKernel, RBF

    ctlDF = cent_stats[cent_stats['group'] == 0]
    TNRightDF = cent_stats[cent_stats['group'] != 0]
    TNRightDF = TNRightDF[TNRightDF['side'] == 'right']

    dataDf = pd.concat([ctlDF, TNRightDF], ignore_index=True)
    print(dataDf)

    yDf = dataDf['group'] == 0
    yDf = yDf.astype(int)
    y = yDf.values
    print(y)
    print(y.shape)

    XDf = dataDf[['position', 'values']]
    X = XDf.values
    X = preprocessing.scale(X)
    print(X)
    print(X.shape)

    # kernel = ConstantKernel() + Matern(length_scale=mean, nu=3 / 2) + \
    #     WhiteKernel(noise_level=1e-10)
    kernel = 1**2 * Matern(length_scale=1, nu=1.5) + \
        WhiteKernel(noise_level=0.1)

    figure = plt.figure(figsize=(10, 6))

    stime = time.time()
    gp = GaussianProcessClassifier(kernel)
    gp.fit(X, y)
    print(gp.kernel_)
    print(gp.log_marginal_likelihood())
    print("Time for GPC fitting: %.3f" % (time.time() - stime))

    # create a mesh to plot in
    h = 0.1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    plt.figure(figsize=(10, 5))

    # Plot the predicted probabilities. For that, we will assign a color to
    # each point in the mesh [x_min, x_max]x[y_min, y_max].
    Z = gp.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:, 1]
    print(Z)
    print(Z.shape)

    # Put the result into a color plot
    Z = Z.reshape((xx.shape[0], xx.shape[1]))
    print(Z.shape)
    plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), origin="lower")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=np.array(["r", "g"])[y])
    plt.xlabel('position')
    plt.ylabel('normalized val')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title("%s, LML: %.3f" % ("TN vs. Control",
                                 gp.log_marginal_likelihood(gp.kernel_.theta)))

    plt.tight_layout()

    if options.title:
        plt.suptitle(options.title)

    if options.output:
        plt.savefig(options.output, dpi=150)

    if options.is_show:
        plt.show()
def trainPredict(subjectid, makeplot=False):
    print("testing participant " + subjectid)
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid + '_sim.csv',
                              delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    # Clean + downsample this data
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # discard all training data to preserve memory

    # load test data
    testdata = np.genfromtxt('csvdata/' + subjectid + '_rival.csv',
                             delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data
    testdata = None  # clear from memory

    # work out percentage in percept for each data point:
    percentages, nextpercept = assign_percentage(testy)

    # get a prediction for all points in the test data:
    predicty = gp.predict(testx)
    proby = gp.predict_proba(testx)

    if makeplot:
        summaryplot(subjectid, testx, testy, predicty, proby, gp)

    # Summarise prediction by reported percept
    meanprediction = {'mean' + percept: proby[testy == value, 1].mean()
                      for percept, value in perceptindices.items()}
    predictiondev = {'stdev' + percept: proby[testy == value, 1].std()
                     for percept, value in perceptindices.items()}
    predictionaccuracy = {'acc' + percept:
                          (predicty[testy == value] ==
                           testy[testy == value]).mean()
                          for percept, value in perceptindices.items()}
    # Summarise prediction by percentage in percept
    predictioncourse = {'timecourse' + percept + str(cutoff):
                        proby[(testy == value) &
                              (percentages < cutoff) &
                              (percentages > cutoff - 0.1), 1].mean()
                        for percept, value in perceptindices.items()
                        for cutoff in np.linspace(0.1, 1, 10)}

    # Summarise mixed percept time courses by the next percept
    nextcourse = {'nextcourse' + percept + str(cutoff):
                  proby[(testy == 0) &
                        (percentages < cutoff) &
                        (percentages > cutoff - 0.1) &
                        (nextpercept == perceptindices[percept]), 1].mean()
                  for percept in ['highfreq', 'lowfreq']
                  for cutoff in np.linspace(0.1, 1, 10)}

    afterdominant = {'after' + percept + "_" + after + "_" + str(cutoff):
                     proby[(testy == perceptindices[percept]) &
                           (percentages < cutoff) &
                           (percentages > cutoff - 0.1) &
                           (nextpercept == perceptindices[after]), 1].mean()
                     for percept, after in [('highfreq', 'mixed'),
                                            ('highfreq', 'lowfreq'),
                                            ('lowfreq', 'mixed'),
                                            ('lowfreq', 'highfreq')]
                     for cutoff in np.linspace(0.1, 1, 10)}

    # Only return the summarised data
    return meanprediction, predictiondev, predictionaccuracy, \
        predictioncourse, nextcourse, afterdominant
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process Model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(-lim, lim, res),
                     np.linspace(-lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
fig = plt.figure(1)
def pltshow(mplpyplot):
    mplpyplot.show()
# nodebox section end

# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)"
      % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
         accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
print("Log-loss: %.3f (initial) %.3f (optimized)"
      % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]),
         log_loss(y[:train_size], gp_opt.predict_proba(X[:train_size])[:, 1])))