def trainGaussianProcessClassifier(X, Y, verbose=False):
    if verbose:
        print("Training Gaussian Process Classifier")
    length_scale = [1.0] * len(X[0])  # one RBF length scale per feature (anisotropic kernel)
    clf = GaussianProcessClassifier(1.0 * RBF(length_scale), warm_start=True, random_state=42, n_jobs=-1)
    clf.fit(X, Y)
    return clf
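A minimal usage sketch for the helper above, assuming the scikit-learn imports the excerpt omits; the toy dataset here is invented for illustration:

from sklearn.datasets import make_classification
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Hypothetical toy data standing in for the real X, Y
X, Y = make_classification(n_samples=100, n_features=4, random_state=0)
clf = trainGaussianProcessClassifier(X, Y, verbose=True)
print(clf.score(X, Y))  # training accuracy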
Example #2
def make_gaussianprocess(X_train, X_test, y_train, y_test):
    model = GaussianProcessClassifier()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    get_classification_metrics(y_pred, y_test)
    return model
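The helper get_classification_metrics is not shown in this excerpt; a plausible minimal sketch of such a helper, built only on standard scikit-learn metrics:

from sklearn.metrics import accuracy_score, classification_report

def get_classification_metrics(y_pred, y_test):
    # Hypothetical stand-in for the author's helper: print basic metrics
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))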
Example #3
def gpc(X_train, y_train, X_test, y_test, lime_flag=False, kernel=1.0 * RBF(1.0),
        optimizer='fmin_l_bfgs_b',
        n_restarts_optimizer=0, warm_start=False,
        random_state=42, n_jobs=-1,
        max_iter_predict=1000,
        copy_X_train=True):
    
    '''
    Parameters:
    X_train, y_train, X_test, y_test - training and test sets
    lime_flag - enable or disable LIME explanations
    '''
    start_time = time.time()
    # create instance (pass the kernel argument through instead of hard-coding it)
    gpc = GaussianProcessClassifier(kernel=kernel,
                                    optimizer=optimizer,
                                    n_restarts_optimizer=n_restarts_optimizer,
                                    max_iter_predict=max_iter_predict,
                                    warm_start=warm_start,
                                    copy_X_train=copy_X_train,
                                    random_state=random_state,
                                    n_jobs=n_jobs)

    gpc.fit(X_train, y_train)
    # Predict on test set
    y_pred = gpc.predict(X_test)
    # understand the model through lime
    if lime_flag:
        lime_explainer(X_train, y_train, X_test, y_test, df_row=2, model_predictor=gpc, alogorithm_name="gpc")
    time_end = time.time() - start_time
    # Scores
    model_evaluation(X_train, y_train, X_test, y_test, y_pred, gpc, time_end, alg_name='gpc')
    # return model object
    return gpc
Example #4
    def compute_per_gaussian(self, max_iter=100):
        """Compute SVM per feature"""

        print(len(self.X_train))
        print(len(self.X_train[0]))

        # per feature
        for feature_index in range(len(self.X[0])):
            X_train_mod = []
            # define training dataset
            for example in range(len(self.X_train)):  # for each example (469)
                X_train_mod.append([self.X_train[example][self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):  # for each example (469)
                X_test_mod.append([self.X_test[example][self.counter]])

            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train)  # compute with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
def main():

    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    clf = GaussianProcessClassifier(max_iter_predict=500, warm_start=True, n_jobs=-1)

    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))

    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(y_test, y_predicted, ml_name='GP',
                                  classes=unique_labels,
                                  title='Confusion matrix for Gaussian Process evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(y_test, y_predicted, ml_name='GP',
                                       classes=unique_labels,
                                       title='Classification report for Gaussian Process evaluation')
def test_warning_bounds():
    kernel = RBF(length_scale_bounds=[1e-5, 1e-3])
    gpc = GaussianProcessClassifier(kernel=kernel)
    assert_warns_message(
        ConvergenceWarning, "The optimal value found for "
        "dimension 0 of parameter "
        "length_scale is close to "
        "the specified upper bound "
        "0.001. Increasing the bound "
        "and calling fit again may "
        "find a better value.", gpc.fit, X, y)

    kernel_sum = (WhiteKernel(noise_level_bounds=[1e-5, 1e-3]) +
                  RBF(length_scale_bounds=[1e3, 1e5]))
    gpc_sum = GaussianProcessClassifier(kernel=kernel_sum)
    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpc_sum.fit(X, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k1__noise_level is close to the "
                                         "specified upper bound 0.001. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")

    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "k2__length_scale is close to the "
                                         "specified lower bound 1000.0. "
                                         "Decreasing the bound and calling "
                                         "fit again may find a better value.")

    X_tile = np.tile(X, 2)
    kernel_dims = RBF(length_scale=[1., 2.], length_scale_bounds=[1e1, 1e2])
    gpc_dims = GaussianProcessClassifier(kernel=kernel_dims)

    with pytest.warns(None) as record:
        with warnings.catch_warnings():
            # scipy 1.3.0 uses tostring which is deprecated in numpy
            warnings.filterwarnings("ignore", "tostring", DeprecationWarning)
            gpc_dims.fit(X_tile, y)

    assert len(record) == 2
    assert record[0].message.args[0] == ("The optimal value found for "
                                         "dimension 0 of parameter "
                                         "length_scale is close to the "
                                         "specified upper bound 100.0. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")

    assert record[1].message.args[0] == ("The optimal value found for "
                                         "dimension 1 of parameter "
                                         "length_scale is close to the "
                                         "specified upper bound 100.0. "
                                         "Increasing the bound and calling "
                                         "fit again may find a better value.")
    def compute_per_gaussian(self, max_iter=100):
        """Compute SVM per feature"""
        # per feature
        for feature_index in range(len(self.X[0]) // 45):
            X_train_mod = []
            # define training dataset
            for example in range(len(self.X_train)):   # for each example (469)
                X_train_mod.append([self.X_train[example][self.epoch*self.neuron_num + self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):   # for each example (469)
                X_test_mod.append([self.X_test[example][self.epoch*self.neuron_num + self.counter]])

            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train) # compute with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
Example #8
def job(i):

    results = pd.DataFrame()
    prediction_voting = pd.DataFrame()
    df_train = pd.read_csv("preprocessed.csv")
    df_train = df_train.drop(["ID"], axis=1)
    y = df_train["Class"]
    X = df_train.drop(['Class'], axis=1)
    df_test = pd.read_csv("data/amazon_test.csv")
    df_test = df_test.drop(["ID"], axis=1)
    X_p = df_test

    alphas = [0.4]

    for alpha in alphas:
        result_row = {}

        result_row["fold"] = i
        result_row["alpha"] = alpha

        clf = GaussianProcessClassifier(n_jobs=-1)
        clf.fit(X, y)
        predicted = clf.predict(X_p)
        predicted_df = pd.DataFrame(predicted)
        predicted_df.columns = ["Class_" + str(alpha)]
        prediction_voting = pd.concat([prediction_voting, predicted_df],
                                      axis=1)
        predicted_df.to_csv("predicted_amazon_gauss.csv", sep=",", index=False)
        #result_row["score"] = round(clf.score(X_p, df_test["Class"]), 4)
        #confusion = confusion_matrix(df_test["Class"], predicted)
        #conf = pd.DataFrame(confusion)
        #conf.to_csv(target_path+"confusion_"+str(i)+"_"+str(alpha)+".csv", index=False)
        results = results.append(result_row, ignore_index=True)
    return prediction_voting
Example #9
def build_classifier_gp(data, labels, **kwargs):
    linear_kernel = Sum(k1=Product(k1=DotProduct(sigma_0=0, sigma_0_bounds='fixed'), k2=ConstantKernel()),
                        k2=ConstantKernel())
    gp_clf = GaussianProcessClassifier(kernel=linear_kernel)
    gp_clf.fit(data, labels)
    id_pos_class = gp_clf.classes_ == labels.max()
    return gp_clf, gp_clf.predict_proba(data)[:, id_pos_class]
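The Sum/Product wrappers above are the explicit form of scikit-learn's kernel arithmetic; the same kernel can be written with overloaded operators (a sketch, equivalent by construction):

from sklearn.gaussian_process.kernels import ConstantKernel, DotProduct

# (DotProduct * ConstantKernel) + ConstantKernel, as in build_classifier_gp
linear_kernel = (DotProduct(sigma_0=0, sigma_0_bounds='fixed') * ConstantKernel()
                 + ConstantKernel())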
Example #10
def predict_matches(preprocessed_matches, training_data):
    """Result: 2 - Home Team Wins, 1 - Draw, 0 - Away Team Wins"""

    X_cols = ["Overall Home", "rank Home", "Overall Away", "rank Away"]

    # Training algorithms
    X = training_data[X_cols]
    y_regr = training_data[["Goal Difference"]].values.ravel()
    y_class = training_data[["Simple Result"]].values.ravel()

    gpr = GaussianProcessRegressor(RationalQuadratic() +
                                   10 * WhiteKernel(noise_level=10))
    gpc = GaussianProcessClassifier(RationalQuadratic() +
                                    10 * WhiteKernel(noise_level=10))

    gpr.fit(X, y_regr)
    gpc.fit(X, y_class)
    print("Finished training")

    # Predicting new matches
    X_pred = preprocessed_matches[X_cols]
    y_regr_pred = gpr.predict(X_pred)
    y_class_pred = gpc.predict(X_pred)

    preprocessed_matches["Pred. Goal Difference"] = y_regr_pred
    preprocessed_matches["Pred. Result"] = y_class_pred

    predictions = preprocessed_matches[[
        "Date", "Home Team Name", "Away Team Name", "Pred. Goal Difference",
        "Pred. Result"
    ]]

    return predictions
Example #11
def GPR(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    print "GPR training :"

    X_train_reduced = X_train
    X_validate_reduced = X_validate

    train_size = params['train_size']
    test_size = params['test_size']
    train = params['train']

    if train:
        start = time.perf_counter()
        kernel_rbf = 1.0 * RBF()

        clf = GaussianProcessClassifier(kernel=kernel_rbf,
                                        multi_class='one_vs_rest')
        clf.fit(X_train_reduced[:train_size, :], Y_train[:train_size])

        writeObj('gaussian_model.pkl', clf)
        print "training took ", time.clock() - start, " s"

        Y_pred = clf.predict(X_validate_reduced[:test_size])
        return Y_pred, clf
    else:
        clf = readObj('gaussian_model.pkl')
        Y_pred = clf.predict(X_validate_reduced[:test_size])

        return Y_pred, clf
Example #12
def train_on_pool(choice_function, X, y, pool_idcs, train_idcs, test_idcs,
                  name):
    Xtest, ytest = X[test_idcs], y[test_idcs]
    accuracies, balances, n_points = [], [], []
    train_idcs, pool_idcs = copy(train_idcs), copy(pool_idcs)

    gp = GaussianProcessClassifier(n_restarts_optimizer=25,
                                   kernel=Matern(),
                                   n_jobs=-1,
                                   random_state=42)

    # Add initial points

    while pool_idcs:
        Xtrain, ytrain = X[train_idcs], y[train_idcs]
        gp.fit(Xtrain, ytrain)

        preds = gp.predict(Xtest)

        accuracies.append(accuracy_score(ytest, preds))
        n_points.append(len(train_idcs))

        train_classes = np.unique(y[train_idcs], return_counts=True)[1]
        balances.append(max(train_classes) / sum(train_classes))
        print(
            f"{len(train_idcs)}: {name}: {accuracies[-1]:.3}, class balance: {balances[-1]:.3}"
        )

        y_pool_p = gp.predict_proba(X[pool_idcs])
        chosen_idx = choice_function(y_pool_p)

        train_idcs.append(pool_idcs.pop(chosen_idx))

    return n_points, accuracies, balances
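train_on_pool leaves choice_function abstract; one plausible instance is least-confidence sampling (a sketch, not necessarily the author's original):

import numpy as np

def least_confidence(y_pool_p):
    # Pick the pool point whose most probable class is least certain
    return int(np.argmin(y_pool_p.max(axis=1)))

The function receives the (n_pool, n_classes) probability matrix from predict_proba and must return a positional index into pool_idcs.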
Example #13
def task3(feature_sets, label_sets):
    sets = ["A", "B", "crashes", "diabetes", "ionosphere"]
    kernel = 1.0 * RBF(1.0)
    for i in range(5):
        n = len(label_sets[i])
        m = np.linspace(10, .6 * n, num=10, dtype=int)
        div = int(n * .4)
        x_train = feature_sets[i][div:]
        x_test = feature_sets[i][:div]
        y_train = label_sets[i][div:]
        y_test = label_sets[i][:div]
        gpc_errors = []
        for j in range(10):
            gpc = GPC(kernel=kernel, random_state=0)
            gpc.fit(x_train[:m[j] - 1], np.ravel(y_train[:m[j] - 1]))
            gpc_errors.append(1 - gpc.score(x_test, np.ravel(y_test)))

        plt.plot(m, gpc_errors, label="GPC")
        plt.legend()  # legend must follow plot() so the "GPC" label is registered
        plt.ylabel("Error")
        plt.xlabel("M value")
        plt.title(sets[i])
        plt.show()

    return
Example #14
def gaussian_process_classifier(X, y, X_train, y_train, X_test, y_test):
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    accuracy_gpc = cross_val_score(gpc, X, y).mean()
    print('Score: GaussianProcessClassifier {}'.format(accuracy_gpc))
    predictions = gpc.predict(X_test)
    print(confusion_matrix(y_test, predictions))
Example #15
def GPC(train, target, test):
  kernel = 1.0 * RBF(1.0)
  gpc = GaussianProcessClassifier(kernel=kernel, random_state=0)
  gpc.fit(train, target)
  #print("Score:",gpc.score(train, target))
  prediction = gpc.predict_proba(test)[:, 1]
  return prediction
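A hypothetical call with toy arrays, assuming binary labels so that column 1 of predict_proba is the positive-class probability:

import numpy as np

rng = np.random.RandomState(0)
train = rng.rand(50, 3)
target = (train[:, 0] > 0.5).astype(int)
test = rng.rand(10, 3)
probs = GPC(train, target, test)  # shape (10,), positive-class probabilities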
Example #16
def get_new_labels_entropy(evaluated_set_X,
                           evaluated_set_y,
                           unevaluated_X,
                           number_of_new_labels,
                           _KRIGING=0):
    """ Get a set of parameter combinations according to their predicted label entropy
    
    
    
    """
    if _KRIGING:
        clf = GaussianProcessClassifier()
        clf.fit(evaluated_set_X,
                calibration_condition(evaluated_set_y, calibration_threshold))
    else:
        clf = fit_entropy_classifier(evaluated_set_X, evaluated_set_y,
                                     surrogate_model,
                                     surrogate_parameter_space)

    y_hat_probability = clf.predict_proba(unevaluated_X)
    y_hat_entropy = np.array([entropy(p) for p in y_hat_probability])  # np.array(map(...)) breaks in Python 3
    y_hat_entropy /= y_hat_entropy.sum()
    unevaluated_X_size = unevaluated_X.shape[0]

    selections = np.random.choice(a=unevaluated_X_size,
                                  size=number_of_new_labels,
                                  replace=False,
                                  p=y_hat_entropy)
    return selections
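The entropy-weighted draw at the end of the function can be exercised in isolation; a self-contained sketch with made-up probabilities:

import numpy as np
from scipy.stats import entropy

y_hat_probability = np.array([[0.9, 0.1], [0.5, 0.5], [0.7, 0.3]])
y_hat_entropy = np.array([entropy(p) for p in y_hat_probability])
y_hat_entropy /= y_hat_entropy.sum()
# Row 1 (the most uncertain prediction) is the most likely to be drawn
selections = np.random.choice(a=3, size=2, replace=False, p=y_hat_entropy)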
Example #17
def gaussianProcess(X_train, y_train, X_test, y_test, iteration):
    print("************ Gaussian Process Classification **************\n")
    gp_rbf_fix = GaussianProcessClassifier(kernel=76.5**2 *
                                           RBF(length_scale=179),
                                           optimizer=None)
    start_train_gp = time.time()
    gp_rbf_fix.fit(X_train, y_train)
    end_train_gp = time.time()
    training_time_gp = end_train_gp - start_train_gp
    print("Training GP model_selection %d took %.5f\n" %
          (iteration, training_time_gp))

    predict_train_gp = gp_rbf_fix.predict(X_train)
    print("training accuracy")
    print(accuracy_score(y_train, predict_train_gp))
    print("\n")

    start_test_gp = time.time()
    predict_test_gp = gp_rbf_fix.predict(X_test)
    end_test_gp = time.time()
    testing_time_gp = end_test_gp - start_test_gp
    print("Testing GP model_selection %d took %.5f\n" %
          (iteration, training_time_gp))
    print("testing accuracy")
    print(accuracy_score(y_test, predict_test_gp))
    print("\n")

    return training_time_gp, testing_time_gp
def GP_Classifier(i):
    x_data, y_data = data_select(i)
    gpc = GaussianProcessClassifier(random_state=53)
    # split validation
    X_train, X_test, Y_train, Y_test = train_test_split(x_data,
                                                        y_data,
                                                        test_size=0.25,
                                                        random_state=53)
    gpc.fit(X_train, np.ravel(Y_train, order='C'))
    train_score = gpc.score(X_train, Y_train)
    test_score = gpc.score(X_test, Y_test)
    print('Train Acc: %.3f, Test Acc: %.3f' % (train_score, test_score))
    # K-fold validation
    kfold = model_selection.KFold(n_splits=10)
    results_kfold = model_selection.cross_val_score(gpc,
                                                    x_data,
                                                    np.ravel(y_data,
                                                             order='C'),
                                                    cv=kfold)
    print("Accuracy: %.2f%%" % (results_kfold.mean() * 100.0))
    # leave-one-out validation
    loocv = LeaveOneOut()
    results_loocv = model_selection.cross_val_score(gpc,
                                                    x_data,
                                                    np.ravel(y_data,
                                                             order='C'),
                                                    cv=loocv)
    print("Accuracy: %.2f%%" % (results_loocv.mean() * 100.0))
def test_custom_optimizer():
    """ Test that GPC can use externally defined optimizers. """

    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(
                rng.uniform(np.maximum(-2, bounds[:, 0]),
                            np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    for kernel in kernels:
        if kernel == fixed_kernel:
            continue
        gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
        gpc.fit(X, y_mc)
        # Checks that optimizer improved marginal likelihood
        assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                       gpc.log_marginal_likelihood(kernel.theta))
Example #20
def TrainMyClassifierGPR(X_train, y_train, **kwargs):
    if 'kernel' in kwargs:
        gpc = GPC(multi_class='one_vs_rest', **kwargs)
    else:
        kern = RBF(length_scale=0.4)
        gpc = GPC(kernel=kern, multi_class='one_vs_rest')
    gpc.fit(X_train, y_train)
    return gpc
def gaussian_process_classifier(self, X, Y):
    from sklearn.gaussian_process import GaussianProcessClassifier

    clf = GaussianProcessClassifier()

    clf.fit(X, Y)

    return clf
Example #22
    def gaussian_process_classifier(self):
        """
        Gaussian process classification (GPC) based on Laplace approximation.
        Fits the model and stores it on the instance; nothing is returned.
        """
        model = GaussianProcessClassifier()
        model.fit(self.__x_train, self.__y_train)
        self.__model = model
Example #23
def gpc_sklearn(ax, x, y, kernel, optimizer="fmin_l_bfgs_b"):
    """
    Implemented with GaussianProcessClassifier in sklearn.gaussian_process.
    The implementation is based on Algorithms 3.1, 3.2, and 5.1 of GPML.
    The Laplace approximation is used for approximating the non-Gaussian posterior by a Gaussian.
    The implementation is restricted to using the logistic link function.

    INPUT:
        ax: an Axes object
        x: (N, ) np.array
        y: (N, ) np.array
        kernel: sklearn.gaussian_process.kernels object. Used to initialize GaussianProcessClassifier
        optimizer:
            string or callable.
            Can either be one of the internally supported optimizers for optimizing the kernel's parameters,
            specified by a string, or an externally defined optimizer passed as a callable.
            If a callable is passed, it must have the signature optimizer(obj_func, initial_theta, bounds)
            and return (theta_opt, func_min). If None is passed, the kernel's parameters are kept fixed.
    OUTPUT:
        ax: an Axes object
    """
    # Fit GaussianProcessClassification and LinearRegression models
    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(x[:, np.newaxis], y)
    print("\nLearned kernel: %s" % gpc.kernel_)
    y_ = gpc.predict_proba(x[:, np.newaxis])[:, 1]

    xs = np.linspace(np.min(x), np.max(x), 1000)
    ys = gpc.predict_proba(xs[:, np.newaxis])[:, 1]

    # lr = LinearRegression()
    # lr.fit(x[:, np.newaxis], y)  # x needs to be 2d for LinearRegression

    # Plot
    # ax.plot(x, y, 'r.', markersize=12, alpha = 0.2)
    ax.plot(xs, ys, markersize=12, alpha=0.2)

    # ax.plot(x, lr.predict(x[:, np.newaxis]), 'b-')
    # ax.set_xlim(-0.1, 1.1)
    # ax.set_ylim(-0.1, 1.1)

    # compute ece and acc after calibration
    ece = EceEval(np.array([1 - y_, y_]).T, y, num_bins=100)
    y_predict = y_ > 0.5
    acc = (y_predict == y).mean()

    ax.text(0.05,
            0.8,
            'ECE=%.4f\nACC=%.4f' % (ece, acc),
            size=14,
            ha='left',
            va='center',
            bbox={
                'facecolor': 'green',
                'alpha': 0.5,
                'pad': 4
            })

    return ax
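A usage sketch for gpc_sklearn on synthetic 1-D data; it assumes EceEval (the author's calibration helper) is already in scope, since it is not reproduced in this excerpt:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
x = rng.uniform(0, 5, 100)
y = (x > 2.5).astype(int)
fig, ax = plt.subplots()
gpc_sklearn(ax, x, y, kernel=1.0 * RBF(1.0))
plt.show()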
Example #24
    def trainGP(self, kernel="RBF", param=1.0):
        model = GaussianProcessClassifier(1.0 * RBF(param))
        model.fit(self.train, self.trainTgt)

        trainOut = model.predict(self.train)
        trainAcc = np.mean(self.trainTgt.ravel() == trainOut.ravel()) * 100  # percent correct
        print("Training Accuracy: ", trainAcc)

        return model
Example #25
def gaussian_process_models(x_train, y_train):
    from sklearn.gaussian_process import GaussianProcessClassifier
    classifier1 = GaussianProcessClassifier()
    classifier1.fit(x_train, y_train)

    print('GaussianProcessClassifier training accuracy: ',
          classifier1.score(x_train, y_train))

    return classifier1
Example #26
def do_gpc(X_test, Y_test, X_train, Y_train):
    # create a Gaussian process classifier with an RBF kernel
    clf = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
    print("starts fitting")
    print(clf.fit(X_train, Y_train))
    print("finished fitting, starts predictions")
    Y_pred = clf.predict(X_test)
    print("finished predictions")
    return Y_pred
Example #27
def train_l2_gaussian(x_train, x_test, y_train, y_test):
    clf = GaussianProcessClassifier()
    clf.fit(x_train, y_train)

    if y_test is not None:
        print('GaussianProcessClassifier:', clf.score(x_test, y_test))
    else:
        print('GaussianProcessClassifier:', clf.score(x_train, y_train))
    return np.reshape(clf.predict(x_train), (-1, 1))
Example #28
    def estimate_depth(self):
        kernel = 1.5 * kernels.RBF(length_scale=1.0,
                                   length_scale_bounds=(0, 3.0))
        clf = GaussianProcessClassifier(optimizer=None,
                                        n_restarts_optimizer=9,
                                        kernel=kernel)
        input_data = np.hstack((self.le_centers, self.re_centers))
        clf.fit(input_data, self.ids.ravel())
        self.regressor = clf
def GPAL(X,
         Y,
         train_ind,
         candidate_ind,
         test_ind,
         sample='En',
         kernel='rbf',
         Niter=500,
         eta=10):
    ourRes = []
    train_index = train_ind.copy()
    test_index = test_ind.copy()
    candidate_index = candidate_ind.copy()
    varRes = []
    enRes = []
    for i in range(Niter):
        print(i)
        if (kernel == 'linear'):
            dotkernel = DotProduct(sigma_0=1)
            model = GPC(kernel=dotkernel)
        else:
            model = GPC()
        model.fit(X[train_index], Y[train_index])
        ourRes.append(model.score(X[test_index, :], Y[test_index]))
        print(ourRes[-1])
        if (sample == 'rand'):
            sampleIndex = np.random.randint(len(candidate_index))
        elif (sample == 'En'):
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            sampleScore = en
            sampleIndex = np.argmax(sampleScore)
        elif (sample == 'var'):
            model.predict_proba(X[candidate_index, :])
            meanVar = np.zeros(len(candidate_index))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar)
        elif (sample == 'varEN'):
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            meanVar = np.zeros(len(candidate_index))
            enRes.append(np.mean(en))

            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar / len(np.unique(Y)) * eta + en)
            varRes.append(np.mean(meanVar))
            print('max var %f----selected var %f-----selected en %f ' %
                  (np.max(meanVar), meanVar[sampleIndex], en[sampleIndex]))
        sampleIndex = candidate_index[sampleIndex]
        train_index = train_index + [sampleIndex]
        candidate_index = [
            x for x in candidate_index if x not in [sampleIndex]
        ]
    return [ourRes, varRes, enRes]
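A hypothetical call, assuming X is a NumPy feature matrix and Y the label vector; the index arguments must be Python lists, because GPAL grows train_index by list concatenation:

import numpy as np

idx = list(np.random.RandomState(0).permutation(len(X)))
train_ind, candidate_ind, test_ind = idx[:10], idx[10:200], idx[200:]
scores, var_trace, en_trace = GPAL(X, Y, train_ind, candidate_ind, test_ind,
                                   sample='En', Niter=50)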
Example #30
def test_multi_class(kernel):
    # Test GPC for multi-class classification problems.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    assert_almost_equal(y_prob.sum(1), 1)

    y_pred = gpc.predict(X2)
    assert_array_equal(np.argmax(y_prob, 1), y_pred)
    def test_sklearn_40(self):
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        irisd['Species'] = iris.target
        target = 'Species'
        features = irisd.columns.drop('Species')
        model = GaussianProcessClassifier()
        model.fit(irisd[features], irisd[target])
        with self.assertRaises(TypeError):
            skl_to_pmml(model, features, target, "no_pipeline.pmml")
Example #33
def test_multi_class_n_jobs(kernel):
    # Test that multi-class GPC produces identical results with n_jobs>1.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
    gpc_2.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    y_prob_2 = gpc_2.predict_proba(X2)
    assert_almost_equal(y_prob, y_prob_2)
Example #34
def test_custom_optimizer(kernel):
    # Test that GPC can use externally defined optimizers.
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(X, y_mc)
    # Checks that optimizer improved marginal likelihood
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
def trainModel(subjectid):
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # Load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    return gp, testx, testy
def plot(df, options):

    UNIQ_GROUPS = df.group.unique()
    UNIQ_GROUPS.sort()

    sns.set_style("white")
    grppal = sns.color_palette("Set2", len(UNIQ_GROUPS))

    print('# UNIQ GROUPS', UNIQ_GROUPS)

    cent_stats = df.groupby(
        ['position', 'group', 'side']).apply(stats_per_group)
    cent_stats.reset_index(inplace=True)

    import time
    from sklearn import preprocessing
    from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ExpSineSquared, ConstantKernel, RBF


    ctlDF = cent_stats[ cent_stats['group'] == 0 ]

    TNRightDF = cent_stats[ cent_stats['group'] != 0]
    TNRightDF = TNRightDF[TNRightDF['side'] == 'right']

    dataDf = pd.concat([ctlDF, TNRightDF], ignore_index=True)
    print(dataDf)

    yDf = dataDf['group'] == 0
    yDf = yDf.astype(int)
    y = yDf.values
    print(y)
    print(y.shape)

    XDf = dataDf[['position', 'values']]
    X = XDf.values
    X = preprocessing.scale(X)
    print(X)
    print(X.shape)
    

    # kernel = ConstantKernel() + Matern(length_scale=mean, nu=3 / 2) + \
    # WhiteKernel(noise_level=1e-10)
    
    kernel = 1**2 * Matern(length_scale=1, nu=1.5) + \
        WhiteKernel(noise_level=0.1)

    figure = plt.figure(figsize=(10, 6))


    stime = time.time()
    gp = GaussianProcessClassifier(kernel)
    gp.fit(X, y)

    print(gp.kernel_)
    print(gp.log_marginal_likelihood())

    print("Time for GPC fitting: %.3f" % (time.time() - stime))


    # create a mesh to plot in
    h = 0.1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                        np.arange(y_min, y_max, h))

    plt.figure(figsize=(10, 5))
    
    # Plot the predicted probabilities. For that, we will assign a color to
    # each point in the mesh [x_min, x_max] x [y_min, y_max].

    Z = gp.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:,1]
    print(Z)
    print(Z.shape)
    # Put the result into a color plot
    Z = Z.reshape((xx.shape[0], xx.shape[1]))
    print(Z.shape)
    plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), origin="lower")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=np.array(["r", "g"])[y])
    plt.xlabel('position')
    plt.ylabel('normalized val')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title("%s, LML: %.3f" %
            ("TN vs. Control", gp.log_marginal_likelihood(gp.kernel_.theta)))

    plt.tight_layout()


    if options.title:
        plt.suptitle(options.title)

    if options.output:
        plt.savefig(options.output, dpi=150)

    if options.is_show:
        plt.show()
def trainPredict(subjectid, makeplot=False):
    print("testing participant " + subjectid)
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    # Clean + downsample this data
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    testdata = None  # clear from memory
    # work out percentage in percept for each data point:
    percentages, nextpercept = assign_percentage(testy)

    # get a prediction for all points in the test data:
    predicty = gp.predict(testx)
    proby = gp.predict_proba(testx)

    if makeplot:
        summaryplot(subjectid, testx, testy, predicty, proby, gp)

    # Summarise prediction by reported percept
    meanprediction = {'mean' + percept:
                      proby[testy == value, 1].mean()
                      for percept, value in perceptindices.items()}
    predictiondev = {'stdev' + percept:
                     proby[testy == value, 1].std()
                     for percept, value in perceptindices.items()}
    predictionaccuracy = {'acc' + percept:
                          (predicty[testy == value] ==
                           testy[testy == value]).mean()
                          for percept, value in perceptindices.items()}
    # Summarise prediction by percentage in percept
    predictioncourse = {'timecourse' + percept + str(cutoff):
                        proby[(testy == value) &
                              (percentages < cutoff) &
                              (percentages > cutoff - 0.1), 1].mean()
                        for percept, value in perceptindices.items()
                        for cutoff in np.linspace(0.1, 1, 10)}

    # Summarise mixed percept time courses by the next percept
    nextcourse = {'nextcourse' + percept + str(cutoff):
                  proby[(testy == 0) &
                        (percentages < cutoff) &
                        (percentages > cutoff - 0.1) &
                        (nextpercept == perceptindices[percept]), 1].mean()
                  for percept in ['highfreq', 'lowfreq']
                  for cutoff in np.linspace(0.1, 1, 10)}

    afterdominant = {'after' + percept + "_" + after + "_" + str(cutoff):
                     proby[(testy == perceptindices[percept]) &
                           (percentages < cutoff) &
                           (percentages > cutoff - 0.1) &
                           (nextpercept == perceptindices[after]), 1].mean()
                     for percept, after in [('highfreq', 'mixed'),
                                            ('highfreq', 'lowfreq'),
                                            ('lowfreq', 'mixed'),
                                            ('lowfreq', 'highfreq')]
                     for cutoff in np.linspace(0.1, 1, 10)}

    # Only return the summarised data
    return meanprediction, predictiondev, predictionaccuracy, \
        predictioncourse, nextcourse, afterdominant
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process Model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(-lim, lim, res),
                     np.linspace(-lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
fig = plt.figure(1)
Example #39
# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)"
      % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
         accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
print("Log-loss: %.3f (initial) %.3f (optimized)"
      % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]),
         log_loss(y[:train_size], gp_opt.predict_proba(X[:train_size])[:, 1])))