def test_lml_improving():
    """ Test that hyperparameter-tuning improves log-marginal likelihood. """
    for kernel in kernels:
        if kernel == fixed_kernel: continue
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                       gpc.log_marginal_likelihood(kernel.theta))
Example #2
def test_predict_consistent():
    """ Check binary predict decision has also predicted probability above 0.5.
    """
    for kernel in kernels:
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_array_equal(gpc.predict(X),
                           gpc.predict_proba(X)[:, 1] >= 0.5)
Example #3
def build_classifier_gp(data, labels, **kwargs):
    linear_kernel = Sum(k1=Product(k1=DotProduct(sigma_0=0, sigma_0_bounds='fixed'), k2=ConstantKernel()),
                        k2=ConstantKernel())
    gp_clf = GaussianProcessClassifier(kernel=linear_kernel)
    gp_clf.fit(data, labels)
    id_pos_class = gp_clf.classes_ == labels.max()
    return gp_clf, gp_clf.predict_proba(data)[:, id_pos_class]
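
# Usage sketch (an illustrative addition, not part of the original snippet):
# exercises build_classifier_gp on synthetic two-class data; the second return
# value is the predicted probability of the largest class label.
from sklearn.datasets import make_classification

demo_data, demo_labels = make_classification(n_samples=60, n_features=4, random_state=0)
demo_clf, demo_pos_proba = build_classifier_gp(demo_data, demo_labels)
print(demo_pos_proba.shape)  # (60, 1): one column selected by the boolean class mask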
Example #4
def test_converged_to_local_maximum(kernel):
    # Test that we are at a local maximum after hyperparameter optimization.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

    lml, lml_gradient = \
        gpc.log_marginal_likelihood(gpc.kernel_.theta, True)

    assert np.all((np.abs(lml_gradient) < 1e-4) |
                  (gpc.kernel_.theta == gpc.kernel_.bounds[:, 0]) |
                  (gpc.kernel_.theta == gpc.kernel_.bounds[:, 1]))
Example #5
def test_multi_class(kernel):
    # Test GPC for multi-class classification problems.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    assert_almost_equal(y_prob.sum(1), 1)

    y_pred = gpc.predict(X2)
    assert_array_equal(np.argmax(y_prob, 1), y_pred)
Example #6
def test_lml_gradient(kernel):
    # Compare analytic and numeric gradient of log marginal likelihood.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

    lml, lml_gradient = gpc.log_marginal_likelihood(kernel.theta, True)
    lml_gradient_approx = \
        approx_fprime(kernel.theta,
                      lambda theta: gpc.log_marginal_likelihood(theta,
                                                                False),
                      1e-10)

    assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
Example #7
def test_multi_class_n_jobs(kernel):
    # Test that multi-class GPC produces identical results with n_jobs>1.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
    gpc_2.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    y_prob_2 = gpc_2.predict_proba(X2)
    assert_almost_equal(y_prob, y_prob_2)
Example #8
def test_random_starts():
    # Test that an increasing number of random-starts of GP fitting only
    # increases the log marginal likelihood of the chosen theta.
    n_samples, n_features = 25, 2
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * 2 - 1
    y = (np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1)) > 0

    kernel = C(1.0, (1e-2, 1e2)) \
        * RBF(length_scale=[1e-3] * n_features,
              length_scale_bounds=[(1e-4, 1e+2)] * n_features)
    last_lml = -np.inf
    for n_restarts_optimizer in range(5):
        gp = GaussianProcessClassifier(
            kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
            random_state=0).fit(X, y)
        lml = gp.log_marginal_likelihood(gp.kernel_.theta)
        assert_greater(lml, last_lml - np.finfo(np.float32).eps)
        last_lml = lml
Example #9
def test_custom_optimizer(kernel):
    # Test that GPC can use externally defined optimizers.
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(X, y_mc)
    # Checks that optimizer improved marginal likelihood
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
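
# Illustrative sketch (an addition, not from the original tests): the same
# `optimizer` hook can wrap scipy. sklearn calls optimizer(obj_func, initial_theta,
# bounds), where obj_func(theta, eval_gradient=True) returns the negative
# log-marginal likelihood and its gradient, and expects (theta_opt, func_min) back.
from scipy.optimize import minimize

def scipy_lbfgs_optimizer(obj_func, initial_theta, bounds):
    result = minimize(lambda theta: obj_func(theta, eval_gradient=True),
                      initial_theta, bounds=bounds, jac=True, method="L-BFGS-B")
    return result.x, result.fun

# gpc = GaussianProcessClassifier(kernel=kernel, optimizer=scipy_lbfgs_optimizer).fit(X, y)
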
def trainModel(subjectid):
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # Load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    return gp, testx, testy
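
# Usage sketch (added for illustration; assumes the csvdata/*.csv files and the
# cleandata() helper used above are available in the surrounding project):
# gp, testx, testy = trainModel('subject01')
# print(gp.score(testx, testy))            # mean accuracy on the rivalry data
# print(gp.predict_proba(testx)[:, 1])     # probability of the second class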
Example #11
def return_model(mode, **kwargs):

    if inspect.isclass(mode):
        assert getattr(
            mode, 'fit',
            None) is not None, 'Custom model family should have a fit() method'
        model = mode(**kwargs)
    elif mode == 'logistic':
        solver = kwargs.get('solver', 'liblinear')
        n_jobs = kwargs.get('n_jobs', None)
        C = kwargs.get('C', 1.)
        max_iter = kwargs.get('max_iter', 5000)
        model = LogisticRegression(solver=solver,
                                   n_jobs=n_jobs,
                                   C=C,
                                   max_iter=max_iter,
                                   random_state=666)
    elif mode == 'Tree':
        model = DecisionTreeClassifier(random_state=666)
    elif mode == 'RandomForest':
        n_estimators = kwargs.get('n_estimators', 50)
        model = RandomForestClassifier(n_estimators=n_estimators,
                                       random_state=666)
    elif mode == 'GB':
        n_estimators = kwargs.get('n_estimators', 50)
        model = GradientBoostingClassifier(n_estimators=n_estimators,
                                           random_state=666)
    elif mode == 'AdaBoost':
        n_estimators = kwargs.get('n_estimators', 50)
        model = AdaBoostClassifier(n_estimators=n_estimators, random_state=666)
    elif mode == 'SVC':
        kernel = kwargs.get('kernel', 'rbf')
        model = SVC(kernel=kernel, random_state=666)
    elif mode == 'LinearSVC':
        model = LinearSVC(loss='hinge', random_state=666)
    elif mode == 'GP':
        model = GaussianProcessClassifier(random_state=666)
    elif mode == 'KNN':
        n_neighbors = kwargs.get('n_neighbors', 5)
        model = KNeighborsClassifier(n_neighbors=n_neighbors)
    elif mode == 'NB':
        model = MultinomialNB()
    elif mode == 'linear':
        model = LinearRegression()  # LinearRegression takes no random_state parameter
    elif mode == 'ridge':
        alpha = kwargs.get('alpha', 1.0)
        model = Ridge(alpha=alpha, random_state=666)
    elif 'conv' in mode:
        tf.reset_default_graph()
        address = kwargs.get('address', 'weights/conv')
        hidden_units = kwargs.get('hidden_layer_sizes', [20])
        activation = kwargs.get('activation', 'relu')
        weight_decay = kwargs.get('weight_decay', 1e-4)
        learning_rate = kwargs.get('learning_rate', 0.001)
        max_iter = kwargs.get('max_iter', 1000)
        dropout = kwargs.get('dropout', 0.)
        early_stopping = kwargs.get('early_stopping', 10)
        warm_start = kwargs.get('warm_start', False)
        batch_size = kwargs.get('batch_size', 256)
        kernel_sizes = kwargs.get('kernel_sizes', [5])
        strides = kwargs.get('strides', [5])
        channels = kwargs.get('channels', [1])
        validation_fraction = kwargs.get('validation_fraction', 0.)
        global_averaging = kwargs.get('global_averaging', 0.)
        optimizer = kwargs.get('optimizer', 'sgd')
        if mode == 'conv':
            model = CShapNN(mode='classification',
                            batch_size=batch_size,
                            max_epochs=max_iter,
                            learning_rate=learning_rate,
                            dropout=dropout,
                            weight_decay=weight_decay,
                            validation_fraction=validation_fraction,
                            early_stopping=early_stopping,
                            optimizer=optimizer,
                            warm_start=warm_start,
                            address=address,
                            hidden_units=hidden_units,
                            strides=strides,
                            global_averaging=global_averaging,
                            kernel_sizes=kernel_sizes,
                            channels=channels,
                            random_seed=666)
        elif mode == 'conv_reg':
            model = CShapNN(mode='regression',
                            batch_size=batch_size,
                            max_epochs=max_iter,
                            learning_rate=learning_rate,
                            dropout=dropout,
                            weight_decay=weight_decay,
                            validation_fraction=validation_fraction,
                            early_stopping=early_stopping,
                            optimizer=optimizer,
                            warm_start=warm_start,
                            address=address,
                            hidden_units=hidden_units,
                            strides=strides,
                            global_averaging=global_averaging,
                            kernel_sizes=kernel_sizes,
                            channels=channels,
                            random_seed=666)
    elif 'NN' in mode:
        solver = kwargs.get('solver', 'adam')
        hidden_layer_sizes = kwargs.get('hidden_layer_sizes', (20, ))
        if isinstance(hidden_layer_sizes, list):
            hidden_layer_sizes = list(hidden_layer_sizes)
        activation = kwargs.get('activation', 'relu')
        learning_rate_init = kwargs.get('learning_rate', 0.001)
        max_iter = kwargs.get('max_iter', 5000)
        early_stopping = kwargs.get('early_stopping', False)
        warm_start = kwargs.get('warm_start', False)
        batch_size = kwargs.get('batch_size', 'auto')

        if mode == 'NN':
            model = MLPClassifier(solver=solver,
                                  hidden_layer_sizes=hidden_layer_sizes,
                                  activation=activation,
                                  learning_rate_init=learning_rate_init,
                                  warm_start=warm_start,
                                  max_iter=max_iter,
                                  early_stopping=early_stopping,
                                  batch_size=batch_size)
        if mode == 'NN_reg':
            model = MLPRegressor(solver=solver,
                                 hidden_layer_sizes=hidden_layer_sizes,
                                 activation=activation,
                                 learning_rate_init=learning_rate_init,
                                 warm_start=warm_start,
                                 max_iter=max_iter,
                                 early_stopping=early_stopping,
                                 batch_size=batch_size)
    else:
        raise ValueError("Invalid mode!")
    return model
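
# Usage sketch (an illustrative addition, not part of the original module):
# `mode` selects the model family and keyword arguments override the defaults above.
demo_logreg = return_model('logistic', C=0.5, max_iter=2000)
demo_forest = return_model('RandomForest', n_estimators=100)
print(type(demo_logreg).__name__, type(demo_forest).__name__)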
Example #12
classifier = np.all(np.unique(Y.to_numpy()) == [0, 1])
outputs = Y.shape[1]

# separate the data into training and testing
if TIME_SERIES:
    test_idx = X.index.values[-int(X.shape[0] / 5):]
else:
    np.random.seed(1)
    test_idx = np.random.choice(a=X.index.values,
                                size=int(X.shape[0] / 5),
                                replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# set up the model
if classifier:
    model = MultiOutputClassifier(GaussianProcessClassifier(random_state=42))
else:
    model = MultiOutputRegressor(GaussianProcessRegressor(random_state=42))

# train the model
model.fit(X.iloc[train_idx, :], Y.iloc[train_idx, :])

# In[2]: Collect the predictions

# predict training and testing data
train_predict = pd.DataFrame(model.predict(X.iloc[train_idx, :]),
                             columns=Y.columns)
test_predict = pd.DataFrame(model.predict(X.iloc[test_idx, :]),
                            columns=Y.columns)

# reshape all of the predictions into a single table
def grid_search_pipeline(X, y): 
    """ making pipeline and gridsearchcv"""
    clf = Pipeline(steps=[  
                        ('scaler', StandardScaler()) 
                        , ('clf', LogisticRegression())
                   ]) 

    param_grid = [
                 { 'clf': [DecisionTreeClassifier(random_state=24, max_leaf_nodes=1000, class_weight='balanced')], 
             'clf__max_depth':  [5, 7, 10, 11, 12, 13, 14,  15, 16, 17]
                    }, 
                 { 'clf': [ExtraTreeClassifier(max_features=None, random_state=24)], 
             'clf__max_depth': [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 30, 50, 100]
                    }, 
                 { 'clf': [ExtraTreesClassifier(max_features=None, n_jobs=-1, random_state=24)], 
             'clf__n_estimators':  [1, 2, 5, 7, 10, 15, 20], 
             'clf__max_depth':  [2, 4, 6, 8, 10, 12, 15, 25, 30]
                    }, 
                 { 'clf': [RandomForestClassifier(max_features=None, n_jobs=-1, random_state=24)], 
             'clf__n_estimators':  [61, 62, 63, 64, 65, 66, 67, 68, 69, 70], 
             'clf__max_depth':  [2, 4, 6, 8, 10, 12, 15, 25, 30]
                    },
              { 'clf': [KNeighborsClassifier(weights='distance', n_jobs = -1)], 
              'clf__n_neighbors':  list(range(1,30)),
               'clf__leaf_size': list(range(1,30))   # estimator__ prefix to get through MOC
                  }, 
              {'clf': [RadiusNeighborsClassifier(weights='distance', n_jobs = -1 )], 
               'clf__radius': [300, 200, 100],
               'clf__leaf_size': [100, 50, 15]
               },
                { 'clf': [RidgeClassifier(max_iter=110, class_weight='balanced', random_state=24)], 
             'clf__alpha': [100000, 50000, 10000, 5000]
                  },
                 { 'clf': [LogisticRegression(penalty='elasticnet', random_state=24, max_iter=1000, n_jobs = -1, l1_ratio=.5)], 
             'clf__solver':   ['saga'],
             'clf__multi_class':   ['auto', 'ovr', 'multinomial']
                   }, 
                 { 'clf': [LogisticRegressionCV(penalty='elasticnet',  n_jobs=-1, random_state=24, l1_ratios=[.5])], 
             'clf__solver':   ['saga'],
             'clf__multi_class':   ['auto', 'ovr', 'multinomial']
                    },  
                 { 'clf': [RidgeClassifierCV(cv = 5, class_weight='balanced')], 
             'clf__class_weight':   [None, 'balanced']
                  }, 
                 { 'clf': [GaussianNB()]
                  }, 
                 { 'clf': [NearestCentroid()], 
             'clf__shrink_threshold':   [6,5,4, 4.5, 3, 3.5, 2,1]
                  }, 
              { 'clf': [LinearSVC(dual=False, random_state=24, max_iter=1000000000)], 
               'clf__C': np.logspace(-2, 1, 200).tolist(),  # C must be > 0; values spaced exponentially
               'clf__multi_class': ['ovr', 'crammer_singer']
               },  # based on https://scikit-learn.org/stable/modules/svm.html: C trades off misclassification of training examples against simplicity of the decision surface. A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly; exponential spacing covers several orders of magnitude
                 { 'clf': [LinearDiscriminantAnalysis()], 
             'clf__solver':   ['lsqr', 'eigen'], 
             'clf__shrinkage':   [None, 'auto'] 
                   }, 
                 { 'clf': [QuadraticDiscriminantAnalysis()]
                   },   
                 { 'clf': [LabelPropagation(kernel='knn', gamma=0, n_jobs=-1)], 
             'clf__n_neighbors':   np.arange(1, 21).tolist(), 
                   }, 
                 { 'clf': [LabelSpreading(kernel='knn', gamma=0, n_jobs=-1)], 
             'clf__n_neighbors':   np.arange(1, 21).tolist(), 
             'clf__alpha':  [.2, .4, .6, .8]
                   }, 
                 { 'clf': [NuSVC(max_iter=-1, random_state=24)], 
             'clf__nu':  [0.25, 0.5, 0.75, 1.0],  # nu must lie in (0, 1]
             'clf__kernel': ['linear', 'poly','rbf', 'sigmoid'],  # 'precomputed' would require a Gram matrix rather than raw features
               'clf__gamma':  ['scale', 'auto'], 
               'clf__shrinking':  [True,False], 
               'clf__class_weight':  [None, 'balanced'], 
               'clf__decision_function_shape':  ['ovo','ovr'], 
                   }, 
                 { 'clf': [SVC(probability=True, random_state=24)], 
             'clf__C':  [.2, .4, .6, .8], 
             'clf__kernel': ['linear', 'poly','rbf', 'sigmoid'],  # 'precomputed' would require a Gram matrix rather than raw features
               'clf__gamma':  ['scale', 'auto'], 
               'clf__shrinking':  [True,False], 
               'clf__class_weight':  [None, 'balanced'], 
               'clf__decision_function_shape':  ['ovo','ovr'], 
                   }, 
            { 'clf': [GaussianProcessClassifier(copy_X_train=False, random_state=24, n_jobs=-1)], 
             'clf__n_restarts_optimizer':  [1, 2]
                 }, 
                 { 'clf': [SGDClassifier(n_jobs=-1, random_state=24, )], 
             'clf__penalty':  ['l2', 'elasticnet'], 
             'clf__learning_rate': ['optimal', 'adaptive'], 
             'clf__eta0': [0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4],
             'clf__class_weight': [None, 'balanced'],
                   }, 
                 { 'clf': [Perceptron(n_jobs=-1, random_state=24, )], 
             'clf__penalty':  ['l2', 'elasticnet'], 
             'clf__class_weight': [None, 'balanced'],
                   }, 
                 { 'clf': [PassiveAggressiveClassifier(n_jobs=-1, random_state=24, average=True)], 
             'clf__C':  [.001, .01, .1, .2, .4, .6, .8, 1, 1.2], 
             'clf__class_weight': [None, 'balanced']
                   }, 
                 { 'clf': [GradientBoostingClassifier(random_state=24)], 
             'clf__learning_rate': [.06, .07, .08,.09], 
             'clf__n_estimators': [180, 190, 200],
             'clf__max_depth': [2, 4, 6, 8],
             'clf__init': [None, 'zero']
                    }, 
            ]
    

    scoring = {'balanced_accuracy': 'balanced_accuracy'
               , 'f1_micro': 'f1_micro'        
               ,'f1_macro': 'f1_macro'
               , 'f1_weighted': 'f1_weighted'
               ,'precision_micro': 'precision_micro'
               , 'precision_macro': 'precision_macro'
               ,'precision_weighted': 'precision_weighted'
               , 'recall_micro': 'recall_micro'
               ,'recall_macro': 'recall_macro'
               , 'recall_weighted': 'recall_weighted'
     }
    
    search = GridSearchCV(clf, param_grid, scoring=scoring, refit= 'balanced_accuracy', n_jobs=-1, cv=5, return_train_score=True)
    search.fit(X, y)
      
    print(f'Training Machine Learning Classifier for {SYSTEM} Color Categories: successful!')
    
    return search 
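
# Usage sketch (illustrative addition): the grid above is large, so even a small
# dataset takes a while; SYSTEM is assumed to be a module-level name used only in
# the final log message.
# from sklearn.datasets import load_iris
# SYSTEM = 'demo'                              # hypothetical placeholder
# iris = load_iris()
# search = grid_search_pipeline(iris.data, iris.target)
# print(search.best_params_, search.best_score_)
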
def plot(df, options):

    UNIQ_GROUPS = df.group.unique()
    UNIQ_GROUPS.sort()

    sns.set_style("white")
    grppal = sns.color_palette("Set2", len(UNIQ_GROUPS))

    print('# UNIQ GROUPS', UNIQ_GROUPS)

    cent_stats = df.groupby(
        ['position', 'group', 'side']).apply(stats_per_group)
    cent_stats.reset_index(inplace=True)

    import time
    from sklearn import preprocessing
    from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ExpSineSquared, ConstantKernel, RBF


    ctlDF = cent_stats[ cent_stats['group'] == 0 ]

    TNRightDF = cent_stats[ cent_stats['group'] != 0]
    TNRightDF = TNRightDF[TNRightDF['side'] == 'right']

    dataDf = pd.concat([ctlDF, TNRightDF], ignore_index=True)
    print(dataDf)

    yDf = dataDf['group'] == 0
    yDf = yDf.astype(int)
    y = yDf.values
    print(y)
    print(y.shape)

    XDf = dataDf[['position', 'values']]
    X = XDf.values
    X = preprocessing.scale(X)
    print(X)
    print(X.shape)
    

    # kernel = ConstantKernel() + Matern(length_scale=mean, nu=3 / 2) + \
    # WhiteKernel(noise_level=1e-10)
    
    kernel = 1**2 * Matern(length_scale=1, nu=1.5) + \
        WhiteKernel(noise_level=0.1)

    figure = plt.figure(figsize=(10, 6))


    stime = time.time()
    gp = GaussianProcessClassifier(kernel)
    gp.fit(X, y)

    print(gp.kernel_)
    print(gp.log_marginal_likelihood())

    print("Time for GPR fitting: %.3f" % (time.time() - stime))


    # create a mesh to plot in
    h = 0.1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                        np.arange(y_min, y_max, h))

    plt.figure(figsize=(10, 5))
    
    # Plot the predicted probabilities. For that, we will assign a color to
    # each point in the mesh [x_min, x_max]x[y_min, y_max].

    Z = gp.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:,1]
    print(Z)
    print(Z.shape)
    # Put the result into a color plot
    Z = Z.reshape((xx.shape[0], xx.shape[1]))
    print(Z.shape)
    plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), origin="lower")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=np.array(["r", "g"])[y])
    plt.xlabel('position')
    plt.ylabel('normalized val')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title("%s, LML: %.3f" %
            ("TN vs. Control", gp.log_marginal_likelihood(gp.kernel_.theta)))

    plt.tight_layout()


    if options.title:
        plt.suptitle(options.title)

    if options.output:
        plt.savefig(options.output, dpi=150)

    if options.is_show:
        plt.show()
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    h = .02  # step size in the mesh

    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
            "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
            "Naive Bayes", "QDA"]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()]

    # X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
    #                         random_state=1, n_clusters_per_class=1)
    
    
    rng = np.random.RandomState(2)
    # X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
Example #16
    def __init__(self, **kwargs):
        super(GaussianProcess, self).__init__()
        super(GaussianProcess, self).SetModel(
            GaussianProcessClassifier(random_state=42, **kwargs))
Example #17
label = shuffle(label, random_state=41)[:5000]
Kfold = StratifiedKFold(n_splits=n_splits)

accuracy_rbf_training = np.zeros(n_splits)
accuracy_rbf_testing = np.zeros(n_splits)
accuracy_matern_training = np.zeros(n_splits)
accuracy_matern_testing = np.zeros(n_splits)
nlpd_matern_t = np.zeros(n_splits)
nlpd_matern_v = np.zeros(n_splits)
best_kernel = None
best_nlpd = np.inf

for i, (train_index, test_index) in enumerate(Kfold.split(features, label)):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = label[train_index], label[test_index]
    gp_matern_fix = GaussianProcessClassifier(kernel=3.7**2 * Matern(length_scale=9.4, nu=1.5),
                                              optimizer=None)
    gp_matern_fix.fit(X_train,y_train)
    accuracy_matern_training[i] = accuracy_score(y_train, gp_matern_fix.predict(X_train))
    accuracy_matern_testing[i] = accuracy_score(y_test, gp_matern_fix.predict(X_test))
    neg_lpd_matern_t = -np.mean(np.log(gp_matern_fix.predict_proba(X_train)[np.arange(len(X_train)), y_train]))
    neg_lpd_matern_v = -np.mean(np.log(gp_matern_fix.predict_proba(X_test)[np.arange(len(X_test)), y_test]))
    nlpd_matern_t[i] = neg_lpd_matern_t
    nlpd_matern_v[i] = neg_lpd_matern_v
print("Average training accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_traing))
print("Average testing accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_testing))
print("Average negative log predictive density of training set with matern kernel: %.5f"
      % np.mean(nlpd_matern_t))
print("Average negative log predictive density of validation set with matern kernel: %.5f"
      % np.mean(nlpd_matern_v))
print("Total elapsed time: %.5f" % (time.time()-elapsed))
aa.write("\n-------------------10-fold corss-validation result--------------------\n")
for i in n_restarts_optimizerR: 
    for j in max_iter_predictR:
#         print("\tthe param is :%s" % i)
#         print("\tthe param is :%s" % j)
        MCC=[]
        ACC=[]
        SN =[]
        SP=[]
        precision=[]
        NPV=[]
        F1=[]
        
        k=1  
        for train, test in cv.split(X, y):
            gpc = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                            n_restarts_optimizer=i,
                                            max_iter_predict=j)
            y_true, y_pred = y[test], gpc.fit(X.iloc[train], y[train]).predict(X.iloc[test])
#             print("\tmatthews_corrcoef: %1.3f" % metrics.matthews_corrcoef(y_true, y_pred))
            MCCv=metrics.matthews_corrcoef(y_true, y_pred)
            MCC.append(MCCv)
    #         print("\taccuracy_score: %1.3f\n" % metrics.accuracy_score(y_true, y_pred))
            ACCv=metrics.accuracy_score(y_true, y_pred)
            ACC.append(ACCv)
            
            SNv,SPv,precisionv,NPVv,F1v = comE(y_true, y_pred)
            ## y_true, y_pred 
            SN.append(SNv)
            SP.append(SPv)
            precision.append(precisionv)
            NPV.append(NPVv)
            F1.append(F1v)
#CHALLENGE - ...and train them on our data
clf = clf.fit(X, Y)
# Calling decision tree classifier and fitting
clf1 =tree.DecisionTreeClassifier()
clfDT =clf1.fit(X, Y) 

#Calling support vector machine and fitting
clf2 = svm.SVC(probability=True)
clfSVC =clf2.fit(X, Y)  

#Calling KNeighbors classifier and fitting 
clf3 = KNeighborsClassifier(n_neighbors=3)
clfKN =clf3.fit(X, Y)  

#Calling gaussian_process classifier and fitting 
clf4 = GaussianProcessClassifier()
clfGP = clf4.fit(X, Y)

##Calling MLPClassifier and fitting 
clf5 = MLPClassifier(learning_rate='constant', learning_rate_init=0.001,)
clfMLP = clf5.fit(X, Y)

test = [[180, 80, 42]]
#Storing results 
predictionDT = clfDT.predict(test)
predictionSVC = clfSVC.predict(test)
predictionKN = clfKN.predict(test)
predictionGP = clfGP.predict(test)
predictionMLP = clfMLP.predict(test)

#Storing probabilities
# Prediction time!
linear_models = []

linear_models.append(("LogisticRegression", LogisticRegression()))

kernel_models = []

kernel_models.append(("Linear Support Vector Classifier", LinearSVC()))
kernel_models.append(
    ("Support Vector Classifier", SVC(kernel="rbf", probability=True)))
kernel_models.append(("Nu Support Vector Classifer", NuSVC(probability=True)))

neighbor_models = [("K-nearest neighbours Ball",
                    KNeighborsClassifier(algorithm='ball_tree'))]

gaussian_models = [("Gaussian Process", GaussianProcessClassifier())]
deTree_models = [("Decision Tree", DecisionTreeClassifier())]

ensemble_models = []

ensemble_models.append(("Random forest", RandomForestClassifier()))
ensemble_models.append(("AdaBoost", AdaBoostClassifier()))
ensemble_models.append(("GradientBoosting", GradientBoostingClassifier()))

mlpNetwork_models = [("MLP NNetwork", MLPClassifier())]

model_families = [("Linear Models", linear_models),
                  ("Kernel Methods", kernel_models),
                  ("Neighbour", neighbor_models),
                  ("Gaussian Methods", gaussian_models),
                  ("Decision Tree", deTree_models),
    def gpc(self, dataset_array, label_array, data_teste):
        from sklearn.gaussian_process import GaussianProcessClassifier

        clf = GaussianProcessClassifier(max_iter_predict=300)
        clf.fit(dataset_array, label_array)
        return clf.predict(data_teste)
# Design of experiments
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(- lim, lim, res),
                     np.linspace(- lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
Example #23
    Test_X_B = Img_Bank[i].Raw_Img[:,:,2].ravel()
    
    Temp_X = np.array((Test_X_R,Test_X_G,Test_X_B)).transpose()
    Temp_Y = Img_Bank[i].Raw_Mask.ravel()
    
    Test_X = np.concatenate((Test_X,Temp_X))
    Test_Y = np.concatenate((Test_Y,Temp_Y))

#%% GPC learning trial (a KNN classifier is left commented out below).
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

#classifier = KNeighborsClassifier(n_neighbors=10, n_jobs=-1, verbose )
kernel = 1.0 * RBF(1.0)
classifier = GaussianProcessClassifier(kernel=kernel, random_state=0)

print('GPC - Learning Started')
classifier.fit(Test_X, Test_Y)
print('GPC - Learning Finished')

print('GPC - Prediction Started')
Test_photo = 10
Test_X_R = Img_Bank[Test_photo].Raw_Img[:,:,0].ravel()
Test_X_G = Img_Bank[Test_photo].Raw_Img[:,:,1].ravel()
Test_X_B = Img_Bank[Test_photo].Raw_Img[:,:,2].ravel()
Test_X = np.array((Test_X_R,Test_X_G,Test_X_B)).transpose()

Y_Pred = classifier.predict(Test_X)
Y_Pred = np.reshape(Y_Pred,(384,512))
plt.matshow(Y_Pred)
print(classification_report(y_true, y_pred))
print()

"""## GaussianProcessClassifier"""

# Commented out IPython magic to ensure Python compatibility.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

tuned_parameters = [{'kernel':[1.0 * RBF(0.8),1.0 * RBF(0.9),1.0 * RBF(1.0),1.0 * RBF(1.1),1.0 * RBF(1.2),1.0 * RBF(1.3)]}
                    ]


print()

clf = GridSearchCV(
    GaussianProcessClassifier(), tuned_parameters, scoring='roc_auc', cv=10
)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
#           % (mean, std * 2, params))
print()
Example #25
    'L2 logistic (Multinomial)':
    LogisticRegression(C=C,
                       penalty='l2',
                       solver='saga',
                       multi_class='multinomial',
                       max_iter=10000),
    'L2 logistic (OvR)':
    LogisticRegression(C=C,
                       penalty='l2',
                       solver='saga',
                       multi_class='ovr',
                       max_iter=10000),
    'Linear SVC':
    SVC(kernel='linear', C=C, probability=True, random_state=0),
    'GPC':
    GaussianProcessClassifier(kernel)
}

n_classifiers = len(classifiers)

plt.figure(figsize=(n_features * (X[:, 0].max() - X[:, 0].min()),
                    n_classifiers * (X[:, 1].max() - X[:, 1].min())))
plt.subplots_adjust(bottom=-.1, top=1)
xx = np.linspace(floor(X[:, 0].min()), ceil(X[:, 0].max()), 100)
yy = np.linspace(floor(X[:, 1].min()), ceil(X[:, 1].max()), 100).T
xx, yy = np.meshgrid(xx, yy)
Xfull = np.c_[xx.ravel(), yy.ravel()]

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(X, y)
Example #26
def get_model(model_or_name, threads=-1, classify=False, seed=0):
    regression_models = {
        'xgboost': (XGBRegressor(max_depth=6,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBRegressor'),
        'lightgbm': (LGBMRegressor(n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMRegressor'),
        'randomforest':
        (RandomForestRegressor(n_estimators=100,
                               n_jobs=threads), 'RandomForestRegressor'),
        'adaboost': (AdaBoostRegressor(), 'AdaBoostRegressor'),
        'linear': (LinearRegression(), 'LinearRegression'),
        'elasticnet': (ElasticNetCV(positive=True), 'ElasticNetCV'),
        'lasso': (LassoCV(positive=True), 'LassoCV'),
        'ridge': (Ridge(), 'Ridge'),
        'xgb.1k': (XGBRegressor(max_depth=6,
                                n_estimators=1000,
                                n_jobs=threads,
                                random_state=seed), 'XGBRegressor.1K'),
        'xgb.10k': (XGBRegressor(max_depth=6,
                                 n_estimators=10000,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBRegressor.10K'),
        'lgbm.1k': (LGBMRegressor(n_estimators=1000,
                                  n_jobs=threads,
                                  random_state=seed,
                                  verbose=-1), 'LGBMRegressor.1K'),
        'lgbm.10k': (LGBMRegressor(n_estimators=10000,
                                   n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMRegressor.10K'),
        'rf.1k':
        (RandomForestRegressor(n_estimators=1000,
                               n_jobs=threads), 'RandomForestRegressor.1K'),
        'rf.10k': (RandomForestRegressor(n_estimators=10000, n_jobs=threads),
                   'RandomForestRegressor.10K')
    }

    classification_models = {
        'xgboost': (XGBClassifier(max_depth=6,
                                  n_jobs=threads,
                                  random_state=seed), 'XGBClassifier'),
        'lightgbm': (LGBMClassifier(n_jobs=threads,
                                    random_state=seed,
                                    verbose=-1), 'LGBMClassifier'),
        'randomforest':
        (RandomForestClassifier(n_estimators=100,
                                n_jobs=threads), 'RandomForestClassifier'),
        'adaboost': (AdaBoostClassifier(), 'AdaBoostClassifier'),
        'logistic': (LogisticRegression(), 'LogisticRegression'),
        'gaussian': (GaussianProcessClassifier(), 'GaussianProcessClassifier'),
        'knn': (KNeighborsClassifier(), 'KNeighborsClassifier'),
        'bayes': (GaussianNB(), 'GaussianNB'),
        'svm': (SVC(), 'SVC'),
        'xgb.1k': (XGBClassifier(max_depth=6,
                                 n_estimators=1000,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBClassifier.1K'),
        'xgb.10k': (XGBClassifier(max_depth=6,
                                  n_estimators=10000,
                                  n_jobs=threads,
                                  random_state=seed), 'XGBClassifier.10K'),
        'lgbm.1k': (LGBMClassifier(n_estimators=1000,
                                   n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMClassifier.1K'),
        'lgbm.10k': (LGBMClassifier(n_estimators=1000,
                                    n_jobs=threads,
                                    random_state=seed,
                                    verbose=-1), 'LGBMClassifier.10K'),
        'rf.1k':
        (RandomForestClassifier(n_estimators=1000,
                                n_jobs=threads), 'RandomForestClassifier.1K'),
        'rf.10k': (RandomForestClassifier(n_estimators=10000, n_jobs=threads),
                   'RandomForestClassifier.10K')
    }

    if isinstance(model_or_name, str):
        if classify:
            model_and_name = classification_models.get(model_or_name.lower())
        else:
            model_and_name = regression_models.get(model_or_name.lower())
        if not model_and_name:
            raise Exception("unrecognized model: '{}'".format(model_or_name))
        else:
            model, name = model_and_name
    else:
        model = model_or_name
        name = re.search(r"\w+", str(model)).group(0)

    return model, name
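
# Usage sketch (added for illustration): get_model returns an unfitted estimator
# plus a display name; `classify` switches between the two model dictionaries.
demo_model, demo_name = get_model('gaussian', classify=True)
print(demo_name)   # GaussianProcessClassifier
demo_reg, demo_reg_name = get_model('ridge')
print(demo_reg_name)   # Ridge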
Example #27
    mask = NMBA > 0
    NMBA = mask * 1
    X = pd.concat([NMBA, Age, Berlin, Sex, Weight], axis=1)

collist = list(X.columns)
imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=0)
imp.fit(X)
X = imp.transform(X)
X = pd.DataFrame(X, columns=collist)
X_train, X_test, Y_train, Y_test = \
    train_test_split(X,Y,test_size=0.1,random_state=1)

# Kernel
# myKernel = kernels.Sum(kernels.Matern(), kernels.RBF())

# myKernel = kernels.Sum(myKernel,kernels.RationalQuadratic())
# myKernel = kernels.Sum(myKernel,kernels.DotProduct())
myKernel = kernels.RBF()
myKernel = kernels.Sum(myKernel, kernels.DotProduct())
myKernel = kernels.Sum(myKernel, kernels.ConstantKernel())
# myKernel = kernels.Product(myKernel, kernels.DotProduct())
# myKernel = kernels.Sum(myKernel,kernels.ConstantKernel())
model = GaussianProcessClassifier(kernel=myKernel, warm_start=True, n_jobs=2)
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(Y_test, predictions)

print(round(accuracy, 2))
# filename = 'gp.pkl'
# pickle.dump(model, open(filename, 'wb'))
Example #28
maxIter = 1000
tolerance = 1e-3

svc = SVC()
svmLinear = SVC(kernel='linear')
ridge = lm.SGDClassifier(loss='squared_loss',
                         penalty='l2',
                         alpha=0.5,
                         max_iter=maxIter)
logistic = lm.LogisticRegression()
cartTree = tree.DecisionTreeClassifier()

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

gp = GaussianProcessClassifier(kernel=RBF(), multi_class='one_vs_one')

# Bagging
from sklearn.ensemble import BaggingClassifier

cartTree_bagging = BaggingClassifier(cartTree,
                                     max_samples=0.7,
                                     max_features=1.0)

# Neural Network
from sklearn.neural_network import MLPClassifier
nn = MLPClassifier(solver='lbfgs', alpha=1e-6,\
                            hidden_layer_sizes=[18,12], random_state=1)

modelList = [['ridge', ridge], ['RBF SVM', svc], ['Linear SVM', svmLinear],
             ['Logistic', logistic], ['CART Decision Tree', cartTree],
Example #29
classifiers = {
    'mock': MockClassifier(),
    'nbayes': GaussianNB(),
    'logistic': LogisticRegression(random_state=42),
    #'adao': our.AdaBoostClassifier(n_estimators=200),
    'adas': their.AdaBoostClassifier(n_estimators=200, random_state=42),
    'forest': RandomForestClassifier(n_estimators=200, random_state=42),
    'mlp': MLPClassifier(random_state=42),
    'svm': SVC(probability=True, random_state=42),
    'knn': KNeighborsClassifier(3),
    'svc-linear': SVC(kernel="linear",
                      C=0.025,
                      probability=True,
                      random_state=42),
    'svc-rbf': SVC(gamma=2, C=1, probability=True, random_state=42),
    'gp': GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),
    'tree': DecisionTreeClassifier(max_depth=5, random_state=42),
    'qda': QuadraticDiscriminantAnalysis(reg_param=0.1),
    'lda': LinearDiscriminantAnalysis()
}

score_types = {
    'mock': 'predict_proba',
    'nbayes': 'predict_proba',
    'logistic': 'predict_proba',
    #'adao': 'predict_proba',
    'adas': 'predict_proba',
    'forest': 'predict_proba',
    'mlp': 'predict_proba',
    'svm': 'sigmoid',
    'knn': 'predict_proba',
Example #30
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct

from sklearn import metrics

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

train, test = train_test_split(importation.t, test_size=0.1)
spamtrain, spamtest = train_test_split(importation.valspam, test_size=0.1)

kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0)**2]

for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True)

    clf.fit(train, spamtrain)

    expected = spamtest
    predicted = clf.predict(test)

    print(clf.score(train, spamtrain))

    #print("Classification report for classifier %s:\n%s\n"
    #     % (clf, metrics.classification_report(expected, predicted)))

    #print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

    #print("LA SUITE...\n\n\n\n")
#remained_feature_indices=np.where(mask==1)

selected_features_Oulu_data = Oulu_data_train * mask
#selected_features_Oulu_data_test=Oulu_data_test*mask

print(selected_features_Oulu_data.shape)
print(Oulu_labels_train.shape)
#print(selected_features_Oulu_data_test.shape)
#print(Oulu_labels_test.shape)

# the Gaussian process classification
kernel = 1.0 * RBF(214)

gpc = GaussianProcessClassifier(kernel=kernel,
                                n_restarts_optimizer=5,
                                random_state=None,
                                multi_class="one_vs_rest",
                                max_iter_predict=100,
                                n_jobs=-1)
gpc = gpc.fit(selected_features_Oulu_data, Oulu_labels_train)

print('')
print('accuracy on training set:',
      gpc.score(selected_features_Oulu_data, Oulu_labels_train))
#print('accuracy on testset of the hospital:',gpc.score(selected_features_Oulu_data_test, Oulu_labels_test))
'''
probs = gpc.predict_proba(selected_features_Oulu_data_test) 
probs = probs[:, 1]  
auc = roc_auc_score(Oulu_labels_test, probs)  
print('AUC: %.2f' % auc)
print(f1_score(Oulu_labels_test, gpc.predict(selected_features_Oulu_data_test), average='weighted')) 
'''
Example #32
def feature_training(feature_path,
                     results_path,
                     seg_name,
                     train_split=90,
                     cls_set="basic",
                     save_confusion_matrices=True):
    """
    Function to run the complete training pipeline
    :param feature_path: Path to the directory containing the NPY files with the features
    :param results_path: Path to the directory where the results should be saved
    :param seg_name: Name of the segmentation
    :param train_split: Percentage of data used for training, e.g. 80
    :param cls_set: Either a list of classifier names to be used, or the single name of one classifier; if not
    given, all classifiers will be used
    :param save_confusion_matrices: True if the confusion matrices should be saved
    :return: List of accuracies
    """
    split = (train_split / 100, (100 - train_split) / 100)
    (train_data, test_data), (train_label,
                              test_label) = load_features(split, feature_path)

    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
        "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
        "Naive Bayes", "QDA"
    ]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()
    ]

    if type(cls_set) == list:
        if all(x in names for x in cls_set):
            classifiers = [
                classifiers[names.index(classifier)] for classifier in cls_set
            ]
            names = cls_set
    elif cls_set in names:
        classifiers = [classifiers[names.index(cls_set)]]
        names = [cls_set]

    confusion_matrices, accuracies = features_train_test(
        classifiers, train_data, train_label, test_data, test_label)

    accuracies = dict(zip(names, accuracies))

    if save_confusion_matrices:
        confusion_matrices = dict(zip(names, confusion_matrices))
        with open(
                os.path.join(results_path,
                             "confusion_matrices_seg_{}".format(seg_name)),
                "wb") as matrix_file:
            pickle.dump(confusion_matrices, matrix_file)

    return accuracies
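
# Usage sketch (illustrative, assumes the feature NPY files expected by
# load_features() exist under "features/"):
# accuracies = feature_training("features/", "results/", seg_name="seg01",
#                               train_split=80,
#                               cls_set=["RBF SVM", "Random Forest"])
# print(accuracies)   # e.g. {"RBF SVM": ..., "Random Forest": ...}
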
def trainPredict(subjectid, makeplot=False):
    print("testing participant " + subjectid)
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    # Clean + downsample this data
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    testdata = None  # clear from memory
    # work out percentage in percept for each data point:
    percentages, nextpercept = assign_percentage(testy)

    # get a prediction for all points in the test data:
    predicty = gp.predict(testx)
    proby = gp.predict_proba(testx)

    if makeplot:
        summaryplot(subjectid, testx, testy, predicty, proby, gp)

    # Summarise prediction by reported percept
    meanprediction = {'mean' + percept:
                      proby[testy == value, 1].mean()
                      for percept, value in perceptindices.items()}
    predictiondev = {'stdev' + percept:
                     proby[testy == value, 1].std()
                     for percept, value in perceptindices.items()}
    predictionaccuracy = {'acc' + percept:
                          (predicty[testy == value] ==
                           testy[testy == value]).mean()
                          for percept, value in perceptindices.items()}
    # Summarise prediction by percentage in percept
    predictioncourse = {'timecourse' + percept + str(cutoff):
                        proby[(testy == value) &
                              (percentages < cutoff) &
                              (percentages > cutoff - 0.1), 1].mean()
                        for percept, value in perceptindices.items()
                        for cutoff in np.linspace(0.1, 1, 10)}

    # Summarise mixed percept time courses by the next percept
    nextcourse = {'nextcourse' + percept + str(cutoff):
                  proby[(testy == 0) &
                        (percentages < cutoff) &
                        (percentages > cutoff - 0.1) &
                        (nextpercept == perceptindices[percept]), 1].mean()
                  for percept in ['highfreq', 'lowfreq']
                  for cutoff in np.linspace(0.1, 1, 10)}

    afterdominant = {'after' + percept + "_" + after + "_" + str(cutoff):
                     proby[(testy == perceptindices[percept]) &
                           (percentages < cutoff) &
                           (percentages > cutoff - 0.1) &
                           (nextpercept == perceptindices[after]), 1].mean()
                     for percept, after in [('highfreq', 'mixed'),
                                            ('highfreq', 'lowfreq'),
                                            ('lowfreq', 'mixed'),
                                            ('lowfreq', 'highfreq')]
                     for cutoff in np.linspace(0.1, 1, 10)}

    # Only return the summarised data
    return meanprediction, predictiondev, predictionaccuracy, \
        predictioncourse, nextcourse, afterdominant
Example #34
def do_machinea_leaning_stuff(train_X, train_Y, test_X, test_Y):
    returnValue = []
    test_predict_Y = []

    # TODO: do something with this result
    #f_classif(X, y);

    # Classification algorithms
    rfc = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
    rfc.fit(train_X, train_Y)
    test_predict_Y = rfc.predict(test_X)
    returnValue.append({
        'name':
        "RandomForestClassifier",
        'score':
        rfc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    etc = ExtraTreesClassifier()
    etc.fit(train_X, train_Y)
    test_predict_Y = etc.predict(test_X)
    returnValue.append({
        'name':
        "ExtraTreesClassifier",
        'score':
        etc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    gpc = GaussianProcessClassifier(random_state=0)
    gpc.fit(train_X, train_Y)
    test_predict_Y = gpc.predict(test_X)
    # TODO: we could also use print(gpc.predict_proba(test_X))
    returnValue.append({
        'name':
        "GaussianProcessClassifier",
        'score':
        gpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    pac = PassiveAggressiveClassifier(max_iter=1000, random_state=0, tol=1e-3)
    pac.fit(train_X, train_Y)
    test_predict_Y = pac.predict(test_X)
    returnValue.append({
        'name':
        "PassiveAggressiveClassifier",
        'score':
        pac.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    rc = RidgeClassifier()
    rc.fit(train_X, train_Y)
    test_predict_Y = rc.predict(test_X)
    returnValue.append({
        'name':
        "RidgeClassifier",
        'score':
        rc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    sgdc = SGDClassifier(max_iter=1000, tol=1e-3)
    sgdc.fit(train_X, train_Y)
    test_predict_Y = sgdc.predict(test_X)
    returnValue.append({
        'name':
        "SGDClassifier",
        'score':
        sgdc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    bnb = BernoulliNB()
    bnb.fit(train_X, train_Y)
    test_predict_Y = bnb.predict(test_X)
    returnValue.append({
        'name':
        "BernoulliNB",
        'score':
        bnb.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    knnc = KNeighborsClassifier(n_neighbors=3)
    knnc.fit(train_X, train_Y)
    test_predict_Y = knnc.predict(test_X)
    returnValue.append({
        'name':
        "KNeighborsClassifier",
        'score':
        knnc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    mlpc = MLPClassifier()
    mlpc.fit(train_X, train_Y)
    test_predict_Y = mlpc.predict(test_X)
    returnValue.append({
        'name':
        "MLPClassifier",
        'score':
        mlpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    label_prop_model = LabelPropagation()
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_Y)) < 0.3
    labels = np.copy(train_Y)
    labels[random_unlabeled_points] = -1
    label_prop_model.fit(train_X, labels)
    test_predict_Y = label_prop_model.predict(test_X)
    returnValue.append({
        'name':
        "LabelPropagation",
        'score':
        label_prop_model.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    lsvc = LinearSVC(random_state=0, tol=1e-5)
    lsvc.fit(train_X, train_Y)
    test_predict_Y = lsvc.predict(test_X)
    returnValue.append({
        'name':
        "LinearSVC",
        'score':
        lsvc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    svc = SVC(gamma='auto')
    svc.fit(train_X, train_Y)
    test_predict_Y = svc.predict(test_X)
    returnValue.append({
        'name':
        "SVC",
        'score':
        svc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    dtc = DecisionTreeClassifier(random_state=0)
    dtc.fit(train_X, train_Y)
    test_predict_Y = dtc.predict(test_X)
    returnValue.append({
        'name':
        "DecisionTreeClassifier",
        'score':
        dtc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    cccv = CalibratedClassifierCV()
    cccv.fit(train_X, train_Y)
    test_predict_Y = cccv.predict(test_X)
    returnValue.append({
        'name':
        "CalibratedClassifierCV",
        'score':
        cccv.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    return returnValue
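
# A minimal, hedged usage sketch for the list of per-classifier result dicts
# returned above (the enclosing function's name is not shown in this excerpt,
# so the sketch only relies on the dict keys used above):
def print_classifier_summary(results):
    # one line per classifier: name, mean score and accuracy on the test split
    for entry in results:
        print("%-25s score=%.3f accuracy=%.3f"
              % (entry['name'], entry['score'], entry['accuracy_score']))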
Beispiel #35
0
import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct


xx, yy = np.meshgrid(np.linspace(-3, 3, 50),
                     np.linspace(-3, 3, 50))
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
plt.figure(figsize=(10, 5))
kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0)**2]
for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y)

    # plot the decision function for each datapoint on the grid
    Z = clf.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)[:, 1]
    Z = Z.reshape(xx.shape)

    plt.subplot(1, 2, i + 1)
    image = plt.imshow(Z, interpolation='nearest',
                       extent=(xx.min(), xx.max(), yy.min(), yy.max()),
                       aspect='auto', origin='lower', cmap=plt.cm.PuOr_r)
    contours = plt.contour(xx, yy, Z, levels=[0.5], linewidths=2,
                           linestyles='--')
    plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired)
    plt.xticks(())
    plt.yticks(())
    plt.axis([-3, 3, -3, 3])
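
# Render both panels (hedged addition, not part of the original snippet;
# plt.show() assumes an interactive matplotlib backend).
plt.tight_layout()
plt.show()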
Beispiel #36
0
def GPC():
    # 'kernel' is assumed to be defined at module level in the original source
    return GaussianProcessClassifier(kernel)
import numpy as np
from matplotlib import pyplot as plt

from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF


# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0), optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f" % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f" % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print(
    "Accuracy: %.3f (initial) %.3f (optimized)"
    % (
        accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
        accuracy_score(y[:train_size], gp_opt.predict(X[:train_size])),
    )
)
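
# The accuracies above are computed on the training slice only. A hedged extra
# check on the held-out half of the data (X[train_size:] is generated above
# but never used for fitting):
print(
    "Held-out accuracy: %.3f (initial) %.3f (optimized)"
    % (
        accuracy_score(y[train_size:], gp_fix.predict(X[train_size:])),
        accuracy_score(y[train_size:], gp_opt.predict(X[train_size:])),
    )
)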
Beispiel #38
0
import numpy as np

from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF



# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)"
      % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
         accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
print("Log-loss: %.3f (initial) %.3f (optimized)"
      % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]),
    ## TODO: Add any additional arguments that you will need to pass into your model
    parser.add_argument('--length_scale', type=float, default=1.0)
    parser.add_argument('--kernel_scaling', type=float, default=1.0)

    # args holds all passed-in arguments
    args = parser.parse_args()

    # Read in csv training file
    training_dir = args.data_dir
    train_data = pd.read_csv(os.path.join(training_dir, "train.csv"),
                             header=None,
                             names=None)

    # Labels are in the first column
    train_y = train_data.iloc[:, 0]
    train_x = train_data.iloc[:, 1:]

    ## --- Your code here --- ##

    ## TODO: Define a model
    model = GaussianProcessClassifier(args.kernel_scaling *
                                      RBF(args.length_scale))

    ## TODO: Train the model
    model.fit(train_x, train_y)

    ## --- End of your code  --- ##

    # Save the trained model
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
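
    # Hedged note: at inference time the saved artifact can be restored with
    # the mirror call, e.g.
    #   model = joblib.load(os.path.join(model_dir, "model.joblib"))
    # where model_dir points at the directory used for args.model_dir above.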
Beispiel #40
0
Ntrain_vec = [20, 50, 100, 500, 1000, 1500]

for i in Ntrain_vec:

    #N = 1000
    Ntrain = int(i)  # np.int is deprecated; the builtin int is sufficient
    #Ntest = 100

    Xtrain = X[0:Ntrain, :]  # first Ntrain samples for training
    ytrain = y[0:Ntrain]
    Xtest = X[Ntrain:N, :]   # remaining samples for testing
    ytest = y[Ntrain:N]

    #kernel = 1.0 * RBF([20.0]) # isotropic kernel #Test error rate = 0.89
    kernel = DotProduct(20.0)  #Test error rate = 0.14
    gpc_rbf = GaussianProcessClassifier(kernel=kernel).fit(Xtrain, ytrain)
    yp_train = gpc_rbf.predict(Xtrain)

    train_error_rate = np.mean(np.not_equal(yp_train, ytrain))
    yp_test = gpc_rbf.predict(Xtest)
    test_error_rate = np.mean(np.not_equal(yp_test, ytest))

    print('Training error rate')
    print(train_error_rate)
    print('Test error rate')
    print(test_error_rate)
"""
=====================================================
Confusion matrix for GP regression
=====================================================
Beispiel #41
0
def test_lml_improving(kernel):
    # Test that hyperparameter-tuning improves log-marginal likelihood.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
Beispiel #42
0
logging.basicConfig(level=logging.DEBUG)

h = .02  # step size in the mesh

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA", "Custom NN"
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    NeuralNetwork([
        NeuronLayer(10, 2, 'leaky_relu', True),
        NeuronLayer(1, 10, 'sigmoid', True)
    ],
                  learning_rate=0.1,
                  step_decay_factor=0.99)
]

X, y = make_classification(n_features=2,
Beispiel #43
0
#
# Initiate classifiers
#

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA"
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(),
    DecisionTreeClassifier(max_depth=15),
    RandomForestClassifier(max_depth=15, n_estimators=5, max_features=2),
    MLPClassifier(),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()  # keeps the list aligned with the 10 names above
]

# iterate over classifiers
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print('\n- Classifier: ' + name + ' scores ' + str(score) + '.')

#
# Keeping the algo that outperforms the others without tweaking (see the sketch below)
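
# A minimal, hedged sketch of what that selection step could look like, reusing
# the `names`, `classifiers`, `X_test`, and `y_test` objects fitted in the loop
# above (the original code for this step is not shown in this excerpt):
best_name, best_clf = max(zip(names, classifiers),
                          key=lambda pair: pair[1].score(X_test, y_test))
print('- Keeping ' + best_name + ' as the baseline classifier.')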
    def selectNext(self):
        proba_in = np.ones(self.poolData[self.indicesUnknown].shape[0])
        proba_in *= self.model.predict_proba(
            self.poolData[self.indicesUnknown])[:, 1]

        predicted = []
        predictedFalse = []
        for p in proba_in:
            if p < 0.5:
                # if we knew the predicted label l were correct, we could add
                # it to the training set and retrain
                predicted.append(0)
                # if we knew the predicted label l were wrong, we would add
                # 1 - l to the training set instead
                predictedFalse.append(1)
            else:
                predicted.append(1)
                predictedFalse.append(0)

        # now consider both outcomes for each item and retrain the classifier
        # separately to estimate its expected error
        eRightList = []
        eWrongList = []
        for i in range(len(predicted)):
            if i in self.indicesUntrained:
                X_train = np.append(self.trainData, [self.poolData[i]], axis=0)
                y_trainRight = np.append(self.trainLabels, predicted[i])
                y_trainWrong = np.append(self.trainLabels, predictedFalse[i])
            else:
                X_train = copy.deepcopy(self.trainData)
                if self.voteAggMethod == 'majorityVoting':
                    selectedVote = self.aggregator.majorityVoting(
                        self.queriedVoteHistory, i)
                elif self.voteAggMethod == 'DS':
                    selectedVote = self.aggregator.DS(
                        self.queriedVoteHistory, self.queriedVoteOwnersHistory,
                        i)
                else:
                    print(
                        "no valid vote aggregation technique selected, the default -majorityVoting- is used!"
                    )
                    selectedVote = self.aggregator.majorityVoting(
                        self.queriedVoteHistory, i)
                y_trainRight = copy.deepcopy(self.trainLabels)
                y_trainWrong = copy.deepcopy(self.trainLabels)
                y_trainRight[np.where(self.indicesTrained == i)] = selectedVote
                y_trainWrong[np.where(self.indicesTrained == i)] = selectedVote
            self.model.fit(X_train, y_trainRight)
            scores = cross_val_score(self.model,
                                     X_train,
                                     y_trainRight,
                                     cv=5,
                                     scoring='f1')
            eRightList.append(scores.mean())
            self.model.fit(X_train, y_trainWrong)
            scores = cross_val_score(self.model,
                                     X_train,
                                     y_trainWrong,
                                     cv=5,
                                     scoring='f1')
            eWrongList.append(scores.mean())

        # update the active learner back
        self.model.fit(self.trainData, self.trainLabels)

        # creating bootstraps
        bootstraps = []
        for i in range(10):
            size = self.trainData.shape[0]
            bootstrap_idx = np.random.choice(range(size), size, replace=True)
            bst_x = self.trainData[bootstrap_idx, :]
            bst_y = self.trainLabels[bootstrap_idx]
            bootstraps.append([bst_x, bst_y])

        # defining ML classifiers to use on the bootstrapped training sets
        clfs = [
            LogisticRegression(C=1., solver='lbfgs'),
            CalibratedClassifierCV(SVC(kernel="linear", C=0.025), cv=3),
            CalibratedClassifierCV(SVC(gamma=2, C=1), cv=3),
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            CalibratedClassifierCV(DecisionTreeClassifier(max_depth=40), cv=3),
            CalibratedClassifierCV(RandomForestClassifier(max_depth=5,
                                                          n_estimators=10),
                                   cv=3),
            CalibratedClassifierCV(LinearSVC(class_weight='balanced', C=0.1),
                                   cv=3),
            AdaBoostClassifier(),
            GaussianNB(),
            CalibratedClassifierCV(RandomForestClassifier(n_estimators=50,
                                                          max_depth=40),
                                   cv=3)
        ]

        # use multiprocessing so each ML classifier predicts a label for every item in the pool
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        jobs = []
        for i in range(10):
            p = multiprocessing.Process(target=self.bootstrap_computation,
                                        args=(i, clfs[i], bootstraps[i],
                                              return_dict))
            jobs.append(p)
            p.start()

        for proc in jobs:
            proc.join()

        predictions = []
        for key, value in return_dict.items():
            predictions.append(value)

        # calculate p_u for each item: the fraction of bootstrapped classifiers
        # that agree with the active learner's prediction
        p_u = []
        for i in range(len(predicted)):
            p_i = 0
            for j in range(len(predictions)):
                if predicted[i] == predictions[j][i]:
                    p_i += 1
            p_u.append(p_i / 10)

        # expected error for item i: with probability p_u the prediction is
        # correct (eRightList[i]), otherwise it is wrong (eWrongList[i]),
        # i.e. eWrongList[i] - p_u[i] * (eWrongList[i] - eRightList[i])
        expErrorList = []
        for i in range(len(p_u)):
            expErrorList.append(eWrongList[i] - p_u[i] *
                                (eWrongList[i] - eRightList[i]))

        # get the indexes of min n_instances
        expErrorList = np.array(expErrorList)
        idx = np.argsort(expErrorList)
        query_idx = idx[0:self.batchSize]
        selectedIndex = self.indicesUnknown[query_idx]
        # return the selected items that have minimum expected error
        return selectedIndex
Beispiel #45
0
def test_lml_precomputed():
    # Test that lml of optimized kernel is stored correctly.
    for kernel in kernels:
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_almost_equal(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                            gpc.log_marginal_likelihood(), 7)
Beispiel #46
0
Xs_train = []
Labels = []
for _ in range(5):
    d = resample(X_train, n_samples=502)
    Xs_train.append(d.iloc[:, :-1])
    Labels.append(d['labels'])

# # Gaussian Process Regression

# A considerably more difficult task was predicting MMSE, which is itself a very noisy measure [8]. We used Gaussian Process Regression (GPR) [5] to predict this measure from selected imaging, demographic, and genetic features. The choice of GPR was motivated by the ability of such models to fit very noisy data [4] (a minimal sketch follows below).
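
# A minimal, hedged sketch of the GPR idea described above: an RBF kernel plus
# a WhiteKernel term lets the model absorb observation noise in a target such
# as MMSE. The data below is synthetic and only illustrates the API, not the
# study's features.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

_rng = np.random.RandomState(0)
_X_demo = _rng.uniform(0, 5, (40, 1))
_y_demo = np.sin(_X_demo[:, 0]) + _rng.normal(scale=0.3, size=40)  # noisy target
_gpr = GaussianProcessRegressor(kernel=1.0 * RBF(1.0) + WhiteKernel(1.0))
_gpr.fit(_X_demo, _y_demo)
print("GPR demo, learned kernel:", _gpr.kernel_)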

# In[21]:

from sklearn.gaussian_process import GaussianProcessClassifier
# note: 'Welch' is not a valid optimizer for GaussianProcessClassifier; the
# built-in L-BFGS-B optimizer is used instead
gp = GaussianProcessClassifier(optimizer='fmin_l_bfgs_b',
                               multi_class='one_vs_rest',
                               n_restarts_optimizer=5)

# In[22]:

gpt = []
for i in range(5):
    gp.fit(Xs_train[i], Labels[i])
    gpt.append(gp.predict(X_test))

# In[23]:

gpc = []
for i in range(len(y_test)):
    p = []
    for j in gpt:
# Design of experiments
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process Model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(- lim, lim, res),
                     np.linspace(- lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
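
# A minimal, hedged sketch of an iso-value plot for the y_prob / y_true grids
# computed above (reuses lim, x1, x2 from this example and assumes matplotlib
# is available; not the original example's plotting code):
import matplotlib.pyplot as plt
plt.imshow(y_prob, extent=(-lim, lim, -lim, lim), origin='lower',
           aspect='auto', cmap=plt.cm.PuOr_r)
plt.colorbar(label='P(y=1)')
plt.contour(x1, x2, y_true, levels=[0], colors='k')  # true boundary g(x) = 0
plt.contour(x1, x2, y_prob, levels=[0.5], colors='b', linestyles='--')  # p = 0.5
plt.show()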
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit(X, y)
print(gpc.score(X, y))
clf = LogisticRegression(random_state=0, solver='lbfgs',
                         multi_class='multinomial').fit(X, y)
print(clf.score(X, y))
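
# Hedged note: both scores above are training-set scores. A quick
# cross-validated comparison (a sketch, not part of the original example):
from sklearn.model_selection import cross_val_score
print(cross_val_score(GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                                random_state=0),
                      X, y, cv=5).mean())
print(cross_val_score(LogisticRegression(random_state=0, solver='lbfgs',
                                         multi_class='multinomial',
                                         max_iter=1000),
                      X, y, cv=5).mean())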