def test_lml_improving():
    """ Test that hyperparameter-tuning improves log-marginal likelihood. """
    for kernel in kernels:
        if kernel == fixed_kernel: continue
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                       gpc.log_marginal_likelihood(kernel.theta))
Example #2
def test_predict_consistent():
    """ Check binary predict decision has also predicted probability above 0.5.
    """
    for kernel in kernels:
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_array_equal(gpc.predict(X),
                           gpc.predict_proba(X)[:, 1] >= 0.5)
Example #3
def build_classifier_gp(data, labels, **kwargs):
    linear_kernel = Sum(k1=Product(k1=DotProduct(sigma_0=0, sigma_0_bounds='fixed'), k2=ConstantKernel()),
                        k2=ConstantKernel())
    gp_clf = GaussianProcessClassifier(kernel=linear_kernel)
    gp_clf.fit(data, labels)
    id_pos_class = gp_clf.classes_ == labels.max()
    return gp_clf, gp_clf.predict_proba(data)[:, id_pos_class]
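
# Usage sketch (an illustrative addition, not part of the original snippet):
# exercises build_classifier_gp on synthetic two-class data; the second return
# value is the predicted probability of the largest class label.
from sklearn.datasets import make_classification

demo_data, demo_labels = make_classification(n_samples=60, n_features=4, random_state=0)
demo_clf, demo_pos_proba = build_classifier_gp(demo_data, demo_labels)
print(demo_pos_proba.shape)  # (60, 1): one column selected by the boolean class mask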
Example #4
def test_converged_to_local_maximum(kernel):
    # Test that we are at a local maximum after hyperparameter optimization.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

    lml, lml_gradient = \
        gpc.log_marginal_likelihood(gpc.kernel_.theta, True)

    assert np.all((np.abs(lml_gradient) < 1e-4) |
                  (gpc.kernel_.theta == gpc.kernel_.bounds[:, 0]) |
                  (gpc.kernel_.theta == gpc.kernel_.bounds[:, 1]))
Example #5
def test_multi_class(kernel):
    # Test GPC for multi-class classification problems.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    assert_almost_equal(y_prob.sum(1), 1)

    y_pred = gpc.predict(X2)
    assert_array_equal(np.argmax(y_prob, 1), y_pred)
Example #6
def test_lml_gradient(kernel):
    # Compare analytic and numeric gradient of log marginal likelihood.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

    lml, lml_gradient = gpc.log_marginal_likelihood(kernel.theta, True)
    lml_gradient_approx = \
        approx_fprime(kernel.theta,
                      lambda theta: gpc.log_marginal_likelihood(theta,
                                                                False),
                      1e-10)

    assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
Example #7
def test_multi_class_n_jobs(kernel):
    # Test that multi-class GPC produces identical results with n_jobs>1.
    gpc = GaussianProcessClassifier(kernel=kernel)
    gpc.fit(X, y_mc)

    gpc_2 = GaussianProcessClassifier(kernel=kernel, n_jobs=2)
    gpc_2.fit(X, y_mc)

    y_prob = gpc.predict_proba(X2)
    y_prob_2 = gpc_2.predict_proba(X2)
    assert_almost_equal(y_prob, y_prob_2)
Example #8
def test_random_starts():
    # Test that an increasing number of random-starts of GP fitting only
    # increases the log marginal likelihood of the chosen theta.
    n_samples, n_features = 25, 2
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features) * 2 - 1
    y = (np.sin(X).sum(axis=1) + np.sin(3 * X).sum(axis=1)) > 0

    kernel = C(1.0, (1e-2, 1e2)) \
        * RBF(length_scale=[1e-3] * n_features,
              length_scale_bounds=[(1e-4, 1e+2)] * n_features)
    last_lml = -np.inf
    for n_restarts_optimizer in range(5):
        gp = GaussianProcessClassifier(
            kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
            random_state=0).fit(X, y)
        lml = gp.log_marginal_likelihood(gp.kernel_.theta)
        assert_greater(lml, last_lml - np.finfo(np.float32).eps)
        last_lml = lml
Example #9
def test_custom_optimizer(kernel):
    # Test that GPC can use externally defined optimizers.
    # Define a dummy optimizer that simply tests 50 random hyperparameters
    def optimizer(obj_func, initial_theta, bounds):
        rng = np.random.RandomState(0)
        theta_opt, func_min = \
            initial_theta, obj_func(initial_theta, eval_gradient=False)
        for _ in range(50):
            theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
                                              np.minimum(1, bounds[:, 1])))
            f = obj_func(theta, eval_gradient=False)
            if f < func_min:
                theta_opt, func_min = theta, f
        return theta_opt, func_min

    gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
    gpc.fit(X, y_mc)
    # Checks that optimizer improved marginal likelihood
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
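
# Illustrative sketch (an addition, not from the original tests): the same
# `optimizer` hook can wrap scipy. sklearn calls optimizer(obj_func, initial_theta,
# bounds), where obj_func(theta, eval_gradient=True) returns the negative
# log-marginal likelihood and its gradient, and expects (theta_opt, func_min) back.
from scipy.optimize import minimize

def scipy_lbfgs_optimizer(obj_func, initial_theta, bounds):
    result = minimize(lambda theta: obj_func(theta, eval_gradient=True),
                      initial_theta, bounds=bounds, jac=True, method="L-BFGS-B")
    return result.x, result.fun

# gpc = GaussianProcessClassifier(kernel=kernel, optimizer=scipy_lbfgs_optimizer).fit(X, y)
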
def trainModel(subjectid):
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # Load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    return gp, testx, testy
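
# Usage sketch (added for illustration; assumes the csvdata/*.csv files and the
# cleandata() helper used above are available in the surrounding project):
# gp, testx, testy = trainModel('subject01')
# print(gp.score(testx, testy))            # mean accuracy on the rivalry data
# print(gp.predict_proba(testx)[:, 1])     # probability of the second class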
Example #11
def return_model(mode, **kwargs):

    if inspect.isclass(mode):
        assert getattr(
            mode, 'fit',
            None) is not None, 'Custom model family should have a fit() method'
        model = mode(**kwargs)
    elif mode == 'logistic':
        solver = kwargs.get('solver', 'liblinear')
        n_jobs = kwargs.get('n_jobs', None)
        C = kwargs.get('C', 1.)
        max_iter = kwargs.get('max_iter', 5000)
        model = LogisticRegression(solver=solver,
                                   n_jobs=n_jobs,
                                   C=C,
                                   max_iter=max_iter,
                                   random_state=666)
    elif mode == 'Tree':
        model = DecisionTreeClassifier(random_state=666)
    elif mode == 'RandomForest':
        n_estimators = kwargs.get('n_estimators', 50)
        model = RandomForestClassifier(n_estimators=n_estimators,
                                       random_state=666)
    elif mode == 'GB':
        n_estimators = kwargs.get('n_estimators', 50)
        model = GradientBoostingClassifier(n_estimators=n_estimators,
                                           random_state=666)
    elif mode == 'AdaBoost':
        n_estimators = kwargs.get('n_estimators', 50)
        model = AdaBoostClassifier(n_estimators=n_estimators, random_state=666)
    elif mode == 'SVC':
        kernel = kwargs.get('kernel', 'rbf')
        model = SVC(kernel=kernel, random_state=666)
    elif mode == 'LinearSVC':
        model = LinearSVC(loss='hinge', random_state=666)
    elif mode == 'GP':
        model = GaussianProcessClassifier(random_state=666)
    elif mode == 'KNN':
        n_neighbors = kwargs.get('n_neighbors', 5)
        model = KNeighborsClassifier(n_neighbors=n_neighbors)
    elif mode == 'NB':
        model = MultinomialNB()
    elif mode == 'linear':
        model = LinearRegression()  # LinearRegression takes no random_state parameter
    elif mode == 'ridge':
        alpha = kwargs.get('alpha', 1.0)
        model = Ridge(alpha=alpha, random_state=666)
    elif 'conv' in mode:
        tf.reset_default_graph()
        address = kwargs.get('address', 'weights/conv')
        hidden_units = kwargs.get('hidden_layer_sizes', [20])
        activation = kwargs.get('activation', 'relu')
        weight_decay = kwargs.get('weight_decay', 1e-4)
        learning_rate = kwargs.get('learning_rate', 0.001)
        max_iter = kwargs.get('max_iter', 1000)
        dropout = kwargs.get('dropout', 0.)
        early_stopping = kwargs.get('early_stopping', 10)
        warm_start = kwargs.get('warm_start', False)
        batch_size = kwargs.get('batch_size', 256)
        kernel_sizes = kwargs.get('kernel_sizes', [5])
        strides = kwargs.get('strides', [5])
        channels = kwargs.get('channels', [1])
        validation_fraction = kwargs.get('validation_fraction', 0.)
        global_averaging = kwargs.get('global_averaging', 0.)
        optimizer = kwargs.get('optimizer', 'sgd')
        if mode == 'conv':
            model = CShapNN(mode='classification',
                            batch_size=batch_size,
                            max_epochs=max_iter,
                            learning_rate=learning_rate,
                            dropout=dropout,
                            weight_decay=weight_decay,
                            validation_fraction=validation_fraction,
                            early_stopping=early_stopping,
                            optimizer=optimizer,
                            warm_start=warm_start,
                            address=address,
                            hidden_units=hidden_units,
                            strides=strides,
                            global_averaging=global_averaging,
                            kernel_sizes=kernel_sizes,
                            channels=channels,
                            random_seed=666)
        elif mode == 'conv_reg':
            model = CShapNN(mode='regression',
                            batch_size=batch_size,
                            max_epochs=max_iter,
                            learning_rate=learning_rate,
                            dropout=dropout,
                            weight_decay=weight_decay,
                            validation_fraction=validation_fraction,
                            early_stopping=early_stopping,
                            optimizer=optimizer,
                            warm_start=warm_start,
                            address=address,
                            hidden_units=hidden_units,
                            strides=strides,
                            global_averaging=global_averaging,
                            kernel_sizes=kernel_sizes,
                            channels=channels,
                            random_seed=666)
    elif 'NN' in mode:
        solver = kwargs.get('solver', 'adam')
        hidden_layer_sizes = kwargs.get('hidden_layer_sizes', (20, ))
        if isinstance(hidden_layer_sizes, list):
            hidden_layer_sizes = list(hidden_layer_sizes)
        activation = kwargs.get('activation', 'relu')
        learning_rate_init = kwargs.get('learning_rate', 0.001)
        max_iter = kwargs.get('max_iter', 5000)
        early_stopping = kwargs.get('early_stopping', False)
        warm_start = kwargs.get('warm_start', False)
        batch_size = kwargs.get('batch_size', 'auto')

        if mode == 'NN':
            model = MLPClassifier(solver=solver,
                                  hidden_layer_sizes=hidden_layer_sizes,
                                  activation=activation,
                                  learning_rate_init=learning_rate_init,
                                  warm_start=warm_start,
                                  max_iter=max_iter,
                                  early_stopping=early_stopping,
                                  batch_size=batch_size)
        if mode == 'NN_reg':
            model = MLPRegressor(solver=solver,
                                 hidden_layer_sizes=hidden_layer_sizes,
                                 activation=activation,
                                 learning_rate_init=learning_rate_init,
                                 warm_start=warm_start,
                                 max_iter=max_iter,
                                 early_stopping=early_stopping,
                                 batch_size=batch_size)
    else:
        raise ValueError("Invalid mode!")
    return model
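
# Usage sketch (an illustrative addition, not part of the original module):
# `mode` selects the model family and keyword arguments override the defaults above.
demo_logreg = return_model('logistic', C=0.5, max_iter=2000)
demo_forest = return_model('RandomForest', n_estimators=100)
print(type(demo_logreg).__name__, type(demo_forest).__name__)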
Example #12
classifier = np.all(np.unique(Y.to_numpy()) == [0, 1])
outputs = Y.shape[1]

# separate the data into training and testing
if TIME_SERIES:
    test_idx = X.index.values[-int(X.shape[0] / 5):]
else:
    np.random.seed(1)
    test_idx = np.random.choice(a=X.index.values,
                                size=int(X.shape[0] / 5),
                                replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# set up the model
if classifier:
    model = MultiOutputClassifier(GaussianProcessClassifier(random_state=42))
else:
    model = MultiOutputRegressor(GaussianProcessRegressor(random_state=42))

# train the model
model.fit(X.iloc[train_idx, :], Y.iloc[train_idx, :])

# In[2]: Collect the predictions

# predict training and testing data
train_predict = pd.DataFrame(model.predict(X.iloc[train_idx, :]),
                             columns=Y.columns)
test_predict = pd.DataFrame(model.predict(X.iloc[test_idx, :]),
                            columns=Y.columns)

# reshape all of the predictions into a single table
def grid_search_pipeline(X, y): 
    """ making pipeline and gridsearchcv"""
    clf = Pipeline(steps=[  
                        ('scaler', StandardScaler()) 
                        , ('clf', LogisticRegression())
                   ]) 

    param_grid = [
                 { 'clf': [DecisionTreeClassifier(random_state=24, max_leaf_nodes=1000, class_weight='balanced')], 
             'clf__max_depth':  [5, 7, 10, 11, 12, 13, 14,  15, 16, 17]
                    }, 
                 { 'clf': [ExtraTreeClassifier(max_features=None, random_state=24)], 
             'clf__max_depth': [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 30, 50, 100]
                    }, 
                 { 'clf': [ExtraTreesClassifier(max_features=None, n_jobs=-1, random_state=24)], 
             'clf__n_estimators':  [1, 2, 5, 7, 10, 15, 20], 
             'clf__max_depth':  [2, 4, 6, 8, 10, 12, 15, 25, 30]
                    }, 
                 { 'clf': [RandomForestClassifier(max_features=None, n_jobs=-1, random_state=24)], 
             'clf__n_estimators':  [61, 62, 63, 64, 65, 66, 67, 68, 69, 70], 
             'clf__max_depth':  [2, 4, 6, 8, 10, 12, 15, 25, 30]
                    },
              { 'clf': [KNeighborsClassifier(weights='distance', n_jobs = -1)], 
              'clf__n_neighbors':  list(range(1,30)),
               'clf__leaf_size': list(range(1,30))   # estimator__ prefix to get through MOC
                  }, 
              {'clf': [RadiusNeighborsClassifier(weights='distance', n_jobs = -1 )], 
               'clf__radius': [300, 200, 100],
               'clf__leaf_size': [100, 50, 15]
               },
                { 'clf': [RidgeClassifier(max_iter=110, class_weight='balanced', random_state=24)], 
             'clf__alpha': [100000, 50000, 10000, 5000]
                  },
                 { 'clf': [LogisticRegression(penalty='elasticnet', random_state=24, max_iter=1000, n_jobs = -1, l1_ratio=.5)], 
             'clf__solver':   ['saga'],
             'clf__multi_class':   ['auto', 'ovr', 'multinomial']
                   }, 
                 { 'clf': [LogisticRegressionCV(penalty='elasticnet',  n_jobs=-1, random_state=24, l1_ratios=[.5])], 
             'clf__solver':   ['saga'],
             'clf__multi_class':   ['auto', 'ovr', 'multinomial']
                    },  
                 { 'clf': [RidgeClassifierCV(cv = 5, class_weight='balanced')], 
             'clf__class_weight':   [None, 'balanced']
                  }, 
                 { 'clf': [GaussianNB()]
                  }, 
                 { 'clf': [NearestCentroid()], 
             'clf__shrink_threshold':   [6,5,4, 4.5, 3, 3.5, 2,1]
                  }, 
              { 'clf': [LinearSVC(dual=False, random_state=24, max_iter=1000000000)], 
               'clf__C': np.logspace(-2, 1, 200).tolist(),  # C must be > 0; values spaced exponentially
               'clf__multi_class': ['ovr', 'crammer_singer']
               },  # based on https://scikit-learn.org/stable/modules/svm.html: C trades off misclassification of training examples against simplicity of the decision surface. A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly; exponential spacing covers several orders of magnitude
                 { 'clf': [LinearDiscriminantAnalysis()], 
             'clf__solver':   ['lsqr', 'eigen'], 
             'clf__shrinkage':   [None, 'auto'] 
                   }, 
                 { 'clf': [QuadraticDiscriminantAnalysis()]
                   },   
                 { 'clf': [LabelPropagation(kernel='knn', gamma=0, n_jobs=-1)], 
             'clf__n_neighbors':   np.arange(1, 21).tolist(), 
                   }, 
                 { 'clf': [LabelSpreading(kernel='knn', gamma=0, n_jobs=-1)], 
             'clf__n_neighbors':   np.arange(1, 21).tolist(), 
             'clf__alpha':  [.2, .4, .6, .8]
                   }, 
                 { 'clf': [NuSVC(max_iter=-1, random_state=24)], 
             'clf__nu':  [0.25, 0.5, 0.75, 1.0],  # nu must lie in (0, 1]
             'clf__kernel': ['linear', 'poly','rbf', 'sigmoid'],  # 'precomputed' would require a Gram matrix rather than raw features
               'clf__gamma':  ['scale', 'auto'], 
               'clf__shrinking':  [True,False], 
               'clf__class_weight':  [None, 'balanced'], 
               'clf__decision_function_shape':  ['ovo','ovr'], 
                   }, 
                 { 'clf': [SVC(probability=True, random_state=24)], 
             'clf__C':  [.2, .4, .6, .8], 
             'clf__kernel': ['linear', 'poly','rbf', 'sigmoid'],  # 'precomputed' would require a Gram matrix rather than raw features
               'clf__gamma':  ['scale', 'auto'], 
               'clf__shrinking':  [True,False], 
               'clf__class_weight':  [None, 'balanced'], 
               'clf__decision_function_shape':  ['ovo','ovr'], 
                   }, 
            { 'clf': [GaussianProcessClassifier(copy_X_train=False, random_state=24, n_jobs=-1)], 
             'clf__n_restarts_optimizer':  [1, 2]
                 }, 
                 { 'clf': [SGDClassifier(n_jobs=-1, random_state=24, )], 
             'clf__penalty':  ['l2', 'elasticnet'], 
             'clf__learning_rate': ['optimal', 'adaptive'], 
             'clf__eta0': [0.2, 0.4, 0.6, 0.8, 1, 1.2, 1.4],
             'clf__class_weight': [None, 'balanced'],
                   }, 
                 { 'clf': [Perceptron(n_jobs=-1, random_state=24, )], 
             'clf__penalty':  ['l2', 'elasticnet'], 
             'clf__class_weight': [None, 'balanced'],
                   }, 
                 { 'clf': [PassiveAggressiveClassifier(n_jobs=-1, random_state=24, average=True)], 
             'clf__C':  [.001, .01, .1, .2, .4, .6, .8, 1, 1.2], 
             'clf__class_weight': [None, 'balanced']
                   }, 
                 { 'clf': [GradientBoostingClassifier(random_state=24)], 
             'clf__learning_rate': [.06, .07, .08,.09], 
             'clf__n_estimators': [180, 190, 200],
             'clf__max_depth': [2, 4, 6, 8],
             'clf__init': [None, 'zero']
                    }, 
            ]
    

    scoring = {'balanced_accuracy': 'balanced_accuracy'
               , 'f1_micro': 'f1_micro'        
               ,'f1_macro': 'f1_macro'
               , 'f1_weighted': 'f1_weighted'
               ,'precision_micro': 'precision_micro'
               , 'precision_macro': 'precision_macro'
               ,'precision_weighted': 'precision_weighted'
               , 'recall_micro': 'recall_micro'
               ,'recall_macro': 'recall_macro'
               , 'recall_weighted': 'recall_weighted'
     }
    
    search = GridSearchCV(clf, param_grid, scoring=scoring, refit= 'balanced_accuracy', n_jobs=-1, cv=5, return_train_score=True)
    search.fit(X, y)
      
    print(f'Training Machine Learning Classifier for {SYSTEM} Color Categories: successful!')
    
    return search 
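
# Usage sketch (illustrative addition): the grid above is large, so even a small
# dataset takes a while; SYSTEM is assumed to be a module-level name used only in
# the final log message.
# from sklearn.datasets import load_iris
# SYSTEM = 'demo'                              # hypothetical placeholder
# iris = load_iris()
# search = grid_search_pipeline(iris.data, iris.target)
# print(search.best_params_, search.best_score_)
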
def plot(df, options):

    UNIQ_GROUPS = df.group.unique()
    UNIQ_GROUPS.sort()

    sns.set_style("white")
    grppal = sns.color_palette("Set2", len(UNIQ_GROUPS))

    print('# UNIQ GROUPS', UNIQ_GROUPS)

    cent_stats = df.groupby(
        ['position', 'group', 'side']).apply(stats_per_group)
    cent_stats.reset_index(inplace=True)

    import time
    from sklearn import preprocessing
    from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ExpSineSquared, ConstantKernel, RBF


    ctlDF = cent_stats[ cent_stats['group'] == 0 ]

    TNRightDF = cent_stats[ cent_stats['group'] != 0]
    TNRightDF = TNRightDF[TNRightDF['side'] == 'right']

    dataDf = pd.concat([ctlDF, TNRightDF], ignore_index=True)
    print(dataDf)

    yDf = dataDf['group'] == 0
    yDf = yDf.astype(int)
    y = yDf.values
    print(y)
    print(y.shape)

    XDf = dataDf[['position', 'values']]
    X = XDf.values
    X = preprocessing.scale(X)
    print(X)
    print(X.shape)
    

    # kernel = ConstantKernel() + Matern(length_scale=mean, nu=3 / 2) + \
    # WhiteKernel(noise_level=1e-10)
    
    kernel = 1**2 * Matern(length_scale=1, nu=1.5) + \
        WhiteKernel(noise_level=0.1)

    figure = plt.figure(figsize=(10, 6))


    stime = time.time()
    gp = GaussianProcessClassifier(kernel)
    gp.fit(X, y)

    print(gp.kernel_)
    print(gp.log_marginal_likelihood())

    print("Time for GPR fitting: %.3f" % (time.time() - stime))


    # create a mesh to plot in
    h = 0.1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                        np.arange(y_min, y_max, h))

    plt.figure(figsize=(10, 5))
    
    # Plot the predicted probabilities. For that, we will assign a color to
    # each point in the mesh [x_min, x_max]x[y_min, y_max].

    Z = gp.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:,1]
    print(Z)
    print(Z.shape)
    # Put the result into a color plot
    Z = Z.reshape((xx.shape[0], xx.shape[1]))
    print(Z.shape)
    plt.imshow(Z, extent=(x_min, x_max, y_min, y_max), origin="lower")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=np.array(["r", "g"])[y])
    plt.xlabel('position')
    plt.ylabel('normalized val')
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title("%s, LML: %.3f" %
            ("TN vs. Control", gp.log_marginal_likelihood(gp.kernel_.theta)))

    plt.tight_layout()


    if options.title:
        plt.suptitle(options.title)

    if options.output:
        plt.savefig(options.output, dpi=150)

    if options.is_show:
        plt.show()
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    h = .02  # step size in the mesh

    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
            "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
            "Naive Bayes", "QDA"]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()]

    # X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
    #                         random_state=1, n_clusters_per_class=1)
    
    
    rng = np.random.RandomState(2)
    # X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
Example #16
    def __init__(self, **kwargs):
        super(GaussianProcess, self).__init__()
        super(GaussianProcess, self).SetModel(
            GaussianProcessClassifier(random_state=42, **kwargs))
Example #17
label = shuffle(label, random_state=41)[:5000]
Kfold = StratifiedKFold(n_splits=n_splits)

accuracy_rbf_training = np.zeros(n_splits)
accuracy_rbf_testing = np.zeros(n_splits)
accuracy_matern_training = np.zeros(n_splits)
accuracy_matern_testing = np.zeros(n_splits)
nlpd_matern_t = np.zeros(n_splits)
nlpd_matern_v = np.zeros(n_splits)
best_kernel = None
best_nlpd = np.inf

for i, (train_index, test_index) in enumerate(Kfold.split(features, label)):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = label[train_index], label[test_index]
    gp_matern_fix = GaussianProcessClassifier(kernel=3.7**2 * Matern(length_scale=9.4, nu=1.5),
                                              optimizer=None)
    gp_matern_fix.fit(X_train,y_train)
    accuracy_matern_training[i] = accuracy_score(y_train, gp_matern_fix.predict(X_train))
    accuracy_matern_testing[i] = accuracy_score(y_test, gp_matern_fix.predict(X_test))
    neg_lpd_matern_t = -np.mean(np.log(gp_matern_fix.predict_proba(X_train)[np.arange(len(X_train)), y_train]))
    neg_lpd_matern_v = -np.mean(np.log(gp_matern_fix.predict_proba(X_test)[np.arange(len(X_test)), y_test]))
    nlpd_matern_t[i] = neg_lpd_matern_t
    nlpd_matern_v[i] = neg_lpd_matern_v
print("Average training accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_traing))
print("Average testing accuracy with matern kernel: %.5f" % np.mean(accuracy_matern_testing))
print("Average negative log predictive density of training set with matern kernel: %.5f"
      % np.mean(nlpd_matern_t))
print("Average negative log predictive density of validation set with matern kernel: %.5f"
      % np.mean(nlpd_matern_v))
print("Total elapsed time: %.5f" % (time.time()-elapsed))
aa.write("\n-------------------10-fold corss-validation result--------------------\n")
for i in n_restarts_optimizerR: 
    for j in max_iter_predictR:
#         print("\tthe param is :%s" % i)
#         print("\tthe param is :%s" % j)
        MCC=[]
        ACC=[]
        SN =[]
        SP=[]
        precision=[]
        NPV=[]
        F1=[]
        
        k=1  
        for train, test in cv.split(X, y):
            gpc = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                            n_restarts_optimizer=i,
                                            max_iter_predict=j)
            y_true, y_pred = y[test], gpc.fit(X.iloc[train], y[train]).predict(X.iloc[test])
#             print("\tmatthews_corrcoef: %1.3f" % metrics.matthews_corrcoef(y_true, y_pred))
            MCCv=metrics.matthews_corrcoef(y_true, y_pred)
            MCC.append(MCCv)
    #         print("\taccuracy_score: %1.3f\n" % metrics.accuracy_score(y_true, y_pred))
            ACCv=metrics.accuracy_score(y_true, y_pred)
            ACC.append(ACCv)
            
            SNv,SPv,precisionv,NPVv,F1v = comE(y_true, y_pred)
            ## y_true, y_pred 
            SN.append(SNv)
            SP.append(SPv)
            precision.append(precisionv)
            NPV.append(NPVv)
            F1.append(F1v)
#CHALLENGE - ...and train them on our data
clf = clf.fit(X, Y)
# Calling decision tree classifier and fitting
clf1 =tree.DecisionTreeClassifier()
clfDT =clf1.fit(X, Y) 

#Calling support vector machine and fitting
clf2 = svm.SVC(probability=True)
clfSVC =clf2.fit(X, Y)  

#Calling KNeighbors classifier and fitting 
clf3 = KNeighborsClassifier(n_neighbors=3)
clfKN =clf3.fit(X, Y)  

#Calling gaussian_process classifier and fitting 
clf4 = GaussianProcessClassifier()
clfGP = clf4.fit(X, Y)

##Calling MLPClassifier and fitting 
clf5 = MLPClassifier(learning_rate='constant', learning_rate_init=0.001,)
clfMLP = clf5.fit(X, Y)

test = [[180, 80, 42]]
#Storing results 
predictionDT = clfDT.predict(test)
predictionSVC = clfSVC.predict(test)
predictionKN = clfKN.predict(test)
predictionGP = clfGP.predict(test)
predictionMLP = clfMLP.predict(test)

#Storing probabilities
# Prediction time!
linear_models = []

linear_models.append(("LogisticRegression", LogisticRegression()))

kernel_models = []

kernel_models.append(("Linear Support Vector Classifier", LinearSVC()))
kernel_models.append(
    ("Support Vector Classifier", SVC(kernel="rbf", probability=True)))
kernel_models.append(("Nu Support Vector Classifer", NuSVC(probability=True)))

neighbor_models = [("K-nearest neighbours Ball",
                    KNeighborsClassifier(algorithm='ball_tree'))]

gaussian_models = [("Gaussian Process", GaussianProcessClassifier())]
deTree_models = [("Decision Tree", DecisionTreeClassifier())]

ensemble_models = []

ensemble_models.append(("Random forest", RandomForestClassifier()))
ensemble_models.append(("AdaBoost", AdaBoostClassifier()))
ensemble_models.append(("GradientBoosting", GradientBoostingClassifier()))

mlpNetwork_models = [("MLP NNetwork", MLPClassifier())]

model_families = [("Linear Models", linear_models),
                  ("Kernel Methods", kernel_models),
                  ("Neighbour", neighbor_models),
                  ("Gaussian Methods", gaussian_models),
                  ("Decision Tree", deTree_models),
    def gpc(self, dataset_array, label_array, data_teste):
        from sklearn.gaussian_process import GaussianProcessClassifier

        clf = GaussianProcessClassifier(max_iter_predict=300)
        clf.fit(dataset_array, label_array)
        return clf.predict(data_teste)
# Design of experiments
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(- lim, lim, res),
                     np.linspace(- lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
Example #23
    Test_X_B = Img_Bank[i].Raw_Img[:,:,2].ravel()
    
    Temp_X = np.array((Test_X_R,Test_X_G,Test_X_B)).transpose()
    Temp_Y = Img_Bank[i].Raw_Mask.ravel()
    
    Test_X = np.concatenate((Test_X,Temp_X))
    Test_Y = np.concatenate((Test_Y,Temp_Y))

#%% GPC learning trial (a KNN classifier is left commented out below).
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

#classifier = KNeighborsClassifier(n_neighbors=10, n_jobs=-1, verbose )
kernel = 1.0 * RBF(1.0)
classifier = GaussianProcessClassifier(kernel=kernel, random_state=0)

print('GPC - Learning Started')
classifier.fit(Test_X, Test_Y)
print('GPC - Learning Finished')

print('GPC - Prediction Started')
Test_photo = 10
Test_X_R = Img_Bank[Test_photo].Raw_Img[:,:,0].ravel()
Test_X_G = Img_Bank[Test_photo].Raw_Img[:,:,1].ravel()
Test_X_B = Img_Bank[Test_photo].Raw_Img[:,:,2].ravel()
Test_X = np.array((Test_X_R,Test_X_G,Test_X_B)).transpose()

Y_Pred = classifier.predict(Test_X)
Y_Pred = np.reshape(Y_Pred,(384,512))
plt.matshow(Y_Pred)
print(classification_report(y_true, y_pred))
print()

"""## GaussianProcessClassifier"""

# Commented out IPython magic to ensure Python compatibility.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2)

tuned_parameters = [{'kernel':[1.0 * RBF(0.8),1.0 * RBF(0.9),1.0 * RBF(1.0),1.0 * RBF(1.1),1.0 * RBF(1.2),1.0 * RBF(1.3)]}
                    ]


print()

clf = GridSearchCV(
    GaussianProcessClassifier(), tuned_parameters, scoring='roc_auc', cv=10
)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
#           % (mean, std * 2, params))
print()
Example #25
    'L2 logistic (Multinomial)':
    LogisticRegression(C=C,
                       penalty='l2',
                       solver='saga',
                       multi_class='multinomial',
                       max_iter=10000),
    'L2 logistic (OvR)':
    LogisticRegression(C=C,
                       penalty='l2',
                       solver='saga',
                       multi_class='ovr',
                       max_iter=10000),
    'Linear SVC':
    SVC(kernel='linear', C=C, probability=True, random_state=0),
    'GPC':
    GaussianProcessClassifier(kernel)
}

n_classifiers = len(classifiers)

plt.figure(figsize=(n_features * (X[:, 0].max() - X[:, 0].min()),
                    n_classifiers * (X[:, 1].max() - X[:, 1].min())))
plt.subplots_adjust(bottom=-.1, top=1)
xx = np.linspace(floor(X[:, 0].min()), ceil(X[:, 0].max()), 100)
yy = np.linspace(floor(X[:, 1].min()), ceil(X[:, 1].max()), 100).T
xx, yy = np.meshgrid(xx, yy)
Xfull = np.c_[xx.ravel(), yy.ravel()]

for index, (name, classifier) in enumerate(classifiers.items()):
    classifier.fit(X, y)
Example #26
def get_model(model_or_name, threads=-1, classify=False, seed=0):
    regression_models = {
        'xgboost': (XGBRegressor(max_depth=6,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBRegressor'),
        'lightgbm': (LGBMRegressor(n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMRegressor'),
        'randomforest':
        (RandomForestRegressor(n_estimators=100,
                               n_jobs=threads), 'RandomForestRegressor'),
        'adaboost': (AdaBoostRegressor(), 'AdaBoostRegressor'),
        'linear': (LinearRegression(), 'LinearRegression'),
        'elasticnet': (ElasticNetCV(positive=True), 'ElasticNetCV'),
        'lasso': (LassoCV(positive=True), 'LassoCV'),
        'ridge': (Ridge(), 'Ridge'),
        'xgb.1k': (XGBRegressor(max_depth=6,
                                n_estimators=1000,
                                n_jobs=threads,
                                random_state=seed), 'XGBRegressor.1K'),
        'xgb.10k': (XGBRegressor(max_depth=6,
                                 n_estimators=10000,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBRegressor.10K'),
        'lgbm.1k': (LGBMRegressor(n_estimators=1000,
                                  n_jobs=threads,
                                  random_state=seed,
                                  verbose=-1), 'LGBMRegressor.1K'),
        'lgbm.10k': (LGBMRegressor(n_estimators=10000,
                                   n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMRegressor.10K'),
        'rf.1k':
        (RandomForestRegressor(n_estimators=1000,
                               n_jobs=threads), 'RandomForestRegressor.1K'),
        'rf.10k': (RandomForestRegressor(n_estimators=10000, n_jobs=threads),
                   'RandomForestRegressor.10K')
    }

    classification_models = {
        'xgboost': (XGBClassifier(max_depth=6,
                                  n_jobs=threads,
                                  random_state=seed), 'XGBClassifier'),
        'lightgbm': (LGBMClassifier(n_jobs=threads,
                                    random_state=seed,
                                    verbose=-1), 'LGBMClassifier'),
        'randomforest':
        (RandomForestClassifier(n_estimators=100,
                                n_jobs=threads), 'RandomForestClassifier'),
        'adaboost': (AdaBoostClassifier(), 'AdaBoostClassifier'),
        'logistic': (LogisticRegression(), 'LogisticRegression'),
        'gaussian': (GaussianProcessClassifier(), 'GaussianProcessClassifier'),
        'knn': (KNeighborsClassifier(), 'KNeighborsClassifier'),
        'bayes': (GaussianNB(), 'GaussianNB'),
        'svm': (SVC(), 'SVC'),
        'xgb.1k': (XGBClassifier(max_depth=6,
                                 n_estimators=1000,
                                 n_jobs=threads,
                                 random_state=seed), 'XGBClassifier.1K'),
        'xgb.10k': (XGBClassifier(max_depth=6,
                                  n_estimators=10000,
                                  n_jobs=threads,
                                  random_state=seed), 'XGBClassifier.10K'),
        'lgbm.1k': (LGBMClassifier(n_estimators=1000,
                                   n_jobs=threads,
                                   random_state=seed,
                                   verbose=-1), 'LGBMClassifier.1K'),
        'lgbm.10k': (LGBMClassifier(n_estimators=1000,
                                    n_jobs=threads,
                                    random_state=seed,
                                    verbose=-1), 'LGBMClassifier.10K'),
        'rf.1k':
        (RandomForestClassifier(n_estimators=1000,
                                n_jobs=threads), 'RandomForestClassifier.1K'),
        'rf.10k': (RandomForestClassifier(n_estimators=10000, n_jobs=threads),
                   'RandomForestClassifier.10K')
    }

    if isinstance(model_or_name, str):
        if classify:
            model_and_name = classification_models.get(model_or_name.lower())
        else:
            model_and_name = regression_models.get(model_or_name.lower())
        if not model_and_name:
            raise Exception("unrecognized model: '{}'".format(model_or_name))
        else:
            model, name = model_and_name
    else:
        model = model_or_name
        name = re.search(r"\w+", str(model)).group(0)

    return model, name
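
# Usage sketch (added for illustration): get_model returns an unfitted estimator
# plus a display name; `classify` switches between the two model dictionaries.
demo_model, demo_name = get_model('gaussian', classify=True)
print(demo_name)   # GaussianProcessClassifier
demo_reg, demo_reg_name = get_model('ridge')
print(demo_reg_name)   # Ridge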
Example #27
    mask = NMBA > 0
    NMBA = mask * 1
    X = pd.concat([NMBA, Age, Berlin, Sex, Weight], axis=1)

collist = list(X.columns)
imp = Imputer(missing_values='NaN', strategy='most_frequent', axis=0)
imp.fit(X)
X = imp.transform(X)
X = pd.DataFrame(X, columns=collist)
X_train, X_test, Y_train, Y_test = \
    train_test_split(X,Y,test_size=0.1,random_state=1)

# Kernel
# myKernel = kernels.Sum(kernels.Matern(), kernels.RBF())

# myKernel = kernels.Sum(myKernel,kernels.RationalQuadratic())
# myKernel = kernels.Sum(myKernel,kernels.DotProduct())
myKernel = kernels.RBF()
myKernel = kernels.Sum(myKernel, kernels.DotProduct())
myKernel = kernels.Sum(myKernel, kernels.ConstantKernel())
# myKernel = kernels.Product(myKernel, kernels.DotProduct())
# myKernel = kernels.Sum(myKernel,kernels.ConstantKernel())
model = GaussianProcessClassifier(kernel=myKernel, warm_start=True, n_jobs=2)
model.fit(X_train, Y_train)
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(Y_test, predictions)

print(round(accuracy, 2))
# filename = 'gp.pkl'
# pickle.dump(model, open(filename, 'wb'))
Example #28
maxIter = 1000
tolerance = 1e-3

svc = SVC()
svmLinear = SVC(kernel='linear')
ridge = lm.SGDClassifier(loss='squared_loss',
                         penalty='l2',
                         alpha=0.5,
                         max_iter=maxIter)
logistic = lm.LogisticRegression()
cartTree = tree.DecisionTreeClassifier()

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

gp = GaussianProcessClassifier(kernel=RBF(), multi_class='one_vs_one')

# Bagging
from sklearn.ensemble import BaggingClassifier

cartTree_bagging = BaggingClassifier(cartTree,
                                     max_samples=0.7,
                                     max_features=1.0)

# Neural Network
from sklearn.neural_network import MLPClassifier
nn = MLPClassifier(solver='lbfgs', alpha=1e-6,\
                            hidden_layer_sizes=[18,12], random_state=1)

modelList = [['ridge', ridge], ['RBF SVM', svc], ['Linear SVM', svmLinear],
             ['Logistic', logistic], ['CART Decision Tree', cartTree],
Example #29
classifiers = {
    'mock': MockClassifier(),
    'nbayes': GaussianNB(),
    'logistic': LogisticRegression(random_state=42),
    #'adao': our.AdaBoostClassifier(n_estimators=200),
    'adas': their.AdaBoostClassifier(n_estimators=200, random_state=42),
    'forest': RandomForestClassifier(n_estimators=200, random_state=42),
    'mlp': MLPClassifier(random_state=42),
    'svm': SVC(probability=True, random_state=42),
    'knn': KNeighborsClassifier(3),
    'svc-linear': SVC(kernel="linear",
                      C=0.025,
                      probability=True,
                      random_state=42),
    'svc-rbf': SVC(gamma=2, C=1, probability=True, random_state=42),
    'gp': GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),
    'tree': DecisionTreeClassifier(max_depth=5, random_state=42),
    'qda': QuadraticDiscriminantAnalysis(reg_param=0.1),
    'lda': LinearDiscriminantAnalysis()
}

score_types = {
    'mock': 'predict_proba',
    'nbayes': 'predict_proba',
    'logistic': 'predict_proba',
    #'adao': 'predict_proba',
    'adas': 'predict_proba',
    'forest': 'predict_proba',
    'mlp': 'predict_proba',
    'svm': 'sigmoid',
    'knn': 'predict_proba',
Example #30
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct

from sklearn import metrics

from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

train, test = train_test_split(importation.t, test_size=0.1)
spamtrain, spamtest = train_test_split(importation.valspam, test_size=0.1)

kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0)**2]

for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True)

    clf.fit(train, spamtrain)

    expected = spamtest
    predicted = clf.predict(test)

    print(clf.score(train, spamtrain))

    #print("Classification report for classifier %s:\n%s\n"
    #     % (clf, metrics.classification_report(expected, predicted)))

    #print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))

    #print("LA SUITE...\n\n\n\n")
#remained_feature_indices=np.where(mask==1)

selected_features_Oulu_data = Oulu_data_train * mask
#selected_features_Oulu_data_test=Oulu_data_test*mask

print(selected_features_Oulu_data.shape)
print(Oulu_labels_train.shape)
#print(selected_features_Oulu_data_test.shape)
#print(Oulu_labels_test.shape)

# the Gaussian process classification
kernel = 1.0 * RBF(214)

gpc = GaussianProcessClassifier(kernel=kernel,
                                n_restarts_optimizer=5,
                                random_state=None,
                                multi_class="one_vs_rest",
                                max_iter_predict=100,
                                n_jobs=-1)
gpc = gpc.fit(selected_features_Oulu_data, Oulu_labels_train)

print('')
print('accuracy on training set:',
      gpc.score(selected_features_Oulu_data, Oulu_labels_train))
#print('accuracy on testset of the hospital:',gpc.score(selected_features_Oulu_data_test, Oulu_labels_test))
'''
probs = gpc.predict_proba(selected_features_Oulu_data_test) 
probs = probs[:, 1]  
auc = roc_auc_score(Oulu_labels_test, probs)  
print('AUC: %.2f' % auc)
print(f1_score(Oulu_labels_test, gpc.predict(selected_features_Oulu_data_test), average='weighted')) 
'''
Example #32
def feature_training(feature_path,
                     results_path,
                     seg_name,
                     train_split=90,
                     cls_set="basic",
                     save_confusion_matrices=True):
    """
    Function to run the complete training pipeline
    :param feature_path: Path to the directory containing the NPY files with the features
    :param results_path: Path to the directory where the results should be saved
    :param seg_name: Name of the segmentation
    :param train_split: Percentage of data used for training, e.g. 80
    :param cls_set: Either a list of classifier names to be used, or the single name of one classifier; if not
    given, all classifiers will be used
    :param save_confusion_matrices: True if the confusion matrices should be saved
    :return: List of accuracies
    """
    split = (train_split / 100, (100 - train_split) / 100)
    (train_data, test_data), (train_label,
                              test_label) = load_features(split, feature_path)

    names = [
        "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
        "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
        "Naive Bayes", "QDA"
    ]

    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis()
    ]

    if type(cls_set) == list:
        if all(x in names for x in cls_set):
            classifiers = [
                classifiers[names.index(classifier)] for classifier in cls_set
            ]
            names = cls_set
    elif cls_set in names:
        classifiers = [classifiers[names.index(cls_set)]]
        names = [cls_set]

    confusion_matrices, accuracies = features_train_test(
        classifiers, train_data, train_label, test_data, test_label)

    accuracies = dict(zip(names, accuracies))

    if save_confusion_matrices:
        confusion_matrices = dict(zip(names, confusion_matrices))
        with open(
                os.path.join(results_path,
                             "confusion_matrices_seg_{}".format(seg_name)),
                "wb") as matrix_file:
            pickle.dump(confusion_matrices, matrix_file)

    return accuracies
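
# Usage sketch (illustrative, assumes the feature NPY files expected by
# load_features() exist under "features/"):
# accuracies = feature_training("features/", "results/", seg_name="seg01",
#                               train_split=80,
#                               cls_set=["RBF SVM", "Random Forest"])
# print(accuracies)   # e.g. {"RBF SVM": ..., "Random Forest": ...}
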
def trainPredict(subjectid, makeplot=False):
    print("testing participant " + subjectid)
    # Load training data from the file matlab generates
    traindata = np.genfromtxt('csvdata/' + subjectid +
                              '_sim.csv', delimiter=',',
                              missing_values=['NaN', 'nan'],
                              filling_values=None)
    # Clean + downsample this data
    trainx, trainy = cleandata(traindata, downsamplefactor=20)

    # Train a Gaussian Process
    anisokern = kernels.RBF()  # default kernel
    gp = GaussianProcessClassifier(kernel=anisokern)  # Initialize the GPC
    gp.fit(trainx, trainy)  # train this class on the data
    trainx = trainy = None  # Discard all training data to preserve memory

    # load test data
    testdata = np.genfromtxt('csvdata/' + subjectid +
                             '_rival.csv', delimiter=',',
                             missing_values=['NaN', 'nan'],
                             filling_values=None)
    testx, testy = cleandata(testdata, downsamplefactor=4)  # clean data

    testdata = None  # clear from memory
    # work out percentage in percept for each data point:
    percentages, nextpercept = assign_percentage(testy)

    # get a prediction for all points in the test data:
    predicty = gp.predict(testx)
    proby = gp.predict_proba(testx)

    if makeplot:
        summaryplot(subjectid, testx, testy, predicty, proby, gp)

    # Summarise prediction by reported percept
    meanprediction = {'mean' + percept:
                      proby[testy == value, 1].mean()
                      for percept, value in perceptindices.items()}
    predictiondev = {'stdev' + percept:
                     proby[testy == value, 1].std()
                     for percept, value in perceptindices.items()}
    predictionaccuracy = {'acc' + percept:
                          (predicty[testy == value] ==
                           testy[testy == value]).mean()
                          for percept, value in perceptindices.items()}
    # Summarise prediction by percentage in percept
    predictioncourse = {'timecourse' + percept + str(cutoff):
                        proby[(testy == value) &
                              (percentages < cutoff) &
                              (percentages > cutoff - 0.1), 1].mean()
                        for percept, value in perceptindices.items()
                        for cutoff in np.linspace(0.1, 1, 10)}

    # Summarise mixed percept time courses by the next percept
    nextcourse = {'nextcourse' + percept + str(cutoff):
                  proby[(testy == 0) &
                        (percentages < cutoff) &
                        (percentages > cutoff - 0.1) &
                        (nextpercept == perceptindices[percept]), 1].mean()
                  for percept in ['highfreq', 'lowfreq']
                  for cutoff in np.linspace(0.1, 1, 10)}

    afterdominant = {'after' + percept + "_" + after + "_" + str(cutoff):
                     proby[(testy == perceptindices[percept]) &
                           (percentages < cutoff) &
                           (percentages > cutoff - 0.1) &
                           (nextpercept == perceptindices[after]), 1].mean()
                     for percept, after in [('highfreq', 'mixed'),
                                            ('highfreq', 'lowfreq'),
                                            ('lowfreq', 'mixed'),
                                            ('lowfreq', 'highfreq')]
                     for cutoff in np.linspace(0.1, 1, 10)}

    # Only return the summarised data
    return meanprediction, predictiondev, predictionaccuracy, \
        predictioncourse, nextcourse, afterdominant
Example #34
def do_machinea_leaning_stuff(train_X, train_Y, test_X, test_Y):
    returnValue = []
    test_predict_Y = []

    # TODO: do something with this result
    #f_classif(X, y);

    # Classification algorithms
    rfc = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
    rfc.fit(train_X, train_Y)
    test_predict_Y = rfc.predict(test_X)
    returnValue.append({
        'name':
        "RandomForestClassifier",
        'score':
        rfc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    etc = ExtraTreesClassifier()
    etc.fit(train_X, train_Y)
    test_predict_Y = etc.predict(test_X)
    returnValue.append({
        'name':
        "ExtraTreesClassifier",
        'score':
        etc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    gpc = GaussianProcessClassifier(random_state=0)
    gpc.fit(train_X, train_Y)
    test_predict_Y = gpc.predict(test_X)
    # TODO: we could also use print(gpc.predict_proba(test_X))
    returnValue.append({
        'name':
        "GaussianProcessClassifier",
        'score':
        gpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    pac = PassiveAggressiveClassifier(max_iter=1000, random_state=0, tol=1e-3)
    pac.fit(train_X, train_Y)
    test_predict_Y = pac.predict(test_X)
    returnValue.append({
        'name':
        "PassiveAggressiveClassifier",
        'score':
        pac.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    rc = RidgeClassifier()
    rc.fit(train_X, train_Y)
    test_predict_Y = rc.predict(test_X)
    returnValue.append({
        'name':
        "RidgeClassifier",
        'score':
        rc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    sgdc = SGDClassifier(max_iter=1000, tol=1e-3)
    sgdc.fit(train_X, train_Y)
    test_predict_Y = sgdc.predict(test_X)
    returnValue.append({
        'name':
        "SGDClassifier",
        'score':
        sgdc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    bnb = BernoulliNB()
    bnb.fit(train_X, train_Y)
    test_predict_Y = bnb.predict(test_X)
    returnValue.append({
        'name':
        "BernoulliNB",
        'score':
        bnb.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    knnc = KNeighborsClassifier(n_neighbors=3)
    knnc.fit(train_X, train_Y)
    test_predict_Y = knnc.predict(test_X)
    returnValue.append({
        'name':
        "KNeighborsClassifier",
        'score':
        knnc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    mlpc = MLPClassifier()
    mlpc.fit(train_X, train_Y)
    test_predict_Y = mlpc.predict(test_X)
    returnValue.append({
        'name':
        "MLPClassifier",
        'score':
        mlpc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    label_prop_model = LabelPropagation()
    rng = np.random.RandomState(42)
    random_unlabeled_points = rng.rand(len(train_Y)) < 0.3
    labels = np.copy(train_Y)
    labels[random_unlabeled_points] = -1
    label_prop_model.fit(train_X, labels)
    test_predict_Y = label_prop_model.predict(test_X)
    returnValue.append({
        'name':
        "LabelPropagation",
        'score':
        label_prop_model.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    lsvc = LinearSVC(random_state=0, tol=1e-5)
    lsvc.fit(train_X, train_Y)
    test_predict_Y = lsvc.predict(test_X)
    returnValue.append({
        'name':
        "LinearSVC",
        'score':
        lsvc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    svc = SVC(gamma='auto')
    svc.fit(train_X, train_Y)
    test_predict_Y = svc.predict(test_X)
    returnValue.append({
        'name':
        "SVC",
        'score':
        svc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    dtc = DecisionTreeClassifier(random_state=0)
    dtc.fit(train_X, train_Y)
    test_predict_Y = dtc.predict(test_X)
    returnValue.append({
        'name':
        "DecisionTreeClassifier",
        'score':
        dtc.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    cccv = CalibratedClassifierCV()
    cccv.fit(train_X, train_Y)
    test_predict_Y = cccv.predict(test_X)
    returnValue.append({
        'name':
        "CalibratedClassifierCV",
        'score':
        cccv.score(test_X, test_Y),
        'accuracy_naive':
        (test_Y != test_predict_Y).sum() * 1.0 / len(test_predict_Y),
        'accuracy_score':
        accuracy_score(test_Y, test_predict_Y),
        'classification_report':
        classification_report(test_Y, test_predict_Y)
    })

    return returnValue
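
# A minimal, hedged usage sketch for the list of per-classifier result dicts
# returned above (the enclosing function's name is not shown in this excerpt,
# so the sketch only relies on the dict keys used above):
def print_classifier_summary(results):
    # one line per classifier: name, mean score and accuracy on the test split
    for entry in results:
        print("%-25s score=%.3f accuracy=%.3f"
              % (entry['name'], entry['score'], entry['accuracy_score']))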
Beispiel #35
0
import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct


xx, yy = np.meshgrid(np.linspace(-3, 3, 50),
                     np.linspace(-3, 3, 50))
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
plt.figure(figsize=(10, 5))
kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0)**2]
for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y)

    # plot the decision function for each datapoint on the grid
    Z = clf.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)[:, 1]
    Z = Z.reshape(xx.shape)

    plt.subplot(1, 2, i + 1)
    image = plt.imshow(Z, interpolation='nearest',
                       extent=(xx.min(), xx.max(), yy.min(), yy.max()),
                       aspect='auto', origin='lower', cmap=plt.cm.PuOr_r)
    contours = plt.contour(xx, yy, Z, levels=[0.5], linewidths=2,
                           linestyles='--')
    plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired)
    plt.xticks(())
    plt.yticks(())
    plt.axis([-3, 3, -3, 3])
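
# Render both panels (hedged addition, not part of the original snippet;
# plt.show() assumes an interactive matplotlib backend).
plt.tight_layout()
plt.show()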
Beispiel #36
0
def GPC():
    # 'kernel' is assumed to be defined at module level in the original source
    return GaussianProcessClassifier(kernel)
import numpy as np
from matplotlib import pyplot as plt

from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF


# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0), optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f" % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f" % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print(
    "Accuracy: %.3f (initial) %.3f (optimized)"
    % (
        accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
        accuracy_score(y[:train_size], gp_opt.predict(X[:train_size])),
    )
)
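
# The accuracies above are computed on the training slice only. A hedged extra
# check on the held-out half of the data (X[train_size:] is generated above
# but never used for fitting):
print(
    "Held-out accuracy: %.3f (initial) %.3f (optimized)"
    % (
        accuracy_score(y[train_size:], gp_fix.predict(X[train_size:])),
        accuracy_score(y[train_size:], gp_opt.predict(X[train_size:])),
    )
)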
Beispiel #38
0
import numpy as np

from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF



# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)"
      % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
         accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
print("Log-loss: %.3f (initial) %.3f (optimized)"
      % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]),
    ## TODO: Add any additional arguments that you will need to pass into your model
    parser.add_argument('--length_scale', type=float, default=1.0)
    parser.add_argument('--kernel_scaling', type=float, default=1.0)

    # args holds all passed-in arguments
    args = parser.parse_args()

    # Read in csv training file
    training_dir = args.data_dir
    train_data = pd.read_csv(os.path.join(training_dir, "train.csv"),
                             header=None,
                             names=None)

    # Labels are in the first column
    train_y = train_data.iloc[:, 0]
    train_x = train_data.iloc[:, 1:]

    ## --- Your code here --- ##

    ## TODO: Define a model
    model = GaussianProcessClassifier(args.kernel_scaling *
                                      RBF(args.length_scale))

    ## TODO: Train the model
    model.fit(train_x, train_y)

    ## --- End of your code  --- ##

    # Save the trained model
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
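
    # Hedged note: at inference time the saved artifact can be restored with
    # the mirror call, e.g.
    #   model = joblib.load(os.path.join(model_dir, "model.joblib"))
    # where model_dir points at the directory used for args.model_dir above.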
Beispiel #40
0
Ntrain_vec = [20, 50, 100, 500, 1000, 1500]

for i in Ntrain_vec:

    #N = 1000
    Ntrain = int(i)  # np.int is deprecated; the builtin int is sufficient
    #Ntest = 100

    Xtrain = X[0:Ntrain, :]  # first Ntrain samples for training
    ytrain = y[0:Ntrain]
    Xtest = X[Ntrain:N, :]   # remaining samples for testing
    ytest = y[Ntrain:N]

    #kernel = 1.0 * RBF([20.0]) # isotropic kernel #Test error rate = 0.89
    kernel = DotProduct(20.0)  #Test error rate = 0.14
    gpc_rbf = GaussianProcessClassifier(kernel=kernel).fit(Xtrain, ytrain)
    yp_train = gpc_rbf.predict(Xtrain)

    train_error_rate = np.mean(np.not_equal(yp_train, ytrain))
    yp_test = gpc_rbf.predict(Xtest)
    test_error_rate = np.mean(np.not_equal(yp_test, ytest))

    print('Training error rate')
    print(train_error_rate)
    print('Test error rate')
    print(test_error_rate)
"""
=====================================================
Confusion matrix for GP regression
=====================================================
Beispiel #41
0
def test_lml_improving(kernel):
    # Test that hyperparameter-tuning improves log-marginal likelihood.
    gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
    assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                   gpc.log_marginal_likelihood(kernel.theta))
Beispiel #42
0
logging.basicConfig(level=logging.DEBUG)

h = .02  # step size in the mesh

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA", "Custom NN"
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
    NeuralNetwork([
        NeuronLayer(10, 2, 'leaky_relu', True),
        NeuronLayer(1, 10, 'sigmoid', True)
    ],
                  learning_rate=0.1,
                  step_decay_factor=0.99)
]

X, y = make_classification(n_features=2,
Beispiel #43
0
#
# Initiate classifiers
#

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA"
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(),
    DecisionTreeClassifier(max_depth=15),
    RandomForestClassifier(max_depth=15, n_estimators=5, max_features=2),
    MLPClassifier(),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()  # keeps the list aligned with the 10 names above
]

# iterate over classifiers
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    print('\n- Classifier: ' + name + ' scores ' + str(score) + '.')

#
# Keeping the algo that outperforms the others without tweaking (see the sketch below)
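
# A minimal, hedged sketch of what that selection step could look like, reusing
# the `names`, `classifiers`, `X_test`, and `y_test` objects fitted in the loop
# above (the original code for this step is not shown in this excerpt):
best_name, best_clf = max(zip(names, classifiers),
                          key=lambda pair: pair[1].score(X_test, y_test))
print('- Keeping ' + best_name + ' as the baseline classifier.')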
    def selectNext(self):
        proba_in = np.ones(self.poolData[self.indicesUnknown].shape[0])
        proba_in *= self.model.predict_proba(
            self.poolData[self.indicesUnknown])[:, 1]

        predicted = []
        predictedFalse = []
        for p in proba_in:
            if p < 0.5:
                # if we knew the predicted label l were correct, we could add
                # it to the training set and retrain
                predicted.append(0)
                # if we knew the predicted label l were wrong, we would add
                # 1 - l to the training set instead
                predictedFalse.append(1)
            else:
                predicted.append(1)
                predictedFalse.append(0)

        # now consider both outcomes for each item and retrain the classifier
        # separately to estimate its expected error
        eRightList = []
        eWrongList = []
        for i in range(len(predicted)):
            if i in self.indicesUntrained:
                X_train = np.append(self.trainData, [self.poolData[i]], axis=0)
                y_trainRight = np.append(self.trainLabels, predicted[i])
                y_trainWrong = np.append(self.trainLabels, predictedFalse[i])
            else:
                X_train = copy.deepcopy(self.trainData)
                if self.voteAggMethod == 'majorityVoting':
                    selectedVote = self.aggregator.majorityVoting(
                        self.queriedVoteHistory, i)
                elif self.voteAggMethod == 'DS':
                    selectedVote = self.aggregator.DS(
                        self.queriedVoteHistory, self.queriedVoteOwnersHistory,
                        i)
                else:
                    print(
                        "no valid vote aggregation technique selected, the default -majorityVoting- is used!"
                    )
                    selectedVote = self.aggregator.majorityVoting(
                        self.queriedVoteHistory, i)
                y_trainRight = copy.deepcopy(self.trainLabels)
                y_trainWrong = copy.deepcopy(self.trainLabels)
                y_trainRight[np.where(self.indicesTrained == i)] = selectedVote
                y_trainWrong[np.where(self.indicesTrained == i)] = selectedVote
            self.model.fit(X_train, y_trainRight)
            scores = cross_val_score(self.model,
                                     X_train,
                                     y_trainRight,
                                     cv=5,
                                     scoring='f1')
            eRightList.append(scores.mean())
            self.model.fit(X_train, y_trainWrong)
            scores = cross_val_score(self.model,
                                     X_train,
                                     y_trainWrong,
                                     cv=5,
                                     scoring='f1')
            eWrongList.append(scores.mean())

        # update the active learner back
        self.model.fit(self.trainData, self.trainLabels)

        # creating bootstraps
        bootstraps = []
        for i in range(10):
            size = self.trainData.shape[0]
            bootstrap_idx = np.random.choice(range(size), size, replace=True)
            bst_x = self.trainData[bootstrap_idx, :]
            bst_y = self.trainLabels[bootstrap_idx]
            bootstraps.append([bst_x, bst_y])

        # defining ML classifiers to use on the bootstrapped training sets
        clfs = [
            LogisticRegression(C=1., solver='lbfgs'),
            CalibratedClassifierCV(SVC(kernel="linear", C=0.025), cv=3),
            CalibratedClassifierCV(SVC(gamma=2, C=1), cv=3),
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            CalibratedClassifierCV(DecisionTreeClassifier(max_depth=40), cv=3),
            CalibratedClassifierCV(RandomForestClassifier(max_depth=5,
                                                          n_estimators=10),
                                   cv=3),
            CalibratedClassifierCV(LinearSVC(class_weight='balanced', C=0.1),
                                   cv=3),
            AdaBoostClassifier(),
            GaussianNB(),
            CalibratedClassifierCV(RandomForestClassifier(n_estimators=50,
                                                          max_depth=40),
                                   cv=3)
        ]

        # use multiprocessing so each ML classifier predicts a label for every item in the pool
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        jobs = []
        for i in range(10):
            p = multiprocessing.Process(target=self.bootstrap_computation,
                                        args=(i, clfs[i], bootstraps[i],
                                              return_dict))
            jobs.append(p)
            p.start()

        for proc in jobs:
            proc.join()

        predictions = []
        for key, value in return_dict.items():
            predictions.append(value)

        # calculate p_u for each item: the fraction of bootstrapped classifiers
        # that agree with the active learner's prediction
        p_u = []
        for i in range(len(predicted)):
            p_i = 0
            for j in range(len(predictions)):
                if predicted[i] == predictions[j][i]:
                    p_i += 1
            p_u.append(p_i / 10)

        # expected error for item i: with probability p_u the prediction is
        # correct (eRightList[i]), otherwise it is wrong (eWrongList[i]),
        # i.e. eWrongList[i] - p_u[i] * (eWrongList[i] - eRightList[i])
        expErrorList = []
        for i in range(len(p_u)):
            expErrorList.append(eWrongList[i] - p_u[i] *
                                (eWrongList[i] - eRightList[i]))

        # get the indexes of min n_instances
        expErrorList = np.array(expErrorList)
        idx = np.argsort(expErrorList)
        query_idx = idx[0:self.batchSize]
        selectedIndex = self.indicesUnknown[query_idx]
        # return the selected items that have minimum expected error
        return selectedIndex
Beispiel #45
0
def test_lml_precomputed():
    # Test that lml of optimized kernel is stored correctly.
    for kernel in kernels:
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
        assert_almost_equal(gpc.log_marginal_likelihood(gpc.kernel_.theta),
                            gpc.log_marginal_likelihood(), 7)
Beispiel #46
0
Xs_train = []
Labels = []
for _ in range(5):
    d = resample(X_train, n_samples=502)
    Xs_train.append(d.iloc[:, :-1])
    Labels.append(d['labels'])

# # Gaussian Process Regression

# A considerably more difficult task was predicting MMSE, which is itself a very noisy measure [8]. We used Gaussian Process Regression (GPR) [5] to predict this measure from selected imaging, demographic, and genetic features. The choice of GPR was motivated by the ability of such models to fit very noisy data [4] (a minimal sketch follows below).
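
# A minimal, hedged sketch of the GPR idea described above: an RBF kernel plus
# a WhiteKernel term lets the model absorb observation noise in a target such
# as MMSE. The data below is synthetic and only illustrates the API, not the
# study's features.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

_rng = np.random.RandomState(0)
_X_demo = _rng.uniform(0, 5, (40, 1))
_y_demo = np.sin(_X_demo[:, 0]) + _rng.normal(scale=0.3, size=40)  # noisy target
_gpr = GaussianProcessRegressor(kernel=1.0 * RBF(1.0) + WhiteKernel(1.0))
_gpr.fit(_X_demo, _y_demo)
print("GPR demo, learned kernel:", _gpr.kernel_)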

# In[21]:

from sklearn.gaussian_process import GaussianProcessClassifier
# note: 'Welch' is not a valid optimizer for GaussianProcessClassifier; the
# built-in L-BFGS-B optimizer is used instead
gp = GaussianProcessClassifier(optimizer='fmin_l_bfgs_b',
                               multi_class='one_vs_rest',
                               n_restarts_optimizer=5)

# In[22]:

gpt = []
for i in range(5):
    gp.fit(Xs_train[i], Labels[i])
    gpt.append(gp.predict(X_test))

# In[23]:

gpc = []
for i in range(len(y_test)):
    p = []
    for j in gpt:
# Design of experiments
X = np.array([[-4.61611719, -6.00099547],
              [4.10469096, 5.32782448],
              [0.00000000, -0.50000000],
              [-6.17289014, -4.6984743],
              [1.3109306, -6.93271427],
              [-5.03823144, 3.10584743],
              [-2.87600388, 6.74310541],
              [5.21301203, 4.26386883]])

# Observations
y = np.array(g(X) > 0, dtype=int)

# Instantiate and fit Gaussian Process Model
kernel = C(0.1, (1e-5, np.inf)) * DotProduct(sigma_0=0.1) ** 2
gp = GaussianProcessClassifier(kernel=kernel)
gp.fit(X, y)
print("Learned kernel: %s " % gp.kernel_)

# Evaluate real function and the predicted probability
res = 50
x1, x2 = np.meshgrid(np.linspace(- lim, lim, res),
                     np.linspace(- lim, lim, res))
xx = np.vstack([x1.reshape(x1.size), x2.reshape(x2.size)]).T

y_true = g(xx)
y_prob = gp.predict_proba(xx)[:, 1]
y_true = y_true.reshape((res, res))
y_prob = y_prob.reshape((res, res))

# Plot the probabilistic classification iso-values
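
# A minimal, hedged sketch of an iso-value plot for the y_prob / y_true grids
# computed above (reuses lim, x1, x2 from this example and assumes matplotlib
# is available; not the original example's plotting code):
import matplotlib.pyplot as plt
plt.imshow(y_prob, extent=(-lim, lim, -lim, lim), origin='lower',
           aspect='auto', cmap=plt.cm.PuOr_r)
plt.colorbar(label='P(y=1)')
plt.contour(x1, x2, y_true, levels=[0], colors='k')  # true boundary g(x) = 0
plt.contour(x1, x2, y_prob, levels=[0.5], colors='b', linestyles='--')  # p = 0.5
plt.show()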
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel, random_state=0).fit(X, y)
print(gpc.score(X, y))
clf = LogisticRegression(random_state=0, solver='lbfgs',
                         multi_class='multinomial').fit(X, y)
print(clf.score(X, y))
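
# Hedged note: both scores above are training-set scores. A quick
# cross-validated comparison (a sketch, not part of the original example):
from sklearn.model_selection import cross_val_score
print(cross_val_score(GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                                random_state=0),
                      X, y, cv=5).mean())
print(cross_val_score(LogisticRegression(random_state=0, solver='lbfgs',
                                         multi_class='multinomial',
                                         max_iter=1000),
                      X, y, cv=5).mean())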