Example #1
encoded_classe = encoder.transform(classe)

# storing the sepal data in Xs as a NumPy array
Xs = np.array(dados[['sepala-comprimento', 'sepala-largura']])

# storing the petal data in Xp as a NumPy array
Xp = np.array(dados[['petala-comprimento', 'petala-largura']])

# storing the encoded class data in a NumPy array
Y = np.array(encoded_classe, dtype=int)

# step size in the mesh
h = 0.02

# defining the isotropic kernel
kernel = 1.0 * RBF([1.0])
gpc_isotropico_s = GaussianProcessClassifier(kernel=kernel).fit(Xs, Y)
gpc_isotropico_p = GaussianProcessClassifier(kernel=kernel).fit(Xp, Y)

# defining the anisotropic kernel
kernel = 1.0 * RBF([1.0, 1.0])
gpc_anisotropico_s = GaussianProcessClassifier(kernel=kernel).fit(Xs, Y)
gpc_anisotropico_p = GaussianProcessClassifier(kernel=kernel).fit(Xp, Y)

# Creating the mesh for plotting
x_min_s = Xs[:, 0].min() - 1
x_max_s = Xs[:, 0].max() + 1

x_min_p = Xp[:, 0].min() - 1
x_max_p = Xp[:, 0].max() + 1
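
The snippet stops after computing the x-limits. A minimal sketch of the usual continuation (the y-limit names and the plotting calls are assumptions, not the original author's code; matplotlib is assumed to be available) that builds the mesh and shows the class probabilities of the isotropic sepal model:

import matplotlib.pyplot as plt  # assumed; may already be imported in the original script

# hypothetical continuation: y-limits, mesh, and predicted class probabilities
y_min_s = Xs[:, 1].min() - 1
y_max_s = Xs[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min_s, x_max_s, h), np.arange(y_min_s, y_max_s, h))
Z = gpc_isotropico_s.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape((xx.shape[0], xx.shape[1], 3))  # one probability channel per Iris class
plt.imshow(Z, extent=(x_min_s, x_max_s, y_min_s, y_max_s), origin="lower")
plt.scatter(Xs[:, 0], Xs[:, 1], c=np.array(["r", "g", "b"])[Y], edgecolors=(0, 0, 0))
plt.title("Isotropic RBF, sepal features")
plt.show()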
Example #2
# dsigma = dsigma / norm
# sigma_exact = sigma_exact / norm
# print("tau: {}".format(tau.shape))
# print("sigma: {}".format(sigma.shape))
# print("tau mesh: {}".format(tau_exact.shape))
# print("sigma exact: {}".format(sigma_exact.shape))

print("\nPreparing for exhaustive GridSearch():")

# Set the parameters to hyperoptimize
kernel1 = DotProduct(sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5))
kernel2 = ExpSineSquared(length_scale=1.0,
                         periodicity=1.0,
                         length_scale_bounds=(1e-5, 1e5))
kernel3 = Exponentiation(
    RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2)), 2)
kernel4 = Matern(length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5)
kernel5 = PairwiseKernel(gamma=1.0, gamma_bounds=(1e-5, 1e5))
kernel6 = Product(RBF(1.0, (1e-5, 1e5)), Matern(1.0, (1e-5, 1e5), nu=1.5))
kernel7 = RBF(length_scale=1.0, length_scale_bounds=(1e-5, 1e5))
kernel8 = RationalQuadratic(length_scale=1.0,
                            alpha=1.0,
                            length_scale_bounds=(1e-5, 1e5),
                            alpha_bounds=(1e-5, 1e5))
kernel9 = Sum(RBF(1.0, (1e-2, 1e2)), Matern(10, (1e-2, 1e2), nu=1.5))
# List of hyperparameters given to the GridSearchCV()
tuned_parameters = [{
    "kernel": [
        kernel1, kernel2, kernel3, kernel4, kernel5, kernel6, kernel7, kernel8,
        kernel9
    ]
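
The example is cut off before the tuned_parameters list is closed and the search is run. A minimal sketch, assuming a GaussianProcessRegressor as the estimator and placeholder arrays X_train/y_train (not from the original), of how such a kernel list is typically handed to GridSearchCV:

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.model_selection import GridSearchCV

param_grid = {"kernel": [kernel1, kernel4, kernel7, kernel8]}  # subset of the kernels above
search = GridSearchCV(GaussianProcessRegressor(), param_grid, cv=5,
                      scoring="neg_mean_squared_error")
# search.fit(X_train, y_train)      # X_train / y_train are placeholders
# print(search.best_params_)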
Example #3
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

h = .02  # step size in the mesh

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost", "Naive Bayes",
    "QDA"
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()
]

X, y = make_classification(n_features=2,
                           n_redundant=0,
                           n_informative=2,
                           random_state=1,
                           n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
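
A minimal sketch of how such a classifier comparison usually proceeds (the split ratio and the scoring loop are assumptions, not the original author's code):

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)
for name, clf in zip(names, classifiers):
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))  # held-out accuracy per classifier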
Example #4
def build_gaussian_model(X_train, y_train):    
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    model.fit(X_train, y_train)   
    return model
def random_serach_top_tiers():
    """ Perform random search to find the best hyper parameters."""
    tc = TopCoder()

    model_dct = {
        'BayesianRidge': BayesianRidge,
        'DecisionTreeRegressor': DecisionTreeRegressor,
        'GaussianProcessRegressor': GaussianProcessRegressor,
        'GradientBoostingRegressor': GradientBoostingRegressor,
        'KNeighborsRegressor': KNeighborsRegressor,
        'RandomForestRegressor': RandomForestRegressor,
        'SVR': SVR,
    }

    model_args_dct = {
        'BayesianRidge': {
            'fixed_args': dict(n_iter=1000),
            'tuned_args': dict(tol=[1e-3, 1e-4, 1e-5, 1e-6], ),
        },
        'DecisionTreeRegressor': {
            'fixed_args':
            dict(random_state=42),
            'tuned_args':
            dict(criterion=['mse', 'mae', 'friedman_mse'],
                 max_depth=[None, 3, 5, 10]),
        },
        'GaussianProcessRegressor': {
            'fixed_args':
            dict(),
            'tuned_args':
            dict(kernel=[
                1.0 * RBF(), 1.0 * RationalQuadratic(),
                ConstantKernel() * (DotProduct()**2),
                DotProduct() * WhiteKernel()
            ]),
        },
        'GradientBoostingRegressor': {
            'fixed_args':
            dict(random_state=42, n_iter_no_change=5),
            'tuned_args':
            dict(
                loss=['ls', 'lad'],
                n_estimators=[200, 500, 1000, 1500],
                learning_rate=[0.01, 0.001, 1e-4],
                tol=[0.01, 0.001, 1e-4, 1e-5, 2e-5, 1e-6],
            ),
        },
        'KNeighborsRegressor': {
            'fixed_args':
            dict(n_jobs=-1),
            'tuned_args':
            dict(
                n_neighbors=[5, 10, 15, 20],
                weights=['uniform', 'distance'],
                algorithm=['ball_tree', 'kd_tree'],
                leaf_size=[30, 60, 100],
            ),
        },
        'RandomForestRegressor': {
            'fixed_args':
            dict(n_jobs=-1, verbose=1, random_state=42, bootstrap=True),
            'tuned_args':
            dict(
                n_estimators=[100, 200, 500, 1000],
                max_features=['auto', 'sqrt', 0.333],
                criterion=['mae', 'mse'],
            ),
        },
        'SVR': {
            'fixed_args':
            dict(cache_size=15000),
            'tuned_args': [
                dict(kernel=['rbf'],
                     gamma=['scale', 'auto'],
                     C=[1, 10, 100, 1000]),
                dict(kernel=['linear'], C=[1, 10, 100, 1000]),
                dict(kernel=['poly'],
                     degree=[2, 3, 5],
                     coef0=[0, 0.5, 5, 50, 100],
                     C=[1, 10, 100, 1000]),
            ],
        },
    }

    scoring = {
        'mae': make_scorer(mean_absolute_error, greater_is_better=False),
        'mre': make_scorer(mre, greater_is_better=False),
    }

    rs_path = os.path.join(os.curdir, 'result', 'random_search_res')

    with open(
            os.path.join(os.curdir, 'result', 'simple_regression',
                         'top4_reg_dct.json')) as f:
        top_regs_dct = {
            target: list(metrics.keys())
            for target, metrics in json.load(f).items() if target != 'price'
        }

    for target, reg_lst in top_regs_dct.items():
        print(f'{target} | Random Searching....')
        X, y = tc.build_final_dataset(target)
        Xnp, ynp = X.to_numpy(), y.to_numpy()
        X_train, X_test, y_train, y_test = train_test_split(Xnp,
                                                            ynp,
                                                            test_size=0.3,
                                                            random_state=42)

        for reg_name in reg_lst:
            print(f'RS on {reg_name}...')

            rs_res_path = os.path.join(rs_path, f'{target}_{reg_name}_rs.json')
            if os.path.isfile(rs_res_path):
                continue

            reg = model_dct[reg_name]
            args = model_args_dct[reg_name]

            rs = RandomizedSearchCV(
                reg(**args['fixed_args']),
                param_distributions=args['tuned_args'],
                n_iter=6,
                scoring=scoring,
                refit='mre',
                n_jobs=-1,
                cv=10,
                random_state=42,
            )
            rs.fit(X_train, y_train)

            rs_res = {
                'regressor': reg_name,
                'best_params': rs.best_params_,
                'best_score_in_rs': rs.best_score_,
            }

            with open(rs_res_path, 'w') as f:
                json.dump(rs_res, f, indent=4)
# Custom defined list of Gaussian Process regression models to be used by TPOT
import numpy as np
import pdb
from itertools import product

from skrvm import RVR

# Define list of Kernels
from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
                                              ExpSineSquared, DotProduct,
                                              ConstantKernel)
# The hyperparameters for the GPR, will be optimised during fitting
kernels = [RBF(), RationalQuadratic(), ExpSineSquared(), Matern()]

tpot_config_gpr = {
    'sklearn.gaussian_process.GaussianProcessRegressor': {
        'kernel': kernels,
        'random_state': [42],
        'alpha': np.arange(1e-2, 10, 30)
    },
    'skrvm.RVR': {
        'kernel': kernels,
        'alpha': [1e-10, 1e-06, 1e-02, 1],
        'beta': [1e-10, 1e-06, 1e-02, 1],
    },
    'sklearn.svm.LinearSVR': {
        'loss': ["epsilon_insensitive", "squared_epsilon_insensitive"],
        'dual': [True, False],
        'random_state': [42],
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
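
The config dictionary above is cut off mid-entry; once complete, a dictionary like tpot_config_gpr is passed to TPOT through its config_dict argument. A minimal sketch, assuming the tpot package is installed and using placeholder arrays X_train/y_train/X_test/y_test (not from the original):

from tpot import TPOTRegressor

tpot = TPOTRegressor(generations=5, population_size=20, config_dict=tpot_config_gpr,
                     random_state=42, verbosity=2)
# tpot.fit(X_train, y_train)        # X_train / y_train are placeholders
# print(tpot.score(X_test, y_test))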
Example #7
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct

xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
plt.figure(figsize=(10, 5))
kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0)**2]
for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y)

    # plot the decision function for each datapoint on the grid
    Z = clf.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)[:, 1]
    Z = Z.reshape(xx.shape)

    plt.subplot(1, 2, i + 1)
    image = plt.imshow(
        Z,
        interpolation="nearest",
        extent=(xx.min(), xx.max(), yy.min(), yy.max()),
        aspect="auto",
        origin="lower",
        cmap=plt.cm.PuOr_r,
Example #8
from sklearn.utils._testing import (assert_array_less, assert_almost_equal,
                                    assert_array_almost_equal,
                                    assert_array_equal, assert_allclose)


def f(x):
    return x * np.sin(x)


X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T
y = f(X).ravel()

fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [
    RBF(length_scale=1.0), fixed_kernel,
    RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
    C(1.0,
      (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
    C(1.0,
      (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
    C(1e-5, (1e-5, 1e2)),
    C(0.1,
      (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
    C(1e-5, (1e-5, 1e2))
]
non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel]
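
A minimal sketch of how these fixtures are typically consumed by a test (an illustration, not the exact scikit-learn test body): with noiseless data, the GP posterior mean should interpolate the training points for every kernel in the list.

from sklearn.gaussian_process import GaussianProcessRegressor

def test_interpolation_sketch():
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        # the posterior mean at the training inputs should reproduce y
        assert_almost_equal(gpr.predict(X), y)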

Example #9
def test_no_optimizer():
    # Test that kernel parameters are unmodified when optimizer is None.
    kernel = RBF(1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y)
    assert np.exp(gpr.kernel_.theta) == 1.0
Example #10
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report


x_train, y_train, x_valid, y_valid, x_test, y_test = prepare_data(one_hot=False)

classifiers = [
    GaussianNB(),
    #  RidgeClassifier(tol=1e-2, solver="lsqr"),
    QuadraticDiscriminantAnalysis(),
    LinearDiscriminantAnalysis(),
    DecisionTreeClassifier(max_depth=5),
    KNeighborsClassifier(3, n_jobs=-1),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1, n_jobs=-1),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    SVC(kernel="linear", C=0.025, probability=True),
    SVC(gamma=2, C=1, probability=True),
    SVC(kernel="rbf", C=0.025, probability=True),
    MLPClassifier(alpha=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), n_jobs=-1),
]

for clf in classifiers:
    print('_' * 80)
    print(clf.__class__.__name__)
    clf.fit(x_train, y_train)
    print('Train/val/test accuracy: ', clf.score(x_train, y_train), clf.score(x_valid, y_valid), clf.score(x_test, y_test))
    print('Classification report of Test data')
    print(classification_report(y_test, clf.predict(x_test)))
Example #11
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

classifiers = {
    'SVC': (SVC(), {
        'kernel': ["linear"],
        'C': [0.01]
    }),
    'KNN': (KNeighborsClassifier(), {
        'n_neighbors': [5, 10]
    }),
    'GaussianProcess': (GaussianProcessClassifier(), {
        'kernel': [1.0 * RBF(1.0)],
        'warm_start': [True]
    }),
    'DecisionTree': (DecisionTreeClassifier(), {}),
    'RandomForest': (RandomForestClassifier(), {}),
    'AdaBoost': (AdaBoostClassifier(), {}),
    'GradientBoosting': (GradientBoostingClassifier(), {}),
    'MLP': (MLPClassifier(), {}),
    'NaiveBayes': (GaussianNB(), {}),
    'LDA': (LinearDiscriminantAnalysis(), {}),
}


# noinspection PyPep8Naming
class ClassifierPool(object):
    def __init__(self, classifier_name='SVC', nb_features=1000):
# training_Y.append(0)

# training_X.append([0.5,4.5])
# training_Y.append(0)

# training_X.append([5.5,0.5])
# training_Y.append(0)

# training_X.append([7.5,9.5])
# training_Y.append(0)

print(len(training_X))
print(len(training_Y))

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_opt = GaussianProcessClassifier(RBF(length_scale=1.0))
gp_opt.fit(training_X,training_Y)
print("The trained hyperparameter are {}".format((gp_opt.kernel_.theta)))
# print("Log Marginal Likelihood (optimized): %.3f"
#       % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))


# print("The probability of occupancy is {}".format(p_occ))

fig = plt.figure()
ZZ = np.empty([30,30])
for idx1, row in enumerate(x):
    for idx2,col in enumerate(y):
        K = [row,col]
        if K in training_X:
            ZZ[idx1,idx2] = 0.0
X = np.array(X)
Y = np.sin(2 * np.pi * X)
N = X.shape[0]

alpha = []
for i in range(N):
    alpha_ = 0.01
    alpha.append(alpha_)
alpha = np.array(alpha)

X_plot = X
Y_plot = Y + np.random.normal(0, alpha)
pylab.scatter(X_plot, Y_plot)

kernel = C(1.0, (0.01, 100)) \
    * ManifoldKernel.construct(base_kernel=RBF(length_scale=10), architecture=((1, 6, 2),),
                               transfer_fct="tanh", max_nn_weight=1)
gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=alpha**2,
                              n_restarts_optimizer=1)
'''
kernel = C(1.0) * RBF(length_scale=0.1)
gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha ** 2, n_restarts_optimizer=10)
'''

gp.fit(X[:, None], Y)

XX = np.linspace(-1.5, 1.5, 100)
YY = np.sin(2 * np.pi * XX)

pylab.figure(0, figsize=(10, 8))
 DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None,
                         min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0,
                         max_features=None, random_state=None, max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         class_weight=None, presort=False),
 KNeighborsClassifier(n_neighbors=2, weights='uniform', algorithm='auto',
                         leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None),
 MLPClassifier(hidden_layer_sizes=(100, ), activation='relu',
                 solver='adam', alpha=1, batch_size='auto', learning_rate='constant',
                 learning_rate_init=0.001, power_t=0.5, max_iter=1000, shuffle=True,
                 random_state=None, tol=0.0001, verbose=False, warm_start=False,
                 momentum=0.9, nesterovs_momentum=True, early_stopping=False,
                 validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10),
 AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0,
                     algorithm='SAMME.R', random_state=None),
 GaussianProcessClassifier(kernel=1.0 * RBF(1.0), optimizer='fmin_l_bfgs_b',
                             n_restarts_optimizer=0, max_iter_predict=100,
                             warm_start=False, copy_X_train=True,
                             random_state=None, multi_class='one_vs_rest', n_jobs=None),
 RandomForestClassifier(n_estimators='warn', criterion='gini',
                         max_depth=None, min_samples_split=2, min_samples_leaf=1,
                         min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True,
                         oob_score=False, n_jobs=None, random_state=None, verbose=0,
                         warm_start=False, class_weight=None),
 SVC(kernel="linear", C=1, degree=3, gamma='auto_deprecated', coef0=0.0,
     shrinking=True, probability=False, tol=0.001, cache_size=200,
     class_weight=None, verbose=False, max_iter=-1,
     decision_function_shape='ovr', random_state=None),
 SVC(C=1.0, kernel='rbf', degree=3, gamma=0.1, coef0=0.0,
     shrinking=True, probability=False, tol=0.001, cache_size=200,
Example #15
y_source = 2 * x_source[:, 0] + 3 * x_source[:, 1] + 1
y_source = y_source + noise_ratio_in_simulation * y_source.std(
) * np.random.rand(len(y_source))
x_target = np.random.rand(number_of_samples, 2)
y_target = 2 * x_target[:, 0] + 4 * x_target[:, 1] + 1
y_target = y_target + noise_ratio_in_simulation * y_target.std(
) * np.random.rand(len(y_target))

np.random.seed()
x_train, x_test, y_train, y_test = train_test_split(
    x_target, y_target, test_size=number_of_test_samples, random_state=0)

fold_number = min(fold_number, len(y_train))

# Gaussian process regression
regression_model = GaussianProcessRegressor(ConstantKernel() * RBF() +
                                            WhiteKernel(),
                                            alpha=0)
model = TransferLearningSample(base_estimator=regression_model,
                               x_source=x_source,
                               y_source=y_source,
                               cv_flag=False)
model.fit(x_train, y_train)

# calculate y in training data
calculated_y_train = model.predict(x_train)
# yy-plot
plt.rcParams['font.size'] = 18  # font size for axis labels, tick labels, etc.
plt.figure(figsize=figure.figaspect(1))
plt.scatter(y_train, calculated_y_train, c='blue')
y_max = np.max(np.array([np.array(y_train), calculated_y_train]))
Example #16
from matplotlib import pyplot as plt
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

length_scale = 1
noise = .1
kernel = RBF(length_scale=length_scale) + WhiteKernel(noise_level=noise**2)
gp = GaussianProcessRegressor(kernel=kernel, optimizer=None)

x_max = 2
x_min = -2
n_observation = 51

xs = np.zeros((10, n_observation))

xs[0] = (x_max - x_min) * (np.random.rand(n_observation) - .5)
idx = np.argsort(xs[0])
y = gp.sample_y(xs[0].reshape(-1, 1)).ravel()  # one function drawn from the GP prior

plt.figure()
plt.plot(xs[0][idx], y[idx])
plt.show()
Example #17
import math
import random

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# Generating sample randomly
data_x = [[4.0 * (-0.5 + random.random()), 4.0 * (-0.5 + random.random())]
          for i in range(200)]
data_y = [[x[0] * math.sin(3.0 * x[1])] for x in data_x]

# Training GPR (Gaussian Process for Regression) so that GPR can map from x to y.
# You can play with different kernels
#kernel= C(1.0, (1e-3, 1e3)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1.0, 1.0)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1e-3, 1e3)) * RBF(3.0, (3.0, 3.0))
#kernel= RBF(1.0, (0.1, 10.0))
kernel = RBF(3.0, (3.0, 3.0))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(data_x, data_y)
f = lambda x: gp.predict([x])[0, 0]

# Now we can compute y=f(x) for any x
print('f([0.0,0.0])=', f([0.0, 0.0]))
print('f([1.0,1.0])=', f([1.0, 1.0]))
print('f([1.5,2.0])=', f([1.5, 2.0]))

#Plot gp.predict(x)
plot, plot3d = PlotF(f, xmin=[-2, -2], xmax=[2, 2], dx=0.1, show=False)
#Plot data points
plot3d.scatter(np.array(data_x).T[0],
               np.array(data_x).T[1],
               data_y,
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier

clf1 = SVC(probability=False, C=9, gamma=0.15, kernel='rbf')
clf2 = RandomForestClassifier(criterion='gini',
                              n_estimators=34,
                              random_state=12)
clf3 = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                 n_restarts_optimizer=1,
                                 max_iter_predict=50,
                                 random_state=2)

eclf = VotingClassifier(estimators=[('svc', clf1), ('rf', clf2),
                                    ('gpc', clf3)],
                        voting='hard',
                        weights=[2, 5, 2])
# for clf, label in zip([clf1, clf2, clf3, eclf], ['SVC', 'RF', 'GPC', 'Ensemble']):
#     scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
#     print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))
eclf4 = eclf.fit(X, y)

print('\n---------Independent test set ----------\n ')
yy_true, yy_pred = yy, eclf4.predict(XX)
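
The imported metrics module can then score the hard-vote predictions on the independent test set; a short sketch of the usual reporting (not part of the original snippet):

print(metrics.accuracy_score(yy_true, yy_pred))
print(metrics.classification_report(yy_true, yy_pred))
print(metrics.confusion_matrix(yy_true, yy_pred))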
Example #19
def main(_):
  num_parallel_thetas = FLAGS.num_parallel_thetas
  num_theta_batches = FLAGS.num_theta_batches
  num_steps_autoencoder = 0 if FLAGS.uniform_weights else TRAINING_STEPS

  input_dim = len(FEATURES)

  training_df = pd.read_csv(FLAGS.training_data_path, header=0, sep=',')
  testing_df = pd.read_csv(FLAGS.testing_data_path, header=0, sep=',')
  validation_df = pd.read_csv(FLAGS.validation_data_path, header=0, sep=',')

  add_price_quantiles(training_df)
  add_price_quantiles(testing_df)
  add_price_quantiles(validation_df)

  train_labels = np.log(training_df['price'])
  validation_labels = np.log(validation_df['price'])
  test_labels = np.log(testing_df['price'])
  train_features = training_df[FEATURES]
  validation_features = validation_df[FEATURES]
  test_features = testing_df[FEATURES]
  validation_price = validation_df['price']
  test_price = testing_df['price']

  tf.reset_default_graph()
  x = tf.placeholder(tf.float32, shape=(None, input_dim), name='x')
  y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

  xy = tf.concat([x, y], axis=1)
  autoencoder_layer1 = tf.layers.dense(
      inputs=xy, units=100, activation=tf.sigmoid)
  autoencoder_embedding_layer = tf.layers.dense(
      inputs=autoencoder_layer1,
      units=FLAGS.embedding_dim,
      activation=tf.sigmoid)
  autoencoder_layer3 = tf.layers.dense(
      inputs=autoencoder_embedding_layer, units=100, activation=tf.sigmoid)
  autoencoder_out_x = tf.layers.dense(
      inputs=autoencoder_layer3, units=input_dim)
  autoencoder_out_y = tf.layers.dense(inputs=autoencoder_layer3, units=1)

  autoencoder_y_loss = tf.losses.mean_squared_error(
      labels=y, predictions=autoencoder_out_y)
  autoencoder_x_loss = tf.losses.mean_squared_error(
      labels=x, predictions=autoencoder_out_x)
  autoencoder_loss = autoencoder_x_loss + autoencoder_y_loss
  autoencoder_optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
      autoencoder_loss)

  parallel_outputs = []
  parallel_losses = []
  parallel_optimizers = []

  parallel_thetas = tf.placeholder(
      tf.float32,
      shape=(num_parallel_thetas, FLAGS.embedding_dim),
      name='parallel_thetas')
  unstack_parallel_thetas = tf.unstack(parallel_thetas, axis=0)
  embedding = tf.placeholder(
      tf.float32, shape=(None, FLAGS.embedding_dim), name='embedding')

  with tf.variable_scope('regressors'):
    for theta_index in range(num_parallel_thetas):
      output = regressor(x)
      theta = tf.reshape(
          unstack_parallel_thetas[theta_index], shape=[FLAGS.embedding_dim, 1])
      optimizer, loss = optimization(output, y, embedding, theta, LEARNING_RATE)

      parallel_outputs.append(output)
      parallel_losses.append(loss)
      parallel_optimizers.append(optimizer)

  init = tf.global_variables_initializer()
  regressors_init = tf.variables_initializer(
      tf.global_variables(scope='regressors'))

  kernel = RBF(
      length_scale=FLAGS.sampling_radius,
      length_scale_bounds=(FLAGS.sampling_radius * 1e-3, FLAGS.sampling_radius *
                           1e3)) * ConstantKernel(1.0, (1e-3, 1e3))

  thetas = np.zeros(shape=(0, FLAGS.embedding_dim))
  validation_metrics = []
  test_metrics = []

  with tf.Session() as sess:
    sess.run(init)

    # Training autoencoder
    for _ in range(num_steps_autoencoder):
      batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
      batch_x = train_features.iloc[batch_index, :].values
      batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
      _, _ = sess.run([autoencoder_optimizer, autoencoder_loss],
                      feed_dict={
                          x: batch_x,
                          y: batch_y,
                      })

    # GetCandidatesAlpha (Algorithm 2 in paper)
    for theta_batch_index in range(num_theta_batches):
      sess.run(regressors_init)
      if FLAGS.uniform_weights:
        theta_batch = np.zeros(shape=(num_parallel_thetas, FLAGS.embedding_dim))
      elif theta_batch_index == 0:
        # We first start uniformly.
        theta_batch = sample_from_ball(
            size=(num_parallel_thetas, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
      else:
        # Use UCB to generate candidates.
        theta_batch = np.zeros(shape=(0, FLAGS.embedding_dim))
        sample_thetas = np.copy(thetas)
        sample_validation_metrics = validation_metrics[:]
        candidates = sample_from_ball(
            size=(10000, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
        for theta_index in range(num_parallel_thetas):
          gp = GaussianProcessRegressor(
              kernel=kernel, alpha=1e-4).fit(sample_thetas,
                                             sample_validation_metrics)

          metric_mles, metric_stds = gp.predict(candidates, return_std=True)
          metric_lcbs = metric_mles - FLAGS.p_q_value * metric_stds

          best_index = np.argmin(metric_lcbs)
          best_theta = [candidates[best_index]]
          best_theta_metric_ucb = metric_mles[best_index] \
            + FLAGS.p_q_value * metric_stds[best_index]
          theta_batch = np.concatenate([theta_batch, best_theta])

          # Add candidate to the GP, assuming the metric observation is the LCB.
          sample_thetas = np.concatenate([sample_thetas, best_theta])
          sample_validation_metrics.append(best_theta_metric_ucb)

      # Training regressors
      for _ in range(TRAINING_STEPS):
        batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
        batch_x = train_features.iloc[batch_index, :].values
        batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
        batch_embedding = sess.run(
            autoencoder_embedding_layer, feed_dict={
                x: batch_x,
                y: batch_y,
            })
        _, _ = sess.run(
            [parallel_optimizers, parallel_losses],
            feed_dict={
                x: batch_x,
                y: batch_y,
                embedding: batch_embedding,
                parallel_thetas: theta_batch,
            })

      parallel_validation_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: validation_features.values,
              y: validation_labels.values.reshape(len(validation_labels), 1),
          })
      parallel_validation_metrics = [
          metric(validation_labels, validation_output, validation_price)
          for validation_output in parallel_validation_outputs
      ]
      thetas = np.concatenate([thetas, theta_batch])
      validation_metrics.extend(parallel_validation_metrics)

      parallel_test_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: test_features.values,
              y: test_labels.values.reshape(len(test_labels), 1),
          })
      parallel_test_metrics = [
          metric(test_labels, test_output, test_price)
          for test_output in parallel_test_outputs
      ]
      test_metrics.extend(parallel_test_metrics)

  best_observed_index = np.argmin(validation_metrics)
  print('[metric] validation={}'.format(
      validation_metrics[best_observed_index]))
  print('[metric] test={}'.format(test_metrics[best_observed_index]))

  return 0
Example #20
    def get_estimator(self, estimator):
        # Classification
        if estimator == "RandomForestClassifier":
            self._learning_type = "classification"
            return RandomForestClassifier(verbose=True)
        elif estimator == "SVC":
            self._learning_type = "classification"
            return SVC(verbose=True)
        elif estimator == "LinearSVC":
            self._learning_type = "classification"
            return LinearSVC(verbose=True)
        elif estimator == "SGDClassifier":
            self._learning_type = "classification"
            return SGDClassifier(verbose=True)
        elif estimator == "KNeighborsClassifier":
            self._learning_type = "classification"
            return KNeighborsClassifier()
        elif estimator == "GaussianProcessClassifier":
            self._learning_type = "classification"
            return GaussianProcessClassifier(1.0 * RBF(1.0))
        elif estimator == "DecisionTreeClassifier":
            self._learning_type = "classification"
            return DecisionTreeClassifier()
        elif estimator == "AdaBoostClassifier":
            self._learning_type = "classification"
            return AdaBoostClassifier()
        elif estimator == "MLPClassifier":
            self._learning_type = "classification"
            return MLPClassifier(verbose=True)
        elif estimator == "RandomForestClassifier":
            self._learning_type = "classification"
            return RandomForestClassifier(verbose=True)
        elif estimator == "QuadraticDiscriminantAnalysis":
            self._learning_type = "classification"
            return QuadraticDiscriminantAnalysis()

        # Regression
        if estimator == "RandomForestRegressor":
            self._learning_type = "regression"
            return RandomForestRegressor(verbose=True)
        elif estimator == "KNeighborsRegressor":
            self._learning_type = "regression"
            return KNeighborsRegressor()
        elif estimator == "MultinomialNB":
            self._learning_type = "regression"
            return MultinomialNB()
        elif estimator == "SVR":
            self._learning_type = "regression"
            return SVR(verbose=True)
        elif estimator == "Lasso":
            self._learning_type = "regression"
            return Lasso()
        elif estimator == "ElasticNet":
            self._learning_type = "regression"
            return ElasticNet()
        elif estimator == "Ridge":
            self._learning_type = "regression"
            return Ridge(alpha=1.0, solver="auto")
        elif estimator == "LogisticRegression":
            self._learning_type = "regression"
            return LogisticRegression(verbose=True)
        elif estimator == "SGDRegressor":
            self._learning_type = "regression"
            return SGDRegressor(verbose=True)
        """ Find Estimator by returning all estimators"""
        if estimator == "classification":
            self._learning_type = "classification"
            estimators = [
                SVC(),
                LinearSVC(),
                SGDClassifier(),
                KNeighborsClassifier(),
                GaussianProcessClassifier(1.0 * RBF(1.0)),
                DecisionTreeClassifier(),
                MLPClassifier()
            ]
            return estimators
        elif estimator == "regression":
            self._learning_type = "regression"
            estimators = [
                RandomForestRegressor(),
                SVR(kernel='linear'),
                KNeighborsRegressor(),
                MultinomialNB(),
                SVR(),
                Lasso(),
                ElasticNet(),
                Ridge(alpha=1.0, solver="auto"),
                LogisticRegression(),
                SGDRegressor()
            ]
            return estimators
Example #21
    def __init__(self, feature_table, labels, model, classes = None, C = 1.0):
        self.feature_table = feature_table
        self.labels = labels
        self.modelname = model 
        self.coef = np.zeros(feature_table.shape[1])
        if (len(feature_table)!=len(labels)):
            raise Exception("Feature table and labels length mismatch!")
        if classes:
            self.classes = classes
        else:
            self.classes = np.unique(labels)
            
        # turn string labels to numeric labels
        self.class_dict = dict(zip(self.classes, range(len(self.classes))))
        self.labels_num = pd.Series(self.labels).map(self.class_dict, na_action='ignore')
#        print(self.labels_num)
#        print(self.labels)
        
        model_names = ["Nearest Neighbors (kNN)", "Linear SVM (LSVM)", "RBF SVM (RBF_SVM)", "Gaussian Process (Gaussian)",
         "Decision Tree (DT)", "Random Forest (RF)", "Neural Net (MLP)", "AdaBoost ('Ada')",
         "Naive Bayes (NB)", "QDA"]

        
        if self.modelname == 'logistic_regression' or self.modelname == 'LR':
            if len(self.classes) == 2: #binomial logistic regression case
                self.model = LogisticRegression(C=C, class_weight=None, dual=False, fit_intercept=True,
                                                intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
                                                penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
                                                verbose=0, warm_start=False)
            else: #Multinomial logistic regression
                self.model = LogisticRegression(C=C, class_weight='balanced', dual=False, fit_intercept=True, 
                                                intercept_scaling=1, max_iter=100, multi_class='multinomial', n_jobs=1,
                                                penalty='l2', random_state=None, solver='newton-cg', tol=0.0001,
                                                verbose=0, warm_start=True)
        elif self.modelname == 'regulized_logistic_regression' or self.modelname == 'RLR':
            if len(self.classes) == 2: #binomial logistic regression case
                self.model = LogisticRegression(C=C, class_weight=None, dual=False, fit_intercept=True,
                                                intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
                                                penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
                                                verbose=0, warm_start=False)
            else: #Multinomial logistic regression
                self.model = LogisticRegression(C=C, class_weight='balanced', dual=False, fit_intercept=True, 
                                                intercept_scaling=1, max_iter=100, multi_class='multinomial', n_jobs=1,
                                                penalty='l2', random_state=None, solver='newton-cg', tol=0.0001,
                                                verbose=0, warm_start=True)
        elif self.modelname in ['decision_tree','DT']:
            self.model = tree.DecisionTreeClassifier(max_depth=5)
            
        elif self.modelname in ['kNN','k-NN','knn']:
            self.model = KNeighborsClassifier(n_neighbors=3)
            
        elif self.modelname in ['linear_svm', 'LSVM']:
            self.model = SVC(kernel="linear", C=0.025)
        
        elif self.modelname in ['rbf_svm', 'RBF_SVM']:
            self.model = SVC(gamma=2, C=1)
        
        elif self.modelname in ['Gaussian', 'gaussian']:
            self.model = GaussianProcessClassifier(1.0 * RBF(1.0))
            
        elif self.modelname in ['RF', 'Random_forest', 'Random_Forest', 'random_forest']:
            self.model = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
            
        elif self.modelname in ['MLP', 'Neural_net']:
            self.model = MLPClassifier(alpha=1, max_iter=1000)
        
        elif self.modelname in ['ADA', 'Ada', 'Adaboost', 'Ada_boost']:
            self.model = AdaBoostClassifier()
            
        elif self.modelname in ['NB', 'naive_bayes','Naive_Bayes','Naive_bayes']:
            self.model = GaussianNB()
        
        elif self.modelname in ['QDA','qda']:
            self.model = QuadraticDiscriminantAnalysis()
                
        else:
            raise Exception("Classifier model un-recognized, current supported models: logistic_regression, decision_tree, kNN, linear_svm, RBF_SVM, Gaussian, Random_Forest, MLP, ADA, naive_bayes, QDA")
Example #22
def f(x):
    """The function to predict."""
    return x * np.sin(x)

train_X = np.atleast_2d(np.linspace(0, 10, 100)).T  # set the size of the training set
train_Y = f(train_X).ravel()
plt_X = np.atleast_2d(np.linspace(0, 10, 1000)).T

### GP1: hand-implemented GP
gp = GP()
gp.fit(train_X,train_Y)
plt_Y, sigma1 = gp.predict(plt_X)  # sigma returned here is the full covariance matrix
sigma1 = sigma1.diagonal()  # take the diagonal elements

### GP2: scikit-learn GaussianProcessRegressor
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(train_X, train_Y)
y_pred, sigma2 = gp.predict(plt_X, return_std=True)

#### Plotting
fig = plt.figure(figsize=(16, 10))

gs = gridspec.GridSpec(2, 2)  # split into 2 rows, 2 columns
GP1 = plt.subplot(gs[:,0])
GP2 = plt.subplot(gs[:,1])

GP1.plot(plt_X, f(plt_X), 'r:', label=r'$f(x) = x\,\sin(x)$')
GP1.plot(train_X, train_Y, 'r.', markersize=10, label='Observations')
GP1.plot(plt_X, plt_Y, 'b-', label='Prediction')
GP1.fill(np.concatenate([plt_X, plt_X[::-1]]),
Example #23
x = training_data.iloc[:, number_of_y_variables:]
x_for_prediction.columns = x.columns
autoscaled_x = (x - x.mean()) / x.std()
autoscaled_x_for_prediction = (x_for_prediction - x.mean()) / x.std()
autoscaled_y = (y - y.mean()) / y.std()
mean_of_y = y.mean()
std_of_y = y.std()

# Gaussian process regression
estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
std_of_estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
plt.rcParams['font.size'] = 18
for y_number in range(number_of_y_variables):
    model = GaussianProcessRegressor(ConstantKernel() * RBF() + WhiteKernel())
    model.fit(autoscaled_x, autoscaled_y.iloc[:, y_number])
    estimated_y_for_prediction_tmp, std_of_estimated_y_for_prediction_tmp = model.predict(
        autoscaled_x_for_prediction, return_std=True)
    estimated_y_for_prediction[:, y_number] = estimated_y_for_prediction_tmp
    std_of_estimated_y_for_prediction[:,
                                      y_number] = std_of_estimated_y_for_prediction_tmp

    estimated_y = model.predict(autoscaled_x)
    estimated_y = estimated_y * std_of_y.iloc[y_number] + mean_of_y.iloc[
        y_number]
    plt.figure(figsize=figure.figaspect(1))
    plt.scatter(y.iloc[:, y_number], estimated_y)
    y_max = max(y.iloc[:, y_number].max(), estimated_y.max())
    y_min = min(y.iloc[:, y_number].min(), estimated_y.min())
    plt.plot([y_min - 0.05 * (y_max - y_min), y_max + 0.05 * (y_max - y_min)],
Example #24
def HGPfunc(x,y,plot, h1low, h1high, h2low, h2high):
    y = y.reshape(-1,1)
    x = x.reshape(-1,1)
    if plot:
        plt.plot(x,y,'+')
        plt.xlabel("Pch (dBm)")
        plt.ylabel("SNR (dB)")
        plt.savefig('Adataset.png', dpi=200)
        plt.show()
    n = np.size(x)
    scaler = StandardScaler().fit(y)
    y = scaler.transform(y)
    
    def sqexp(X,Y,k1,k2):
        X = np.atleast_2d(X)
        if Y is None:
            dists = pdist(X / k2, metric='sqeuclidean')
            K = np.exp(-.5 * dists)
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)
            # return gradient 
            K_gradient = (K * squareform(dists))[:, :, np.newaxis]
            #K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \  # anisotropic case, see https://github.com/scikit-learn/scikit-learn/blob/95d4f0841d57e8b5f6b2a570312e9d832e69debc/sklearn/gaussian_process/kernels.py
            #            / (k2 ** 2)
            #K_gradient *= K[..., np.newaxis]
            return k1*K, K_gradient
        else:
            dists = cdist(X / k2, Y / k2,metric='sqeuclidean')
            K = np.exp(-.5 * dists)
            return k1*K
    # heteroscedastic versions of functions 
    global Kyinvh
    Kyinvh = 0.0
    global Kfh
    Kfh =  0.0 
    def lmlh(params,y,R):
        #print(params)  # show progress of fit
        [k1, k2] = params
        global Kfh
        Kfh = sqexp(x,None,k1,k2**0.5)[0]
        Ky = Kfh + R # calculate initial kernel with noise
        global Kyinvh
        Kyinvh = inv(Ky)
        return -(-0.5*mul(mul(T(y),Kyinvh), y) - 0.5*np.log((det(Ky))) - 0.5*n*np.log(2*np.pi)) # marginal likelihood - (5.8)
    def lmlgh(params,y,R):
        k1, k2 = params
        al = mul(Kyinvh,y)
        dKdk1 = Kfh*(1/k1)
        dKdk2 = sqexp(x,None,k1,k2**0.5)[1].reshape(n,n)
        lmlg1 = -(0.5*np.trace(mul(mul(al,T(al)) - Kyinvh, dKdk1)))
        lmlg2 = -(0.5*np.trace(mul(mul(al,T(al)) - Kyinvh, dKdk2)))
        return np.ndarray((2,), buffer=np.array([lmlg1,lmlg2]), dtype = float)
    def GPRfith(xs,k1,k2,R,Rs):
        Ky = sqexp(x,None,k1,k2**0.5)[0] + R
        Ks = sqexp(xs, x, k1, k2**0.5)
        Kss = sqexp(xs, None, k1, k2)[0]
        L = cholesky(Ky)
        al = solve(T(L), solve(L,y))
        fmst = mul(Ks,al)
        varfmst = np.empty([n,1])
        for i in range(np.size(xs)):
            v = solve(L,T(Ks[:,i]))
            varfmst[i] = Kss[i,i] - mul(T(v),v)  + Rs[i,i]
        lmlopt = -0.5*mul(T(y),al) - np.trace(np.log(L)) - 0.5*n*np.log(2*np.pi)
        #return fmst, varfmst[::-1], lmlopt
        return fmst, varfmst, lmlopt
    def hypopth(y, numrestarts, R):
        numh = 2 # number of hyperparameters in kernel function 
        k1s4 = np.empty([numrestarts,1])
        k2s4 = np.empty([numrestarts,1])
        for i in range(numrestarts):    
            #k1is4 = np.random.uniform(1e-2,1e3)
            #k2is4 = np.random.uniform(1e-1,1e3)
            k1is4 = np.random.uniform(h1low,h1high)
            k2is4 = np.random.uniform(h2low,h2high)
            kis4 = np.ndarray((numh,), buffer=np.array([k1is4,k2is4]), dtype = float)
            s4res = minimize(lmlh,kis4,args=(y,R),method = 'L-BFGS-B',jac=lmlgh,bounds = ((h1low,h1high),(h2low,h2high)),options={'maxiter':1e2})
            step4res = []
            if s4res.success:
                step4res.append(s4res.x)
                print("successful k1:" + str(k1is4))
                print("successful k2: " + str(k2is4))
            else:
                print("error " + str(k1is4))
                print("error " + str(k2is4))
                #raise ValueError(s4res.message)
                #k1is4 = np.random.uniform(1e-2,1e3)
                #k2is4 = np.random.uniform(2e-1,1e3)
                k1is4 = np.random.uniform(h1low,h1high)
                k2is4 = np.random.uniform(h2low,h2high)
                print("error in hypopth() - reinitialising hyperparameters")
                continue 
            k1s4[i] = step4res[0][0]
            k2s4[i] = step4res[0][1]
        lmltest = [lmlh([k1s4[i],k2s4[i]],y,R) for i in range(numrestarts)]
        k1f = k1s4[np.argmin(lmltest)]
        k2f = k2s4[np.argmin(lmltest)]
            #lml(params,y,sig)
        return k1f, k2f
    def hetloopSK(fmst,varfmst,numiters,numrestarts):
        s = 200
        #k1is3, k2is3, k1is4,k2is4  =  np.random.uniform(1e-2,1e2,4)
        MSE = np.empty([numiters,1])
        NLPD = np.empty([numiters,1])
        fmstf = np.empty([numiters,n])
        varfmstf = np.empty([numiters,n])
        lmloptf = np.empty([numiters,1])
        rf = np.empty([numiters,n])
        i = 0
        while i < numiters:        
            
            breakwhile = False
            # Step 2: estimate empirical noise levels z 
            #k1is4,k2is4  = np.random.uniform(1e-2,1e2,2)
            #k1is3, k1is4  =  np.random.uniform(1e-2,1e2,2)
            #k2is3, k2is4  =  np.random.uniform(1e-1,1e2,2)
            k1is3  =  np.random.uniform(h1low,h1high,1)
            k2is3  =  np.random.uniform(h2low,h2high,1)
            z = np.empty([n,1])
            for j in range(n):
                #np.random.seed()
                normdraw = normal(fmst[j], varfmst[j]**0.5, s).reshape(s,1)
                z[j] = np.log((1/s)*0.5*sum((y[j] - normdraw)**2))
                if math.isnan(z[j]): # True for NaN values
                    breakwhile = True
                    break
            if breakwhile:
                print("Nan value in z -- skipping iter "+ str(i))
                i = i + 1
                continue
            #  Step 3: estimate GP2 on D' - (x,z)
            kernel2 = C(k1is3, (h1low,h1high)) * RBF(k2is3, (h2low,h2high)) 
            gpr2 = GaussianProcessRegressor(kernel=kernel2, n_restarts_optimizer = numrestarts, normalize_y=False, alpha=np.var(z))
            
            gpr2.fit(x, z)
            ystar2, sigma2 = gpr2.predict(x, return_std=True )
            sigma2 = (sigma2**2 + 1)**0.5
        # Step 4: train heteroscedastic GP3 using predictive mean of G2 to predict log noise levels r
            r = exp(ystar2)
            R = r*np.identity(n)
            k1s4, k2s4 = hypopth(y,numrestarts,R)
            fmst4, varfmst4, lmlopt4 = GPRfith(x,k1s4,k2s4,R,R)
            # test for convergence 
            MSE[i] = (1/n)*sum(((y-fmst4)**2)/np.var(y))
            #NLPD[i] = sum([(1/n)*(-np.log(norm.pdf(x[j], fmst4[j], varfmst4[j]**0.5))) for j in range(n) ])
            nlpdarg = np.zeros([n,1])
            #nlpdtest = np.zeros([n,1])
            for k in range(n):
                nlpdarg[k] = -np.log10(norm.pdf(x[k], fmst4[k], varfmst4[k]**0.5))
                #nlpdtest[k] = norm.pdf(x[k], fmst4[k], varfmst4[k]**0.5)
            #print("mean NLPD log arg " + str(nlpdtest) )
                #test3[k] = -np.log(norm.pdf(x[k], fmst[k], varfmst[k]**0.5))
            NLPD[i] = sum(nlpdarg)*(1/n)
            print("MSE = " + str(MSE[i]))
            print("NLPD = " + str(NLPD[i]))
            print("finished iteration " + str(i+1))
            fmstf[i,:] = fmst4.reshape(n)
            varfmstf[i,:] = varfmst4.reshape(n)
            lmloptf[i] = lmlopt4
            fmst = fmst4
            varfmst = varfmst4
            rf[i,:] = r.reshape(n)
            #k1is3 = k1s4
            #k2is3 = k2s4
            i = i + 1
        return fmstf,varfmstf, lmloptf, MSE, rf, NLPD #  , NLPD 
    
    numiters = 10
    numrestarts = 20
    
    #kernel1 = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-3, 1e3)) + W(1.0, (1e-5, 1e5))
    #gpr1 = GaussianProcessRegressor(kernel=kernel1, n_restarts_optimizer = 0, normalize_y=True)
    kernel1 = C(1.0, (h1low,h1high)) * RBF(1.0, (h2low,h2high)) 
    gpr1 = GaussianProcessRegressor(kernel=kernel1, n_restarts_optimizer = numrestarts, normalize_y=False, alpha=np.var(y))
    gpr1.fit(x, y)
    ystar1, sigma1 = gpr1.predict(x, return_std=True )
    var1 = (sigma1**2 + np.var(y))
    #sigma1 = np.reshape(sigma1,(np.size(sigma1), 1))

    
    start_time = time.time()
    fmstf,varfmstf, lmlopt, mse, _,NLPD = hetloopSK(ystar1,var1,numiters,numrestarts)
    duration = time.time() - start_time

    ind = numiters - 1
    #ind = 
    fmst4 = fmstf[ind]
    varfmst4 = varfmstf[ind]
    
    sigs4 = varfmst4**0.5
    fmstps4 = fmst4 + sigs4
    fmst4i = scaler.inverse_transform(fmst4)
    fmstps4i = scaler.inverse_transform(fmstps4)
    
    #  ================================ Mutual information transform ===========================================
# =============================================================================
#     MIcalc = False # select whether to calculate MI using Guassian-Hermite quadrature 
#     # import constellation shapes from MATLAB-generated csv files 
#     if MIcalc:  
#         Qam4r = np.genfromtxt(open("qam4r.csv", "r"), delimiter=",", dtype =float)
#         Qam4i = np.genfromtxt(open("qam4i.csv", "r"), delimiter=",", dtype =float)
#         Qam16r = np.genfromtxt(open("qam16r.csv", "r"), delimiter=",", dtype =float)
#         Qam16i = np.genfromtxt(open("qam16i.csv", "r"), delimiter=",", dtype =float)
#         Qam32r = np.genfromtxt(open("qam32r.csv", "r"), delimiter=",", dtype =float)
#         Qam32i = np.genfromtxt(open("qam32i.csv", "r"), delimiter=",", dtype =float)
#         Qam64r = np.genfromtxt(open("qam64r.csv", "r"), delimiter=",", dtype =float)
#         Qam64i = np.genfromtxt(open("qam64i.csv", "r"), delimiter=",", dtype =float)
#         Qam128r = np.genfromtxt(open("qam128r.csv", "r"), delimiter=",", dtype =float)
#         Qam128i = np.genfromtxt(open("qam128i.csv", "r"), delimiter=",", dtype =float)
#         
#         Qam4 = Qam4r + 1j*Qam4i
#         Qam16 = Qam16r + 1j*Qam16i
#         Qam32 = Qam32r + 1j*Qam32i
#         Qam64 = Qam64r + 1j*Qam64i
#         Qam128 = Qam128r + 1j*Qam128i
#         #  ================================ Estimate MI ================================ 
#         # set modulation format order and number of terms used in Gauss-Hermite quadrature
# =============================================================================

#         M = 16
#         L = 6
#         
#         def MIGHquad(SNR):
#             if M == 4:
#                 Ps = np.mean(np.abs(Qam4**2))
#                 X = Qam4
#             elif M == 16:
#                 Ps = np.mean(np.abs(Qam16**2))
#                 X = Qam16
#             elif M == 32:
#                 Ps = np.mean(np.abs(Qam32**2))
#                 X = Qam32
#             elif M == 64:
#                 Ps = np.mean(np.abs(Qam64**2))
#                 X = Qam64
#             elif M == 128:
#                 Ps = np.mean(np.abs(Qam128**2))
#                 X = Qam128
#             else:
#                 print("unrecogised M")
#             sigeff2 = Ps/(10**(SNR/10))
#             Wgh = GHquad(L)[0]
#             Rgh = GHquad(L)[1]
#             sum_out = 0
#             for ii in range(M):
#                 sum_in = 0
#                 for l1 in range(L):      
#                     sum_inn = 0
#                     for l2 in range(L):
#                         sum_exp = 0
#                         for jj in range(M):  
#                             arg_exp = np.linalg.norm(X[ii]-X[jj])**2 + 2*(sigeff2**0.5)*np.real( (Rgh[l1]+1j*Rgh[l2])*(X[ii]-X[jj]));
#                             sum_exp = np.exp(-arg_exp/sigeff2) + sum_exp
#                         sum_inn = Wgh[l2]*np.log2(sum_exp) + sum_inn
#                     sum_in = Wgh[l1]*sum_inn + sum_in
#                 sum_out = sum_in + sum_out
#             return np.log2(M)- (1/(M*np.pi))*sum_out 
#         
#         def findMI(SNR):
#             with multiprocessing.Pool() as pool:
#                 Ixy = pool.map(MIGHquad, SNR) 
#             return Ixy
# =============================================================================
        
    print("HGP fitting duration: " + str(duration)) 
    
    return fmst4i, fmstps4i, lmlopt, mse, NLPD
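
A minimal usage sketch of the call signature (the data here are hypothetical; the real script feeds measured Pch/SNR values, np is assumed to be imported as in the rest of the file, and a full run is slow because of the 10 iterations with 20 restarts each):

# hypothetical launch-power sweep with additive noise
x_demo = np.linspace(-5, 5, 40)
y_demo = 15 - 0.3 * (x_demo - 1) ** 2 + np.random.normal(0, 0.3, 40)
fmean, fmean_plus_std, lml, mse, nlpd = HGPfunc(x_demo, y_demo, False,
                                                1e-2, 1e2, 1e-1, 1e2)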
Example #25
a = np.array(
    args.scalefactors
)  #z_edge[:-1] + np.diff(z_edge)/2 #NH z is now the redshift in the middle of each bin
ini = [inia]
a = np.concatenate([ini, a])
#print  a
wde = np.concatenate([wn, wde])
#print  wde

nb = len(wde)
#defining the baseline -1
base = lambda x: -1 + x - x

# Generation of the Gaussian Process
gp = GaussianProcessRegressor(kernel=RBF(l, (l, l)))

#Fit --> Training
g = gp.fit(a[:, np.newaxis], wde - base(a))

#Plotting points (if log use np.logspace)
a_sampling = np.linspace(inia, enda, ODEsteps)
print(a_sampling)

#transforming a_sampling in z_sampling
z_sampling = np.zeros(ODEsteps)
for i in range(ODEsteps):
    z_sampling[i] = -1 + 1 / a_sampling[i]
print(z_sampling)
#Predict points
w_pred, sigma = gp.predict(a_sampling[:, np.newaxis], return_std=True)
Example #27
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

names = [
    "Logistic Regression", "Nearest Neighbors", "Linear SVM", "RBF SVM",
    "Gaussian Process", "Decision Tree", "Random Forest", "Neural Net",
    "AdaBoost", "Naive Bayes"
]

classifiers = [
    LogisticRegression(),
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
]

#for name, clf in zip(names, classifiers):
#  clf.fit(X_train, y)
# accuracy = round(clf.score(X_train, y) * 100, 2)
# print(name, accuracy)

# In[ ]:

clf = RandomForestClassifier(max_depth=15, n_estimators=15, max_features=5)
Example #28
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

tt = pd.read_csv('immSurvey.csv')
tt.head()

alphas = tt.stanMeansNewSysPooled
sample = tt.textToSend

from sklearn.feature_extraction.text import CountVectorizer
vec = CountVectorizer(ngram_range=(2, 2))
X = vec.fit_transform(sample)
X

pd.DataFrame(X.toarray(), columns=vec.get_feature_names())

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas,
                                                random_state=1)

rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=rbf, alpha=1e-8)

gpr.fit(Xtrain.toarray(), ytrain)

# Compute posterior predictive mean and covariance
mu_s, cov_s = gpr.predict(Xtest.toarray(), return_cov=True)

#test correlation between test and mus
np.corrcoef(ytest, mu_s)
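
Besides the raw correlation, a short hedged sketch of the usual error metrics on the same test predictions (mean_squared_error and r2_score come from sklearn.metrics; this is not part of the original snippet):

from sklearn.metrics import mean_squared_error, r2_score

print("Test RMSE:", np.sqrt(mean_squared_error(ytest, mu_s)))
print("Test R^2:", r2_score(ytest, mu_s))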

Example #29
        x_pr_grid,
        B_postsamples,
        T_fwdsamples,
        seed=200)

    jnp.save('plot_files/ccopula_lidar_logpdf_pr{}'.format(x_pr_val),
             logpdf_pr)
    jnp.save('plot_files/ccopula_lidar_logcdf_pr{}'.format(x_pr_val),
             logcdf_pr)

    #Convergence plot
    seed = 200
    T_fwdsamples = 10000
    logcdf_pr_conv, logpdf_pr_conv, pdiff, cdiff = check_convergence_pr_cregression(
        copula_cregression_obj, x, y_pr_grid, x_pr_grid, 1, T_fwdsamples, seed)
    jnp.save('plot_files/ccopula_lidar_pr_pdiff_pr{}'.format(x_pr_val), pdiff)

#Gaussian Process
print('Method: GP')
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

kernel = ConstantKernel() * RBF() + WhiteKernel()
gp = GaussianProcessRegressor(kernel=kernel,
                              n_restarts_optimizer=10,
                              normalize_y=True)
gp.fit(x, y)
mean_gp, std_gp = gp.predict(x_plot.reshape(-1, 1), return_std=True)
jnp.save('plot_files/gp_lidar_mean', mean_gp)
jnp.save('plot_files/gp_lidar_std', std_gp)
Example #30
import numpy as np

from matplotlib import pyplot as plt

from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f" %
      gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f" %
      gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)" %
      (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
       accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
print("Log-loss: %.3f (initial) %.3f (optimized)" %