# storing the encoded class labels
encoded_classe = encoder.transform(classe)

# storing the sepal data in Xs as a NumPy array
Xs = np.array(dados[['sepala-comprimento', 'sepala-largura']])
# storing the petal data in Xp as a NumPy array
Xp = np.array(dados[['petala-comprimento', 'petala-largura']])
# storing the encoded class labels as a NumPy array
Y = np.array(encoded_classe, dtype=int)

# step size in the mesh
h = 0.02

# defining the isotropic kernel
kernel = 1.0 * RBF([1.0])
gpc_isotropico_s = GaussianProcessClassifier(kernel=kernel).fit(Xs, Y)
gpc_isotropico_p = GaussianProcessClassifier(kernel=kernel).fit(Xp, Y)

# defining the anisotropic kernel
kernel = 1.0 * RBF([1.0, 1.0])
gpc_anisotropico_s = GaussianProcessClassifier(kernel=kernel).fit(Xs, Y)
gpc_anisotropico_p = GaussianProcessClassifier(kernel=kernel).fit(Xp, Y)

# creating the mesh for plotting
x_min_s = Xs[:, 0].min() - 1
x_max_s = Xs[:, 0].max() + 1
x_min_p = Xp[:, 0].min() - 1
x_max_p = Xp[:, 0].max() + 1
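# A hedged continuation sketch (not part of the original snippet): building the
# y-axis bounds and mesh for the sepal features and evaluating class
# probabilities over the grid, in the style of the usual scikit-learn iris GPC
# example. The _s suffix refers to the sepal arrays defined above.
y_min_s, y_max_s = Xs[:, 1].min() - 1, Xs[:, 1].max() + 1
xx_s, yy_s = np.meshgrid(np.arange(x_min_s, x_max_s, h),
                         np.arange(y_min_s, y_max_s, h))
# predicted probabilities for each mesh point, one column per class
Z_s = gpc_isotropico_s.predict_proba(np.c_[xx_s.ravel(), yy_s.ravel()])
Z_s = Z_s.reshape((xx_s.shape[0], xx_s.shape[1], -1))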
# dsigma = dsigma / norm
# sigma_exact = sigma_exact / norm
# print("tau: {}".format(tau.shape))
# print("sigma: {}".format(sigma.shape))
# print("tau mesh: {}".format(tau_exact.shape))
# print("sigma exact: {}".format(sigma_exact.shape))

print("\nPreparing for exhaustive GridSearch():")

# Set the parameters to hyperoptimize
kernel1 = DotProduct(sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5))
kernel2 = ExpSineSquared(length_scale=1.0, periodicity=1.0,
                         length_scale_bounds=(1e-5, 1e5))
kernel3 = Exponentiation(RBF(length_scale=1.0,
                             length_scale_bounds=(1e-2, 1e2)), 2)
kernel4 = Matern(length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5)
kernel5 = PairwiseKernel(gamma=1.0, gamma_bounds=(1e-5, 1e5))
kernel6 = Product(RBF(1.0, (1e-5, 1e5)), Matern(1.0, (1e-5, 1e5), nu=1.5))
kernel7 = RBF(length_scale=1.0, length_scale_bounds=(1e-5, 1e5))
kernel8 = RationalQuadratic(length_scale=1.0, alpha=1.0,
                            length_scale_bounds=(1e-5, 1e5),
                            alpha_bounds=(1e-5, 1e5))
kernel9 = Sum(RBF(1.0, (1e-2, 1e2)), Matern(10, (1e-2, 1e2), nu=1.5))

# List of hyperparameters given to GridSearchCV()
tuned_parameters = [{
    "kernel": [kernel1, kernel2, kernel3, kernel4, kernel5,
               kernel6, kernel7, kernel8, kernel9]
}]
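# A hedged usage sketch (assumed continuation, not shown in the snippet):
# feeding tuned_parameters to GridSearchCV over a GaussianProcessRegressor.
# X_train/y_train are assumed to be defined elsewhere in the original script.
from sklearn.model_selection import GridSearchCV
from sklearn.gaussian_process import GaussianProcessRegressor

grid = GridSearchCV(GaussianProcessRegressor(), tuned_parameters,
                    cv=5, scoring="neg_mean_squared_error")
# grid.fit(X_train, y_train)
# print("Best kernel: {}".format(grid.best_params_["kernel"]))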
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

h = .02  # step size in the mesh

names = [
    "Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
    "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
    "Naive Bayes", "QDA"
]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()
]

X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                           random_state=1, n_clusters_per_class=1)
rng = np.random.RandomState(2)
X += 2 * rng.uniform(size=X.shape)
def build_gaussian_model(X_train, y_train):
    # Constant * RBF kernel with broad bounds; hyperparameters are tuned by
    # maximizing the log-marginal likelihood, with 9 optimizer restarts.
    kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
    model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
    model.fit(X_train, y_train)
    return model
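# A minimal usage sketch, assuming C and RBF are imported from
# sklearn.gaussian_process.kernels as in the function above; the toy data here
# is illustrative, not from the original code.
import numpy as np

X_demo = np.linspace(0, 10, 50).reshape(-1, 1)
y_demo = np.sin(X_demo).ravel()
model = build_gaussian_model(X_demo, y_demo)
y_pred, y_std = model.predict(X_demo, return_std=True)  # mean and pointwise std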
def random_search_top_tiers():
    """Perform random search to find the best hyperparameters."""
    tc = TopCoder()
    model_dct = {
        'BayesianRidge': BayesianRidge,
        'DecisionTreeRegressor': DecisionTreeRegressor,
        'GaussianProcessRegressor': GaussianProcessRegressor,
        'GradientBoostingRegressor': GradientBoostingRegressor,
        'KNeighborsRegressor': KNeighborsRegressor,
        'RandomForestRegressor': RandomForestRegressor,
        'SVR': SVR,
    }
    model_args_dct = {
        'BayesianRidge': {
            'fixed_args': dict(n_iter=1000),
            'tuned_args': dict(tol=[1e-3, 1e-4, 1e-5, 1e-6]),
        },
        'DecisionTreeRegressor': {
            'fixed_args': dict(random_state=42),
            'tuned_args': dict(criterion=['mse', 'mae', 'friedman_mse'],
                               max_depth=[None, 3, 5, 10]),
        },
        'GaussianProcessRegressor': {
            'fixed_args': dict(),
            'tuned_args': dict(kernel=[
                1.0 * RBF(),
                1.0 * RationalQuadratic(),
                ConstantKernel() * (DotProduct() ** 2),
                DotProduct() * WhiteKernel(),
            ]),
        },
        'GradientBoostingRegressor': {
            'fixed_args': dict(random_state=42, n_iter_no_change=5),
            'tuned_args': dict(
                loss=['ls', 'lad'],
                n_estimators=[200, 500, 1000, 1500],
                learning_rate=[0.01, 0.001, 1e-4],
                tol=[0.01, 0.001, 1e-4, 1e-5, 2e-5, 1e-6],
            ),
        },
        'KNeighborsRegressor': {
            'fixed_args': dict(n_jobs=-1),
            'tuned_args': dict(
                n_neighbors=[5, 10, 15, 20],
                weights=['uniform', 'distance'],
                algorithm=['ball_tree', 'kd_tree'],
                leaf_size=[30, 60, 100],
            ),
        },
        'RandomForestRegressor': {
            'fixed_args': dict(n_jobs=-1, verbose=1, random_state=42,
                               bootstrap=True),
            'tuned_args': dict(
                n_estimators=[100, 200, 500, 1000],
                max_features=['auto', 'sqrt', 0.333],
                criterion=['mae', 'mse'],
            ),
        },
        'SVR': {
            'fixed_args': dict(cache_size=15000),
            'tuned_args': [
                dict(kernel=['rbf'], gamma=['scale', 'auto'],
                     C=[1, 10, 100, 1000]),
                dict(kernel=['linear'], C=[1, 10, 100, 1000]),
                dict(kernel=['poly'], degree=[2, 3, 5],
                     coef0=[0, 0.5, 5, 50, 100], C=[1, 10, 100, 1000]),
            ],
        },
    }
    scoring = {
        'mae': make_scorer(mean_absolute_error, greater_is_better=False),
        'mre': make_scorer(mre, greater_is_better=False),
    }

    rs_path = os.path.join(os.curdir, 'result', 'random_search_res')
    with open(os.path.join(os.curdir, 'result', 'simple_regression',
                           'top4_reg_dct.json')) as f:
        top_regs_dct = {
            target: list(metrics.keys())
            for target, metrics in json.load(f).items()
            if target != 'price'
        }

    for target, reg_lst in top_regs_dct.items():
        print(f'{target} | Random Searching....')
        X, y = tc.build_final_dataset(target)
        Xnp, ynp = X.to_numpy(), y.to_numpy()
        X_train, X_test, y_train, y_test = train_test_split(
            Xnp, ynp, test_size=0.3, random_state=42)

        for reg_name in reg_lst:
            print(f'RS on {reg_name}...')
            rs_res_path = os.path.join(rs_path, f'{target}_{reg_name}_rs.json')
            if os.path.isfile(rs_res_path):
                continue

            reg = model_dct[reg_name]
            args = model_args_dct[reg_name]
            rs = RandomizedSearchCV(
                reg(**args['fixed_args']),
                param_distributions=args['tuned_args'],
                n_iter=6,
                scoring=scoring,
                refit='mre',
                n_jobs=-1,
                cv=10,
                random_state=42,
            )
            rs.fit(X_train, y_train)

            rs_res = {
                'regressor': reg_name,
                'best_params': rs.best_params_,
                'best_score_in_rs': rs.best_score_,
            }
            with open(rs_res_path, 'w') as f:
                json.dump(rs_res, f, indent=4)
# Custom defined list of Gaussian Process regression models to be used by TPOT
import numpy as np
import pdb
from itertools import product

from skrvm import RVR

# Define list of kernels
from sklearn.gaussian_process.kernels import (RBF, Matern, RationalQuadratic,
                                              ExpSineSquared, DotProduct,
                                              ConstantKernel)

# The hyperparameters for the GPR will be optimised during fitting
kernels = [RBF(), RationalQuadratic(), ExpSineSquared(), Matern()]

tpot_config_gpr = {
    'sklearn.gaussian_process.GaussianProcessRegressor': {
        'kernel': kernels,
        'random_state': [42],
        # np.linspace yields 30 alpha candidates; the original
        # np.arange(1e-2, 10, 30) produced only a single value because the
        # step (30) exceeded the range
        'alpha': np.linspace(1e-2, 10, 30)
    },
    'skrvm.RVR': {
        'kernel': kernels,
        'alpha': [1e-10, 1e-06, 1e-02, 1],
        'beta': [1e-10, 1e-06, 1e-02, 1],
    },
    'sklearn.svm.LinearSVR': {
        'loss': ["epsilon_insensitive", "squared_epsilon_insensitive"],
        'dual': [True, False],
        'random_state': [42],
        'tol': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        'C': [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
    },
}
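# A hedged usage sketch (assumption, not part of the original file): passing
# the custom configuration to TPOTRegressor via its config_dict argument.
from tpot import TPOTRegressor

tpot = TPOTRegressor(generations=5, population_size=20,
                     config_dict=tpot_config_gpr, random_state=42)
# tpot.fit(X_train, y_train)  # X_train/y_train assumed defined elsewhere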
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct

xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50))
rng = np.random.RandomState(0)
X = rng.randn(200, 2)
Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)

# fit the model
plt.figure(figsize=(10, 5))
kernels = [1.0 * RBF(length_scale=1.0), 1.0 * DotProduct(sigma_0=1.0) ** 2]
for i, kernel in enumerate(kernels):
    clf = GaussianProcessClassifier(kernel=kernel, warm_start=True).fit(X, Y)

    # plot the decision function for each datapoint on the grid
    Z = clf.predict_proba(np.vstack((xx.ravel(), yy.ravel())).T)[:, 1]
    Z = Z.reshape(xx.shape)

    plt.subplot(1, 2, i + 1)
    image = plt.imshow(
        Z,
        interpolation="nearest",
        extent=(xx.min(), xx.max(), yy.min(), yy.max()),
        aspect="auto",
        origin="lower",
        cmap=plt.cm.PuOr_r,
    )
from sklearn.utils._testing import (assert_array_less, assert_almost_equal,
                                    assert_array_almost_equal,
                                    assert_array_equal, assert_allclose)


def f(x):
    return x * np.sin(x)


X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T
y = f(X).ravel()

fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [
    RBF(length_scale=1.0),
    fixed_kernel,
    RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
    C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0,
                              length_scale_bounds=(1e-3, 1e3)),
    C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0,
                              length_scale_bounds=(1e-3, 1e3))
    + C(1e-5, (1e-5, 1e2)),
    C(0.1, (1e-2, 1e2)) * RBF(length_scale=1.0,
                              length_scale_bounds=(1e-3, 1e3))
    + C(1e-5, (1e-5, 1e2)),
]
non_fixed_kernels = [kernel for kernel in kernels if kernel != fixed_kernel]
def test_no_optimizer():
    # Test that kernel parameters are unmodified when optimizer is None.
    kernel = RBF(1.0)
    gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y)
    assert np.exp(gpr.kernel_.theta) == 1.0
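# A hedged contrast sketch (not part of the test suite), reusing the
# module-level X, y fixtures above: with the default optimizer, the fitted
# length scale generally moves away from its initial value, since it is tuned
# by maximizing the log-marginal likelihood.
gpr_opt = GaussianProcessRegressor(kernel=RBF(1.0)).fit(X, y)
print(np.exp(gpr_opt.kernel_.theta))  # typically != 1.0 after fitting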
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import classification_report

x_train, y_train, x_valid, y_valid, x_test, y_test = prepare_data(one_hot=False)

classifiers = [
    GaussianNB(),
    # RidgeClassifier(tol=1e-2, solver="lsqr"),
    QuadraticDiscriminantAnalysis(),
    LinearDiscriminantAnalysis(),
    DecisionTreeClassifier(max_depth=5),
    KNeighborsClassifier(3, n_jobs=-1),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,
                           n_jobs=-1),
    AdaBoostClassifier(),
    GradientBoostingClassifier(),
    SVC(kernel="linear", C=0.025, probability=True),
    SVC(gamma=2, C=1, probability=True),
    SVC(kernel="rbf", C=0.025, probability=True),
    MLPClassifier(alpha=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), n_jobs=-1),
]

for clf in classifiers:
    print('_' * 80)
    print(clf.__class__.__name__)
    clf.fit(x_train, y_train)
    print('Train/val/test accuracy: ',
          clf.score(x_train, y_train),
          clf.score(x_valid, y_valid),
          clf.score(x_test, y_test))
    print('Classification report of Test data')
    print(classification_report(y_test, clf.predict(x_test)))
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tqdm import tqdm

# (estimator, parameter grid) pairs; grid values are lists so they can be
# consumed by a parameter search (the GaussianProcess entry originally used
# bare values, which is inconsistent with the other grids)
classifiers = {
    'SVC': (SVC(), {'kernel': ["linear"], 'C': [0.01]}),
    'KNN': (KNeighborsClassifier(), {'n_neighbors': [5, 10]}),
    'GaussianProcess': (GaussianProcessClassifier(),
                        {'kernel': [1.0 * RBF(1.0)], 'warm_start': [True]}),
    'DecisionTree': (DecisionTreeClassifier(), {}),
    'RandomForest': (RandomForestClassifier(), {}),
    'AdaBoost': (AdaBoostClassifier(), {}),
    'GradientBoosting': (GradientBoostingClassifier(), {}),
    'MLP': (MLPClassifier(), {}),
    'NaiveBayes': (GaussianNB(), {}),
    'LDA': (LinearDiscriminantAnalysis(), {}),
}


# noinspection PyPep8Naming
class ClassifierPool(object):
    def __init__(self, classifier_name='SVC', nb_features=1000):
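# A hedged usage sketch (assumed, since the rest of ClassifierPool is not
# shown): iterating the (estimator, grid) pairs with GridSearchCV.
from sklearn.model_selection import GridSearchCV

for name, (estimator, grid) in classifiers.items():
    search = GridSearchCV(estimator, grid, cv=3) if grid else estimator
    # search.fit(X_train, y_train)  # data assumed defined elsewhere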
#     training_Y.append(0)
#     training_X.append([0.5, 4.5])
#     training_Y.append(0)
#     training_X.append([5.5, 0.5])
#     training_Y.append(0)
#     training_X.append([7.5, 9.5])
#     training_Y.append(0)

print(len(training_X))
print(len(training_Y))

# Specify Gaussian Process with optimized hyperparameters
gp_opt = GaussianProcessClassifier(RBF(length_scale=1.0))
gp_opt.fit(training_X, training_Y)
print("The trained hyperparameters are {}".format(gp_opt.kernel_.theta))
# print("Log Marginal Likelihood (optimized): %.3f"
#       % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))
# print("The probability of occupancy is {}".format(p_occ))

fig = plt.figure()
ZZ = np.empty([30, 30])
for idx1, row in enumerate(x):
    for idx2, col in enumerate(y):
        K = [row, col]
        if K in training_X:
            ZZ[idx1, idx2] = 0.0
X = np.array(X)
Y = np.sin(2 * np.pi * X)
N = X.shape[0]

alpha = []
for i in range(N):
    alpha_ = 0.01
    alpha.append(alpha_)
alpha = np.array(alpha)

X_plot = X
Y_plot = Y + np.random.normal(0, alpha)
pylab.scatter(X_plot, Y_plot)

kernel = C(1.0, (0.01, 100)) \
    * ManifoldKernel.construct(base_kernel=RBF(length_scale=10),
                               architecture=((1, 6, 2),),
                               transfer_fct="tanh", max_nn_weight=1)
gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha**2,
                              n_restarts_optimizer=1)
'''
kernel = C(1.0) * RBF(length_scale=0.1)
gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha ** 2,
                              n_restarts_optimizer=10)
'''
gp.fit(X[:, None], Y)

XX = np.linspace(-1.5, 1.5, 100)
YY = np.sin(2 * np.pi * XX)

pylab.figure(0, figsize=(10, 8))
DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None,
                       min_samples_split=2, min_samples_leaf=1,
                       min_weight_fraction_leaf=0.0, max_features=None,
                       random_state=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       class_weight=None, presort=False),
KNeighborsClassifier(n_neighbors=2, weights='uniform', algorithm='auto',
                     leaf_size=30, p=2, metric='minkowski',
                     metric_params=None, n_jobs=None),
MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam',
              alpha=1, batch_size='auto', learning_rate='constant',
              learning_rate_init=0.001, power_t=0.5, max_iter=1000,
              shuffle=True, random_state=None, tol=0.0001, verbose=False,
              warm_start=False, momentum=0.9, nesterovs_momentum=True,
              early_stopping=False, validation_fraction=0.1, beta_1=0.9,
              beta_2=0.999, epsilon=1e-08, n_iter_no_change=10),
AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0,
                   algorithm='SAMME.R', random_state=None),
GaussianProcessClassifier(kernel=1.0 * RBF(1.0), optimizer='fmin_l_bfgs_b',
                          n_restarts_optimizer=0, max_iter_predict=100,
                          warm_start=False, copy_X_train=True,
                          random_state=None, multi_class='one_vs_rest',
                          n_jobs=None),
RandomForestClassifier(n_estimators='warn', criterion='gini', max_depth=None,
                       min_samples_split=2, min_samples_leaf=1,
                       min_weight_fraction_leaf=0.0, max_features='auto',
                       max_leaf_nodes=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, bootstrap=True,
                       oob_score=False, n_jobs=None, random_state=None,
                       verbose=0, warm_start=False, class_weight=None),
SVC(kernel="linear", C=1, degree=3, gamma='auto_deprecated', coef0=0.0,
    shrinking=True, probability=False, tol=0.001, cache_size=200,
    class_weight=None, verbose=False, max_iter=-1,
    decision_function_shape='ovr', random_state=None),
SVC(C=1.0, kernel='rbf', degree=3, gamma=0.1, coef0=0.0, shrinking=True,
    probability=False, tol=0.001, cache_size=200,
y_source = 2 * x_source[:, 0] + 3 * x_source[:, 1] + 1
y_source = y_source + noise_ratio_in_simulation * y_source.std() \
    * np.random.rand(len(y_source))
x_target = np.random.rand(number_of_samples, 2)
y_target = 2 * x_target[:, 0] + 4 * x_target[:, 1] + 1
y_target = y_target + noise_ratio_in_simulation * y_target.std() \
    * np.random.rand(len(y_target))
np.random.seed()

x_train, x_test, y_train, y_test = train_test_split(
    x_target, y_target, test_size=number_of_test_samples, random_state=0)
fold_number = min(fold_number, len(y_train))

# Gaussian process regression
regression_model = GaussianProcessRegressor(
    ConstantKernel() * RBF() + WhiteKernel(), alpha=0)
model = TransferLearningSample(base_estimator=regression_model,
                               x_source=x_source, y_source=y_source,
                               cv_flag=False)
model.fit(x_train, y_train)

# calculate y in training data
calculated_y_train = model.predict(x_train)

# yy-plot
plt.rcParams['font.size'] = 18  # font size for axis labels and similar text
plt.figure(figsize=figure.figaspect(1))
plt.scatter(y_train, calculated_y_train, c='blue')
y_max = np.max(np.array([np.array(y_train), calculated_y_train]))
from matplotlib import pyplot as plt
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

length_scale = 1
noise = .1
kernel = RBF(length_scale=length_scale) + WhiteKernel(noise_level=noise**2)
gp = GaussianProcessRegressor(kernel=kernel, optimizer=None)

x_max = 2
x_min = -2
n_observation = 51

xs = np.zeros((10, n_observation))
xs[0] = (x_max - x_min) * (np.random.rand(n_observation) - .5)
idx = np.argsort(xs[0])
# sample_y expects a 2-D input, so reshape the 1-D grid into a column vector
y = gp.sample_y(xs[0].reshape(-1, 1)).reshape(-1, 1)

plt.figure()
plt.plot(xs[0][idx], y[idx])  # was plt.plot(x[idx], y[idx]); x was undefined
plt.show()
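# A hedged extension sketch (not in the original snippet): drawing several
# prior samples at once via the n_samples argument of sample_y, over a
# regular grid.
grid = np.linspace(x_min, x_max, n_observation).reshape(-1, 1)
samples = gp.sample_y(grid, n_samples=5, random_state=0)  # shape (51, 5)
plt.plot(grid, samples)
plt.show()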
import random
import math

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

# Generating samples randomly
data_x = [[4.0 * (-0.5 + random.random()), 4.0 * (-0.5 + random.random())]
          for i in range(200)]
data_y = [[x[0] * math.sin(3.0 * x[1])] for x in data_x]

# Training GPR (Gaussian Process for Regression) so that GPR can map from x to y.
# You can play with different kernels:
#kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (0.1, 10.0))
#kernel = C(1.0, (1.0, 1.0)) * RBF(1.0, (0.1, 10.0))
#kernel = C(1.0, (1e-3, 1e3)) * RBF(3.0, (3.0, 3.0))
#kernel = RBF(1.0, (0.1, 10.0))
kernel = RBF(3.0, (3.0, 3.0))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(data_x, data_y)
f = lambda x: gp.predict([x])[0, 0]

# Now we can compute y=f(x) for any x
print('f([0.0,0.0])=', f([0.0, 0.0]))
print('f([1.0,1.0])=', f([1.0, 1.0]))
print('f([1.5,2.0])=', f([1.5, 2.0]))

# Plot gp.predict(x)
plot, plot3d = PlotF(f, xmin=[-2, -2], xmax=[2, 2], dx=0.1, show=False)
# Plot data points
plot3d.scatter(np.array(data_x).T[0], np.array(data_x).T[1], data_y,
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import SGDClassifier  # public import path
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier

clf1 = SVC(probability=False, C=9, gamma=0.15, kernel='rbf')
clf2 = RandomForestClassifier(criterion='gini', n_estimators=34,
                              random_state=12)
clf3 = GaussianProcessClassifier(kernel=1.0 * RBF(1.0),
                                 n_restarts_optimizer=1,
                                 max_iter_predict=50, random_state=2)
eclf = VotingClassifier(estimators=[('svc', clf1), ('rf', clf2),
                                    ('gpc', clf3)],
                        voting='hard', weights=[2, 5, 2])

# for clf, label in zip([clf1, clf2, clf3, eclf],
#                       ['SVC', 'RF', 'GPC', 'Ensemble']):
#     scores = cross_val_score(clf, X, y, cv=10, scoring='accuracy')
#     print("Accuracy: %0.2f (+/- %0.2f) [%s]"
#           % (scores.mean(), scores.std(), label))

eclf4 = eclf.fit(X, y)
print('\n---------Independent test set ----------\n ')
yy_true, yy_pred = yy, eclf4.predict(XX)
def main(_):
  num_parallel_thetas = FLAGS.num_parallel_thetas
  num_theta_batches = FLAGS.num_theta_batches
  num_steps_autoencoder = 0 if FLAGS.uniform_weights else TRAINING_STEPS
  input_dim = len(FEATURES)

  training_df = pd.read_csv(FLAGS.training_data_path, header=0, sep=',')
  testing_df = pd.read_csv(FLAGS.testing_data_path, header=0, sep=',')
  validation_df = pd.read_csv(FLAGS.validation_data_path, header=0, sep=',')
  add_price_quantiles(training_df)
  add_price_quantiles(testing_df)
  add_price_quantiles(validation_df)

  train_labels = np.log(training_df['price'])
  validation_labels = np.log(validation_df['price'])
  test_labels = np.log(testing_df['price'])
  train_features = training_df[FEATURES]
  validation_features = validation_df[FEATURES]
  test_features = testing_df[FEATURES]
  validation_price = validation_df['price']
  test_price = testing_df['price']

  tf.reset_default_graph()
  x = tf.placeholder(tf.float32, shape=(None, input_dim), name='x')
  y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
  xy = tf.concat([x, y], axis=1)

  autoencoder_layer1 = tf.layers.dense(
      inputs=xy, units=100, activation=tf.sigmoid)
  autoencoder_embedding_layer = tf.layers.dense(
      inputs=autoencoder_layer1, units=FLAGS.embedding_dim,
      activation=tf.sigmoid)
  autoencoder_layer3 = tf.layers.dense(
      inputs=autoencoder_embedding_layer, units=100, activation=tf.sigmoid)
  autoencoder_out_x = tf.layers.dense(
      inputs=autoencoder_layer3, units=input_dim)
  autoencoder_out_y = tf.layers.dense(inputs=autoencoder_layer3, units=1)
  autoencoder_y_loss = tf.losses.mean_squared_error(
      labels=y, predictions=autoencoder_out_y)
  autoencoder_x_loss = tf.losses.mean_squared_error(
      labels=x, predictions=autoencoder_out_x)
  autoencoder_loss = autoencoder_x_loss + autoencoder_y_loss
  autoencoder_optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
      autoencoder_loss)

  parallel_outputs = []
  parallel_losses = []
  parallel_optimizers = []
  parallel_thetas = tf.placeholder(
      tf.float32, shape=(num_parallel_thetas, FLAGS.embedding_dim),
      name='parallel_thetas')
  unstack_parallel_thetas = tf.unstack(parallel_thetas, axis=0)
  embedding = tf.placeholder(
      tf.float32, shape=(None, FLAGS.embedding_dim), name='embedding')
  with tf.variable_scope('regressors'):
    for theta_index in range(num_parallel_thetas):
      output = regressor(x)
      theta = tf.reshape(
          unstack_parallel_thetas[theta_index],
          shape=[FLAGS.embedding_dim, 1])
      optimizer, loss = optimization(output, y, embedding, theta,
                                     LEARNING_RATE)
      parallel_outputs.append(output)
      parallel_losses.append(loss)
      parallel_optimizers.append(optimizer)

  init = tf.global_variables_initializer()
  regressors_init = tf.variables_initializer(
      tf.global_variables(scope='regressors'))

  kernel = RBF(
      length_scale=FLAGS.sampling_radius,
      length_scale_bounds=(FLAGS.sampling_radius * 1e-3,
                           FLAGS.sampling_radius * 1e3)) * ConstantKernel(
                               1.0, (1e-3, 1e3))

  thetas = np.zeros(shape=(0, FLAGS.embedding_dim))
  validation_metrics = []
  test_metrics = []

  with tf.Session() as sess:
    sess.run(init)

    # Training autoencoder
    for _ in range(num_steps_autoencoder):
      batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
      batch_x = train_features.iloc[batch_index, :].values
      batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
      _, _ = sess.run([autoencoder_optimizer, autoencoder_loss],
                      feed_dict={
                          x: batch_x,
                          y: batch_y,
                      })

    # GetCandidatesAlpha (Algorithm 2 in paper)
    for theta_batch_index in range(num_theta_batches):
      sess.run(regressors_init)
      if FLAGS.uniform_weights:
        theta_batch = np.zeros(shape=(num_parallel_thetas,
                                      FLAGS.embedding_dim))
      elif theta_batch_index == 0:
        # We first start uniformly.
        theta_batch = sample_from_ball(
            size=(num_parallel_thetas, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
      else:
        # Use UCB to generate candidates.
        theta_batch = np.zeros(shape=(0, FLAGS.embedding_dim))
        sample_thetas = np.copy(thetas)
        sample_validation_metrics = validation_metrics[:]
        candidates = sample_from_ball(
            size=(10000, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
        for theta_index in range(num_parallel_thetas):
          gp = GaussianProcessRegressor(
              kernel=kernel, alpha=1e-4).fit(sample_thetas,
                                             sample_validation_metrics)
          metric_mles, metric_stds = gp.predict(candidates, return_std=True)
          metric_lcbs = metric_mles - FLAGS.p_q_value * metric_stds

          best_index = np.argmin(metric_lcbs)
          best_theta = [candidates[best_index]]
          best_theta_metric_ucb = metric_mles[best_index] \
              + FLAGS.p_q_value * metric_stds[best_index]
          theta_batch = np.concatenate([theta_batch, best_theta])

          # Add candidate to the GP, assuming the metric observation is the
          # LCB.
          sample_thetas = np.concatenate([sample_thetas, best_theta])
          sample_validation_metrics.append(best_theta_metric_ucb)

      # Training regressors
      for _ in range(TRAINING_STEPS):
        batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
        batch_x = train_features.iloc[batch_index, :].values
        batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
        batch_embedding = sess.run(
            autoencoder_embedding_layer,
            feed_dict={
                x: batch_x,
                y: batch_y,
            })
        _, _ = sess.run(
            [parallel_optimizers, parallel_losses],
            feed_dict={
                x: batch_x,
                y: batch_y,
                embedding: batch_embedding,
                parallel_thetas: theta_batch,
            })

      parallel_validation_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: validation_features.values,
              y: validation_labels.values.reshape(len(validation_labels), 1),
          })
      parallel_validation_metrics = [
          metric(validation_labels, validation_output, validation_price)
          for validation_output in parallel_validation_outputs
      ]
      thetas = np.concatenate([thetas, theta_batch])
      validation_metrics.extend(parallel_validation_metrics)

      parallel_test_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: test_features.values,
              y: test_labels.values.reshape(len(test_labels), 1),
          })
      parallel_test_metrics = [
          metric(test_labels, test_output, test_price)
          for test_output in parallel_test_outputs
      ]
      test_metrics.extend(parallel_test_metrics)

  best_observed_index = np.argmin(validation_metrics)
  print('[metric] validation={}'.format(
      validation_metrics[best_observed_index]))
  print('[metric] test={}'.format(test_metrics[best_observed_index]))
  return 0
def get_estimator(self, estimator):
    # Classification
    if estimator == "RandomForestClassifier":
        self._learning_type = "classification"
        return RandomForestClassifier(verbose=True)
    elif estimator == "SVC":
        self._learning_type = "classification"
        return SVC(verbose=True)
    elif estimator == "LinearSVC":
        self._learning_type = "classification"
        return LinearSVC(verbose=True)
    elif estimator == "SGDClassifier":
        self._learning_type = "classification"
        return SGDClassifier(verbose=True)
    elif estimator == "KNeighborsClassifier":
        self._learning_type = "classification"
        # KNeighborsClassifier does not accept a verbose argument
        return KNeighborsClassifier()
    elif estimator == "GaussianProcessClassifier":
        self._learning_type = "classification"
        return GaussianProcessClassifier(1.0 * RBF(1.0))
    elif estimator == "DecisionTreeClassifier":
        self._learning_type = "classification"
        return DecisionTreeClassifier()
    elif estimator == "AdaBoostClassifier":
        self._learning_type = "classification"
        return AdaBoostClassifier()
    elif estimator == "MLPClassifier":
        self._learning_type = "classification"
        return MLPClassifier(verbose=True)
    elif estimator == "QuadraticDiscriminantAnalysis":
        self._learning_type = "classification"
        return QuadraticDiscriminantAnalysis()
    # (a second, unreachable "RandomForestClassifier" branch duplicated the
    # first one and has been dropped)

    # Regression
    if estimator == "RandomForestRegressor":
        self._learning_type = "regression"
        return RandomForestRegressor(verbose=True)
    elif estimator == "KNeighborsRegressor":
        self._learning_type = "regression"
        # KNeighborsRegressor does not accept a verbose argument
        return KNeighborsRegressor()
    elif estimator == "MultinomialNB":
        self._learning_type = "regression"
        return MultinomialNB()
    elif estimator == "SVR":
        self._learning_type = "regression"
        return SVR(verbose=True)
    elif estimator == "Lasso":
        self._learning_type = "regression"
        return Lasso()
    elif estimator == "ElasticNet":
        self._learning_type = "regression"
        return ElasticNet()
    elif estimator == "Ridge":
        self._learning_type = "regression"
        return Ridge(alpha=1.0, solver="auto")
    elif estimator == "LogisticRegression":
        self._learning_type = "regression"
        return LogisticRegression(verbose=True)
    elif estimator == "SGDRegressor":
        self._learning_type = "regression"
        return SGDRegressor(verbose=True)

    # Find estimator by returning all estimators
    if estimator == "classification":
        self._learning_type = "classification"
        estimators = [
            SVC(), LinearSVC(), SGDClassifier(), KNeighborsClassifier(),
            GaussianProcessClassifier(1.0 * RBF(1.0)),
            DecisionTreeClassifier(), MLPClassifier()
        ]
        return estimators
    elif estimator == "regression":
        self._learning_type = "regression"
        estimators = [
            RandomForestRegressor(), SVR(kernel='linear'),
            KNeighborsRegressor(), MultinomialNB(), SVR(), Lasso(),
            ElasticNet(), Ridge(alpha=1.0, solver="auto"),
            LogisticRegression(), SGDRegressor()
        ]
        return estimators
def __init__(self, feature_table, labels, model, classes=None, C=1.0):
    self.feature_table = feature_table
    self.labels = labels
    self.modelname = model
    self.coef = np.zeros(feature_table.shape[1])
    if len(feature_table) != len(labels):
        raise Exception("Feature table and labels length mismatch!")
    if classes:
        self.classes = classes
    else:
        self.classes = np.unique(labels)
    # turn string labels into numeric labels
    self.class_dict = dict(zip(self.classes, range(len(self.classes))))
    self.labels_num = pd.Series(self.labels).map(self.class_dict,
                                                 na_action='ignore')
    # print(self.labels_num)
    # print(self.labels)
    model_names = ["Nearest Neighbors (kNN)", "Linear SVM (LSVM)",
                   "RBF SVM (RBF_SVM)", "Gaussian Process (Gaussian)",
                   "Decision Tree (DT)", "Random Forest (RF)",
                   "Neural Net (MLP)", "AdaBoost (Ada)", "Naive Bayes (NB)",
                   "QDA"]
    if self.modelname == 'logistic_regression' or self.modelname == 'LR':
        if len(self.classes) == 2:
            # binomial logistic regression case
            self.model = LogisticRegression(
                C=C, class_weight=None, dual=False, fit_intercept=True,
                intercept_scaling=1, max_iter=100, multi_class='ovr',
                n_jobs=1, penalty='l2', random_state=None,
                solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
        else:
            # multinomial logistic regression
            self.model = LogisticRegression(
                C=C, class_weight='balanced', dual=False, fit_intercept=True,
                intercept_scaling=1, max_iter=100, multi_class='multinomial',
                n_jobs=1, penalty='l2', random_state=None,
                solver='newton-cg', tol=0.0001, verbose=0, warm_start=True)
    elif self.modelname == 'regulized_logistic_regression' or self.modelname == 'RLR':
        if len(self.classes) == 2:
            # binomial logistic regression case
            self.model = LogisticRegression(
                C=C, class_weight=None, dual=False, fit_intercept=True,
                intercept_scaling=1, max_iter=100, multi_class='ovr',
                n_jobs=1, penalty='l2', random_state=None,
                solver='liblinear', tol=0.0001, verbose=0, warm_start=False)
        else:
            # multinomial logistic regression
            self.model = LogisticRegression(
                C=C, class_weight='balanced', dual=False, fit_intercept=True,
                intercept_scaling=1, max_iter=100, multi_class='multinomial',
                n_jobs=1, penalty='l2', random_state=None,
                solver='newton-cg', tol=0.0001, verbose=0, warm_start=True)
    elif self.modelname in ['decision_tree', 'DT']:
        self.model = tree.DecisionTreeClassifier(max_depth=5)
    elif self.modelname in ['kNN', 'k-NN', 'knn']:
        self.model = KNeighborsClassifier(n_neighbors=3)
    elif self.modelname in ['linear_svm', 'LSVM']:
        self.model = SVC(kernel="linear", C=0.025)
    elif self.modelname in ['rbf_svm', 'RBF_SVM']:
        self.model = SVC(gamma=2, C=1)
    elif self.modelname in ['Gaussian', 'gaussian']:
        self.model = GaussianProcessClassifier(1.0 * RBF(1.0))
    elif self.modelname in ['RF', 'Random_forest', 'Random_Forest',
                            'random_forest']:
        self.model = RandomForestClassifier(max_depth=5, n_estimators=10,
                                            max_features=1)
    elif self.modelname in ['MLP', 'Neural_net']:
        self.model = MLPClassifier(alpha=1, max_iter=1000)
    elif self.modelname in ['ADA', 'Ada', 'Adaboost', 'Ada_boost']:
        self.model = AdaBoostClassifier()
    elif self.modelname in ['NB', 'naive_bayes', 'Naive_Bayes',
                            'Naive_bayes']:
        self.model = GaussianNB()
    elif self.modelname in ['QDA', 'qda']:
        self.model = QuadraticDiscriminantAnalysis()
    else:
        raise Exception(
            "Classifier model unrecognized; currently supported models: "
            "logistic_regression, decision_tree, kNN, linear_svm, RBF_SVM, "
            "Gaussian, Random_Forest, MLP, ADA, naive_bayes, QDA")
def f(x):
    """The function to predict."""
    return x * np.sin(x)


train_X = np.atleast_2d(np.linspace(0, 10, 100)).T  # set the training set size
train_Y = f(train_X).ravel()
plt_X = np.atleast_2d(np.linspace(0, 10, 1000)).T

# Hand-rolled GP (GP1)
gp = GP()
gp.fit(train_X, train_Y)
plt_Y, sigma1 = gp.predict(plt_X)  # here sigma1 is the covariance matrix
sigma1 = sigma1.diagonal()  # take the diagonal elements

# scikit-learn GP (GP2)
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(train_X, train_Y)
y_pred, sigma2 = gp.predict(plt_X, return_std=True)

# Plotting
fig = plt.figure(figsize=(16, 10))
gs = gridspec.GridSpec(2, 2)  # 2 rows, 2 columns
GP1 = plt.subplot(gs[:, 0])
GP2 = plt.subplot(gs[:, 1])
GP1.plot(plt_X, f(plt_X), 'r:', label=r'$f(x) = x\,\sin(x)$')
GP1.plot(train_X, train_Y, 'r.', markersize=10, label='Observations')
GP1.plot(plt_X, plt_Y, 'b-', label='Prediction')
GP1.fill(np.concatenate([plt_X, plt_X[::-1]]),
x = training_data.iloc[:, number_of_y_variables:]
x_for_prediction.columns = x.columns

autoscaled_x = (x - x.mean()) / x.std()
autoscaled_x_for_prediction = (x_for_prediction - x.mean()) / x.std()
autoscaled_y = (y - y.mean()) / y.std()
mean_of_y = y.mean()
std_of_y = y.std()

# Gaussian process regression
estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
std_of_estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
plt.rcParams['font.size'] = 18
for y_number in range(number_of_y_variables):
    model = GaussianProcessRegressor(ConstantKernel() * RBF() + WhiteKernel())
    model.fit(autoscaled_x, autoscaled_y.iloc[:, y_number])
    estimated_y_for_prediction_tmp, std_of_estimated_y_for_prediction_tmp = \
        model.predict(autoscaled_x_for_prediction, return_std=True)
    estimated_y_for_prediction[:, y_number] = estimated_y_for_prediction_tmp
    std_of_estimated_y_for_prediction[:, y_number] = \
        std_of_estimated_y_for_prediction_tmp

    estimated_y = model.predict(autoscaled_x)
    estimated_y = estimated_y * std_of_y.iloc[y_number] \
        + mean_of_y.iloc[y_number]
    plt.figure(figsize=figure.figaspect(1))
    plt.scatter(y.iloc[:, y_number], estimated_y)
    y_max = max(y.iloc[:, y_number].max(), estimated_y.max())
    y_min = min(y.iloc[:, y_number].min(), estimated_y.min())
    plt.plot([y_min - 0.05 * (y_max - y_min),
              y_max + 0.05 * (y_max - y_min)],
def HGPfunc(x, y, plot, h1low, h1high, h2low, h2high):
    y = y.reshape(-1, 1)
    x = x.reshape(-1, 1)
    if plot:
        plt.plot(x, y, '+')
        plt.xlabel("Pch (dBm)")
        plt.ylabel("SNR (dB)")
        plt.savefig('Adataset.png', dpi=200)
        plt.show()
    n = np.size(x)
    scaler = StandardScaler().fit(y)
    y = scaler.transform(y)

    def sqexp(X, Y, k1, k2):
        X = np.atleast_2d(X)
        if Y is None:
            dists = pdist(X / k2, metric='sqeuclidean')
            K = np.exp(-.5 * dists)
            # convert from upper-triangular matrix to square matrix
            K = squareform(K)
            np.fill_diagonal(K, 1)
            # return gradient
            K_gradient = (K * squareform(dists))[:, :, np.newaxis]
            # K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 \
            #     / (k2 ** 2)
            # anisotropic case, see
            # https://github.com/scikit-learn/scikit-learn/blob/95d4f0841d57e8b5f6b2a570312e9d832e69debc/sklearn/gaussian_process/kernels.py
            # K_gradient *= K[..., np.newaxis]
            return k1 * K, K_gradient
        else:
            dists = cdist(X / k2, Y / k2, metric='sqeuclidean')
            K = np.exp(-.5 * dists)
            return k1 * K

    # heteroscedastic versions of functions
    global Kyinvh
    Kyinvh = 0.0
    global Kfh
    Kfh = 0.0

    def lmlh(params, y, R):
        # print(params)  # show progress of fit
        [k1, k2] = params
        global Kfh
        Kfh = sqexp(x, None, k1, k2**0.5)[0]
        Ky = Kfh + R  # calculate initial kernel with noise
        global Kyinvh
        Kyinvh = inv(Ky)
        # marginal likelihood - (5.8)
        return -(-0.5 * mul(mul(T(y), Kyinvh), y)
                 - 0.5 * np.log(det(Ky)) - 0.5 * n * np.log(2 * np.pi))

    def lmlgh(params, y, R):
        k1, k2 = params
        al = mul(Kyinvh, y)
        dKdk1 = Kfh * (1 / k1)
        dKdk2 = sqexp(x, None, k1, k2**0.5)[1].reshape(n, n)
        lmlg1 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk1)))
        lmlg2 = -(0.5 * np.trace(mul(mul(al, T(al)) - Kyinvh, dKdk2)))
        return np.ndarray((2,), buffer=np.array([lmlg1, lmlg2]), dtype=float)

    def GPRfith(xs, k1, k2, R, Rs):
        Ky = sqexp(x, None, k1, k2**0.5)[0] + R
        Ks = sqexp(xs, x, k1, k2**0.5)
        Kss = sqexp(xs, None, k1, k2)[0]
        L = cholesky(Ky)
        al = solve(T(L), solve(L, y))
        fmst = mul(Ks, al)
        varfmst = np.empty([n, 1])
        for i in range(np.size(xs)):
            v = solve(L, T(Ks[:, i]))
            varfmst[i] = Kss[i, i] - mul(T(v), v) + Rs[i, i]
        lmlopt = -0.5 * mul(T(y), al) - np.trace(np.log(L)) \
            - 0.5 * n * np.log(2 * np.pi)
        # return fmst, varfmst[::-1], lmlopt
        return fmst, varfmst, lmlopt

    def hypopth(y, numrestarts, R):
        numh = 2  # number of hyperparameters in kernel function
        k1s4 = np.empty([numrestarts, 1])
        k2s4 = np.empty([numrestarts, 1])
        for i in range(numrestarts):
            k1is4 = np.random.uniform(h1low, h1high)
            k2is4 = np.random.uniform(h2low, h2high)
            kis4 = np.ndarray((numh,), buffer=np.array([k1is4, k2is4]),
                              dtype=float)
            s4res = minimize(lmlh, kis4, args=(y, R), method='L-BFGS-B',
                             jac=lmlgh,
                             bounds=((h1low, h1high), (h2low, h2high)),
                             options={'maxiter': 1e2})
            step4res = []
            if s4res.success:
                step4res.append(s4res.x)
                print("successful k1: " + str(k1is4))
                print("successful k2: " + str(k2is4))
            else:
                print("error " + str(k1is4))
                print("error " + str(k2is4))
                # raise ValueError(s4res.message)
                k1is4 = np.random.uniform(h1low, h1high)
                k2is4 = np.random.uniform(h2low, h2high)
                print("error in hypopth() - reinitialising hyperparameters")
                continue
            k1s4[i] = step4res[0][0]
            k2s4[i] = step4res[0][1]
        lmltest = [lmlh([k1s4[i], k2s4[i]], y, R) for i in range(numrestarts)]
        k1f = k1s4[np.argmin(lmltest)]
        k2f = k2s4[np.argmin(lmltest)]
        return k1f, k2f

    def hetloopSK(fmst, varfmst, numiters, numrestarts):
        s = 200
        MSE = np.empty([numiters, 1])
        NLPD = np.empty([numiters, 1])
        fmstf = np.empty([numiters, n])
        varfmstf = np.empty([numiters, n])
        lmloptf = np.empty([numiters, 1])
        rf = np.empty([numiters, n])
        i = 0
        while i < numiters:
            breakwhile = False
            # Step 2: estimate empirical noise levels z
            k1is3 = np.random.uniform(h1low, h1high, 1)
            k2is3 = np.random.uniform(h2low, h2high, 1)
            z = np.empty([n, 1])
            for j in range(n):
                normdraw = normal(fmst[j], varfmst[j]**0.5, s).reshape(s, 1)
                z[j] = np.log((1 / s) * 0.5 * sum((y[j] - normdraw)**2))
                if math.isnan(z[j]):  # True for NaN values
                    breakwhile = True
                    break
            if breakwhile:
                print("Nan value in z -- skipping iter " + str(i))
                i = i + 1
                continue
            # Step 3: estimate GP2 on D' - (x, z)
            kernel2 = C(k1is3, (h1low, h1high)) * RBF(k2is3, (h2low, h2high))
            gpr2 = GaussianProcessRegressor(kernel=kernel2,
                                            n_restarts_optimizer=numrestarts,
                                            normalize_y=False,
                                            alpha=np.var(z))
            gpr2.fit(x, z)
            ystar2, sigma2 = gpr2.predict(x, return_std=True)
            sigma2 = (sigma2**2 + 1)**0.5
            # Step 4: train heteroscedastic GP3 using the predictive mean of
            # GP2 to predict the log noise levels r
            r = exp(ystar2)
            R = r * np.identity(n)
            k1s4, k2s4 = hypopth(y, numrestarts, R)
            fmst4, varfmst4, lmlopt4 = GPRfith(x, k1s4, k2s4, R, R)
            # test for convergence
            MSE[i] = (1 / n) * sum(((y - fmst4)**2) / np.var(y))
            nlpdarg = np.zeros([n, 1])
            for k in range(n):
                nlpdarg[k] = -np.log10(norm.pdf(x[k], fmst4[k],
                                                varfmst4[k]**0.5))
            NLPD[i] = sum(nlpdarg) * (1 / n)
            print("MSE = " + str(MSE[i]))
            print("NLPD = " + str(NLPD[i]))
            print("finished iteration " + str(i + 1))
            fmstf[i, :] = fmst4.reshape(n)
            varfmstf[i, :] = varfmst4.reshape(n)
            lmloptf[i] = lmlopt4
            fmst = fmst4
            varfmst = varfmst4
            rf[i, :] = r.reshape(n)
            i = i + 1
        return fmstf, varfmstf, lmloptf, MSE, rf, NLPD

    numiters = 10
    numrestarts = 20
    # kernel1 = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-3, 1e3)) + W(1.0, (1e-5, 1e5))
    # gpr1 = GaussianProcessRegressor(kernel=kernel1, n_restarts_optimizer=0,
    #                                 normalize_y=True)
    kernel1 = C(1.0, (h1low, h1high)) * RBF(1.0, (h2low, h2high))
    gpr1 = GaussianProcessRegressor(kernel=kernel1,
                                    n_restarts_optimizer=numrestarts,
                                    normalize_y=False, alpha=np.var(y))
    gpr1.fit(x, y)
    ystar1, sigma1 = gpr1.predict(x, return_std=True)
    var1 = sigma1**2 + np.var(y)
    start_time = time.time()
    fmstf, varfmstf, lmlopt, mse, _, NLPD = hetloopSK(ystar1, var1,
                                                      numiters, numrestarts)
    duration = time.time() - start_time
    ind = numiters - 1
    fmst4 = fmstf[ind]
    varfmst4 = varfmstf[ind]
    sigs4 = varfmst4**0.5
    fmstps4 = fmst4 + sigs4
    fmst4i = scaler.inverse_transform(fmst4)
    fmstps4i = scaler.inverse_transform(fmstps4)

    # ================= Mutual information transform =================
    # MIcalc = False  # whether to calculate MI using Gauss-Hermite quadrature
    # # import constellation shapes from MATLAB-generated csv files
    # if MIcalc:
    #     Qam4r = np.genfromtxt(open("qam4r.csv", "r"), delimiter=",", dtype=float)
    #     Qam4i = np.genfromtxt(open("qam4i.csv", "r"), delimiter=",", dtype=float)
    #     Qam16r = np.genfromtxt(open("qam16r.csv", "r"), delimiter=",", dtype=float)
    #     Qam16i = np.genfromtxt(open("qam16i.csv", "r"), delimiter=",", dtype=float)
    #     Qam32r = np.genfromtxt(open("qam32r.csv", "r"), delimiter=",", dtype=float)
    #     Qam32i = np.genfromtxt(open("qam32i.csv", "r"), delimiter=",", dtype=float)
    #     Qam64r = np.genfromtxt(open("qam64r.csv", "r"), delimiter=",", dtype=float)
    #     Qam64i = np.genfromtxt(open("qam64i.csv", "r"), delimiter=",", dtype=float)
    #     Qam128r = np.genfromtxt(open("qam128r.csv", "r"), delimiter=",", dtype=float)
    #     Qam128i = np.genfromtxt(open("qam128i.csv", "r"), delimiter=",", dtype=float)
    #
    #     Qam4 = Qam4r + 1j*Qam4i
    #     Qam16 = Qam16r + 1j*Qam16i
    #     Qam32 = Qam32r + 1j*Qam32i
    #     Qam64 = Qam64r + 1j*Qam64i
    #     Qam128 = Qam128r + 1j*Qam128i
    #
    # ================= Estimate MI =================
    # set modulation format order and number of terms used in Gauss-Hermite
    # quadrature
    # M = 16
    # L = 6
    #
    # def MIGHquad(SNR):
    #     if M == 4:
    #         Ps = np.mean(np.abs(Qam4**2))
    #         X = Qam4
    #     elif M == 16:
    #         Ps = np.mean(np.abs(Qam16**2))
    #         X = Qam16
    #     elif M == 32:
    #         Ps = np.mean(np.abs(Qam32**2))
    #         X = Qam32
    #     elif M == 64:
    #         Ps = np.mean(np.abs(Qam64**2))
    #         X = Qam64
    #     elif M == 128:
    #         Ps = np.mean(np.abs(Qam128**2))
    #         X = Qam128
    #     else:
    #         print("unrecognised M")
    #     sigeff2 = Ps / (10**(SNR / 10))
    #     Wgh = GHquad(L)[0]
    #     Rgh = GHquad(L)[1]
    #     sum_out = 0
    #     for ii in range(M):
    #         sum_in = 0
    #         for l1 in range(L):
    #             sum_inn = 0
    #             for l2 in range(L):
    #                 sum_exp = 0
    #                 for jj in range(M):
    #                     arg_exp = np.linalg.norm(X[ii] - X[jj])**2 \
    #                         + 2*(sigeff2**0.5)*np.real((Rgh[l1] + 1j*Rgh[l2])*(X[ii] - X[jj]))
    #                     sum_exp = np.exp(-arg_exp/sigeff2) + sum_exp
    #                 sum_inn = Wgh[l2]*np.log2(sum_exp) + sum_inn
    #             sum_in = Wgh[l1]*sum_inn + sum_in
    #         sum_out = sum_in + sum_out
    #     return np.log2(M) - (1/(M*np.pi))*sum_out
    #
    # def findMI(SNR):
    #     with multiprocessing.Pool() as pool:
    #         Ixy = pool.map(MIGHquad, SNR)
    #     return Ixy
    # ================================================================

    print("HGP fitting duration: " + str(duration))
    return fmst4i, fmstps4i, lmlopt, mse, NLPD
a = np.array(args.scalefactors)
# z_edge[:-1] + np.diff(z_edge)/2  # NB: z is now the redshift in the middle of each bin
ini = [inia]
a = np.concatenate([ini, a])
# print(a)
wde = np.concatenate([wn, wde])
# print(wde)
nb = len(wde)

# defining the baseline -1
base = lambda x: -1 + x - x

# Generation of the Gaussian Process
gp = GaussianProcessRegressor(kernel=RBF(l, (l, l)))
# Fit --> training
g = gp.fit(a[:, np.newaxis], wde - base(a))

# Plotting points (if log, use np.logspace)
a_sampling = np.linspace(inia, enda, ODEsteps)
print(a_sampling)

# transforming a_sampling into z_sampling
z_sampling = np.zeros(ODEsteps)
for i in range(ODEsteps):
    z_sampling[i] = -1 + 1 / a_sampling[i]
print(z_sampling)

# Predict points
w_pred, sigma = gp.predict(a_sampling[:, np.newaxis], return_std=True)
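# A hedged follow-up sketch (assumption, not in the original script): since
# the GP was trained on wde - base(a), the baseline must be added back to
# recover the equation-of-state values themselves.
w_de_reconstructed = w_pred + base(a_sampling)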
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

names = [
    "Logistic Regression", "Nearest Neighbors", "Linear SVM", "RBF SVM",
    "Gaussian Process", "Decision Tree", "Random Forest", "Neural Net",
    "AdaBoost", "Naive Bayes"
]
classifiers = [
    LogisticRegression(),
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
]

# for name, clf in zip(names, classifiers):
#     clf.fit(X_train, y)
#     accuracy = round(clf.score(X_train, y) * 100, 2)
#     print(name, accuracy)

# In[ ]:

clf = RandomForestClassifier(max_depth=15, n_estimators=15, max_features=5)
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

tt = pd.read_csv('immSurvey.csv')
tt.head()

alphas = tt.stanMeansNewSysPooled
sample = tt.textToSend

from sklearn.feature_extraction.text import CountVectorizer
vec = CountVectorizer(ngram_range=(2, 2))
X = vec.fit_transform(sample)
X
pd.DataFrame(X.toarray(), columns=vec.get_feature_names())

from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas, random_state=1)

rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=rbf, alpha=1e-8)
gpr.fit(Xtrain.toarray(), ytrain)

# Compute posterior predictive mean and covariance
mu_s, cov_s = gpr.predict(Xtest.toarray(), return_cov=True)

# test correlation between test targets and predicted means
np.corrcoef(ytest, mu_s)
    x_pr_grid, B_postsamples, T_fwdsamples, seed=200)
jnp.save('plot_files/ccopula_lidar_logpdf_pr{}'.format(x_pr_val), logpdf_pr)
jnp.save('plot_files/ccopula_lidar_logcdf_pr{}'.format(x_pr_val), logcdf_pr)

# Convergence plot
seed = 200
T_fwdsamples = 10000
logcdf_pr_conv, logpdf_pr_conv, pdiff, cdiff = check_convergence_pr_cregression(
    copula_cregression_obj, x, y_pr_grid, x_pr_grid, 1, T_fwdsamples, seed)
jnp.save('plot_files/ccopula_lidar_pr_pdiff_pr{}'.format(x_pr_val), pdiff)

# Gaussian Process
print('Method: GP')
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

kernel = ConstantKernel() * RBF() + WhiteKernel()
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10,
                              normalize_y=True)
gp.fit(x, y)
mean_gp, std_gp = gp.predict(x_plot.reshape(-1, 1), return_std=True)
jnp.save('plot_files/gp_lidar_mean', mean_gp)
jnp.save('plot_files/gp_lidar_std', std_gp)
import numpy as np
from matplotlib import pyplot as plt

# public import path; sklearn.metrics.classification is a deprecated
# private module
from sklearn.metrics import accuracy_score, log_loss
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF

# Generate data
train_size = 50
rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 100)[:, np.newaxis]
y = np.array(X[:, 0] > 2.5, dtype=int)

# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_fix = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0),
                                   optimizer=None)
gp_fix.fit(X[:train_size], y[:train_size])

gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(X[:train_size], y[:train_size])

print("Log Marginal Likelihood (initial): %.3f"
      % gp_fix.log_marginal_likelihood(gp_fix.kernel_.theta))
print("Log Marginal Likelihood (optimized): %.3f"
      % gp_opt.log_marginal_likelihood(gp_opt.kernel_.theta))

print("Accuracy: %.3f (initial) %.3f (optimized)"
      % (accuracy_score(y[:train_size], gp_fix.predict(X[:train_size])),
         accuracy_score(y[:train_size], gp_opt.predict(X[:train_size]))))
# the snippet was truncated here; the statement is completed following the
# matching upstream scikit-learn example
print("Log-loss: %.3f (initial) %.3f (optimized)"
      % (log_loss(y[:train_size], gp_fix.predict_proba(X[:train_size])[:, 1]),
         log_loss(y[:train_size], gp_opt.predict_proba(X[:train_size])[:, 1])))