def test_y_normalization():
    """ Test normalization of the target values in GP

    Fitting non-normalizing GP on normalized y and fitting normalizing GP
    on unnormalized y should yield identical results
    """
    y_mean = y.mean(0)
    y_norm = y - y_mean
    for kernel in kernels:
        # Fit non-normalizing GP on normalized y
        gpr = GaussianProcessRegressor(kernel=kernel)
        gpr.fit(X, y_norm)
        # Fit normalizing GP on unnormalized y
        gpr_norm = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr_norm.fit(X, y)

        # Compare predicted mean, std-devs and covariances
        y_pred, y_pred_std = gpr.predict(X2, return_std=True)
        y_pred = y_mean + y_pred
        y_pred_norm, y_pred_std_norm = gpr_norm.predict(X2, return_std=True)

        assert_almost_equal(y_pred, y_pred_norm)
        assert_almost_equal(y_pred_std, y_pred_std_norm)

        _, y_cov = gpr.predict(X2, return_cov=True)
        _, y_cov_norm = gpr_norm.predict(X2, return_cov=True)
        assert_almost_equal(y_cov, y_cov_norm)
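These test snippets rely on module-level fixtures (X, X2, y, kernels) that are not shown here. A minimal sketch of what such fixtures might look like, assuming a simple 1-D target f(x) = x*sin(x) and a small illustrative set of kernels:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

def f(x):
    return x * np.sin(x)

# training and test inputs (hypothetical values chosen for illustration)
X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
X2 = np.atleast_2d([2., 4., 5.5, 6.5, 7.5]).T
y = f(X).ravel()

# a small, illustrative set of kernels to iterate over
kernels = [RBF(length_scale=1.0),
           C(1.0, (1e-2, 1e2)) * RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))]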
def test_no_fit_default_predict():
    # Test that GPR predictions without fit does not break by default.
    default_kernel = (C(1.0, constant_value_bounds="fixed") *
                      RBF(1.0, length_scale_bounds="fixed"))
    gpr1 = GaussianProcessRegressor()
    _, y_std1 = gpr1.predict(X, return_std=True)
    _, y_cov1 = gpr1.predict(X, return_cov=True)

    gpr2 = GaussianProcessRegressor(kernel=default_kernel)
    _, y_std2 = gpr2.predict(X, return_std=True)
    _, y_cov2 = gpr2.predict(X, return_cov=True)

    assert_array_almost_equal(y_std1, y_std2)
    assert_array_almost_equal(y_cov1, y_cov2)
def plot_gp(x_min, x_max, x, y, train_features, train_labels):
    
    fig = plt.figure(figsize=(16, 10))
    fig.suptitle('Gaussian Process and Utility Function After {} Steps'.format(len(train_features)), fontsize=30)
    
    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1]) 
    axis = plt.subplot(gs[0])
    acq = plt.subplot(gs[1])
    
    gp = GaussianProcessRegressor(
        kernel=Matern(nu=2.5),
        n_restarts_optimizer=25,
    )
    
    gp.fit(train_features, train_labels)
    mu, sigma = gp.predict(x, return_std=True)
    
    axis.plot(x, y, linewidth=3, label='Target')
    axis.plot(train_features.flatten(), train_labels, 'D', markersize=8, label=u'Observations', color='r')
    axis.plot(x, mu, '--', color='k', label='Prediction')

    axis.fill(np.concatenate([x, x[::-1]]), 
              np.concatenate([mu - 1.9600 * sigma, (mu + 1.9600 * sigma)[::-1]]),
        alpha=.6, fc='c', ec='None', label='95% confidence interval')
    
    axis.set_xlim((x_min, x_max))
    axis.set_ylim((None, None))
    axis.set_ylabel('f(x)', fontdict={'size':20})
    axis.set_xlabel('x', fontdict={'size':20})
    
    
    bounds = np.asarray([[x_min, x_max]])
    
    acquisition_function_kappa = 5

    mean, std = gp.predict(x, return_std=True)
    acquisition_function_values = mean + acquisition_function_kappa * std

    acq.plot(x, acquisition_function_values, label='Utility Function', color='purple')

    acq.plot(x[np.argmax(acquisition_function_values)], np.max(acquisition_function_values), '*', markersize=15,
             label=u'Next Best Guess', markerfacecolor='gold', markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim((x_min, x_max))
    acq.set_ylim((0, np.max(acquisition_function_values) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size':20})
    acq.set_xlabel('x', fontdict={'size':20})
    
    axis.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
    acq.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
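A minimal usage sketch for plot_gp, assuming a hypothetical 1-D target function and a handful of observed points:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

def target(x):
    # toy objective used only for illustration
    return np.exp(-(x - 2) ** 2) + np.exp(-(x - 6) ** 2 / 10) + 1.0 / (x ** 2 + 1)

x_min, x_max = -2, 10
x = np.linspace(x_min, x_max, 1000).reshape(-1, 1)
y = target(x)

train_features = np.array([[1.0], [3.0], [7.5]])
train_labels = target(train_features).ravel()

plot_gp(x_min, x_max, x, y, train_features, train_labels)
plt.show()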
def test_gpr_interpolation(kernel):
    # Test the interpolating property for different kernels.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    y_pred, y_cov = gpr.predict(X, return_cov=True)

    assert_almost_equal(y_pred, y)
    assert_almost_equal(np.diag(y_cov), 0.)
def test_predict_cov_vs_std():
    """ Test that predicted std.-dev. is consistent with cov's diagonal."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        y_mean, y_cov = gpr.predict(X2, return_cov=True)
        y_mean, y_std = gpr.predict(X2, return_std=True)
        assert_almost_equal(np.sqrt(np.diag(y_cov)), y_std)
def test_gpr_interpolation():
    """Test the interpolating property for different kernels."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
        y_pred, y_cov = gpr.predict(X, return_cov=True)

        assert_true(np.allclose(y_pred, y))
        assert_true(np.allclose(np.diag(y_cov), 0.))
Example #7
def test_duplicate_input():
    """ Test GPR can handle two different output-values for the same input. """
    for kernel in kernels:
        gpr_equal_inputs = GaussianProcessRegressor(kernel=kernel, alpha=1e-2)
        gpr_similar_inputs = GaussianProcessRegressor(kernel=kernel, alpha=1e-2)

        X_ = np.vstack((X, X[0]))
        y_ = np.hstack((y, y[0] + 1))
        gpr_equal_inputs.fit(X_, y_)

        X_ = np.vstack((X, X[0] + 1e-15))
        y_ = np.hstack((y, y[0] + 1))
        gpr_similar_inputs.fit(X_, y_)

        X_test = np.linspace(0, 10, 100)[:, None]
        y_pred_equal, y_std_equal = gpr_equal_inputs.predict(X_test, return_std=True)
        y_pred_similar, y_std_similar = gpr_similar_inputs.predict(X_test, return_std=True)

        assert_almost_equal(y_pred_equal, y_pred_similar)
        assert_almost_equal(y_std_equal, y_std_similar)
def test_y_multioutput():
    """ Test that GPR can deal with multi-dimensional target values"""
    y_2d = np.vstack((y, y*2)).T

    # Test for fixed kernel that first dimension of 2d GP equals the output
    # of 1d GP and that second dimension is twice as large
    kernel = RBF(length_scale=1.0)

    gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None,
                                   normalize_y=False)
    gpr.fit(X, y)

    gpr_2d = GaussianProcessRegressor(kernel=kernel, optimizer=None,
                                      normalize_y=False)
    gpr_2d.fit(X, y_2d)

    y_pred_1d, y_std_1d = gpr.predict(X2, return_std=True)
    y_pred_2d, y_std_2d = gpr_2d.predict(X2, return_std=True)
    _, y_cov_1d = gpr.predict(X2, return_cov=True)
    _, y_cov_2d = gpr_2d.predict(X2, return_cov=True)

    assert_almost_equal(y_pred_1d, y_pred_2d[:, 0])
    assert_almost_equal(y_pred_1d, y_pred_2d[:, 1] / 2)

    # Standard deviation and covariance do not depend on output
    assert_almost_equal(y_std_1d, y_std_2d)
    assert_almost_equal(y_cov_1d, y_cov_2d)

    y_sample_1d = gpr.sample_y(X2, n_samples=10)
    y_sample_2d = gpr_2d.sample_y(X2, n_samples=10)
    assert_almost_equal(y_sample_1d, y_sample_2d[:, 0])

    # Test hyperparameter optimization
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr.fit(X, y)

        gpr_2d = GaussianProcessRegressor(kernel=kernel, normalize_y=True)
        gpr_2d.fit(X, np.vstack((y, y)).T)

        assert_almost_equal(gpr.kernel_.theta, gpr_2d.kernel_.theta, 4)
def test_prior(kernel):
    # Test that GP prior has mean 0 and identical variances.
    gpr = GaussianProcessRegressor(kernel=kernel)

    y_mean, y_cov = gpr.predict(X, return_cov=True)

    assert_almost_equal(y_mean, 0, 5)
    if len(gpr.kernel.theta) > 1:
        # XXX: quite hacky, works only for current kernels
        assert_almost_equal(np.diag(y_cov), np.exp(kernel.theta[0]), 5)
    else:
        assert_almost_equal(np.diag(y_cov), 1, 5)
Example #10
def test_sample_statistics():
    """ Test that statistics of samples drawn from GP are correct."""
    for kernel in kernels:
        gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

        y_mean, y_cov = gpr.predict(X2, return_cov=True)

        samples = gpr.sample_y(X2, 300000)

        # More digits accuracy would require many more samples
        assert_almost_equal(y_mean, np.mean(samples, 1), 2)
        assert_almost_equal(np.diag(y_cov) / np.diag(y_cov).max(), np.var(samples, 1) / np.diag(y_cov).max(), 1)
Example #11
def fit_GP(x_train):

    y_train = gaussian(x_train, mu, sig).ravel()

    # Instantiate a Gaussian Process model
    kernel = C(1.0, (1e-3, 1e3)) * RBF(1, (1e-2, 1e2))
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    # Fit to data using Maximum Likelihood Estimation of the parameters
    gp.fit(x_train, y_train)

    # Make the prediction on the meshed x-axis (ask for MSE as well)
    y_pred, sigma = gp.predict(x, return_std=True)
    return y_train, y_pred, sigma
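fit_GP relies on a gaussian() helper and on module-level mu, sig and x that are not shown. A minimal sketch of what these might look like (hypothetical values), followed by a usage call:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

def gaussian(x, mu, sig):
    # unnormalized Gaussian bell curve used as the target function
    return np.exp(-np.power(x - mu, 2.) / (2 * np.power(sig, 2.)))

mu, sig = 5.0, 1.0                              # assumed target parameters
x = np.atleast_2d(np.linspace(0, 10, 200)).T    # prediction grid

x_train = np.atleast_2d(np.linspace(0, 10, 15)).T
y_train, y_pred, sigma = fit_GP(x_train)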
Example #12
    def test_GP_brownian_motion(self):
        from sklearn.gaussian_process import GaussianProcessRegressor
        from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

        # add data
        t = np.linspace(0, 10, 100)
        #
        # Instantiate a Gaussian Process model with a Brownian-motion covariance
        # (a plain callable is not a valid scikit-learn kernel; see the sketch
        # after this example for a custom Kernel subclass)
        kernel = lambda x, y: 1. * min(x, y)
        # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

        # Fit to data using Maximum Likelihood Estimation of the parameters
        # (y and yerr are assumed to be defined elsewhere in the test)
        X = np.atleast_2d(t).T
        gp.fit(X, y)

        # Make the prediction on the meshed x-axis (ask for the std.-dev. as well)
        y_pred, sigma = gp.predict(X, return_std=True)

        fig = plt.figure()
        ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
        ax.invert_yaxis()

        ax.plot(t, y, color='blue', label='L bol', lw=2.5)
        ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.')

        #
        # ax.plot(t, y_star, color='red', ls='--', lw=1.5, label='GP')
        ax.plot(t, y_pred, '-', color='gray')
        # ax.fill_between(t, y_star - 2 * err_y_star, y_star + 2 * err_y_star, color='gray', alpha=0.3)
        ax.fill(np.concatenate([t, t[::-1]]),
                np.concatenate([y_pred - 1.9600 * sigma,
                                (y_pred + 1.9600 * sigma)[::-1]]),
                alpha=.5, fc='b', ec='None', label='95% confidence interval')

        plt.show()
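As noted above, GaussianProcessRegressor expects a Kernel instance rather than a plain callable, so the lambda would not work as written. A minimal sketch of a hypothetical custom Brownian-motion kernel, k(s, t) = min(s, t), assuming 1-D non-negative inputs and no tunable hyperparameters:

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Kernel

class BrownianKernel(Kernel):
    """Covariance k(s, t) = min(s, t) of standard Brownian motion (illustrative)."""

    def __init__(self):
        pass  # no hyperparameters, so there is nothing to optimize

    def __call__(self, X, Y=None, eval_gradient=False):
        if Y is None:
            Y = X
        K = np.minimum(X, Y.T)            # pairwise min for column vectors of times
        if eval_gradient:
            return K, np.empty((K.shape[0], K.shape[1], 0))
        return K

    def diag(self, X):
        return np.ravel(X).astype(float)  # k(t, t) = t

    def is_stationary(self):
        return False

# usage sketch: optimizer=None since the kernel has no hyperparameters
# gp = GaussianProcessRegressor(kernel=BrownianKernel(), optimizer=None)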
Example #13
def plot_gaussian(data, col):
    '''
        Plots the gaussian process regression with a characteristic length scale
        of 10 years. Essentially this highlights the 'slow trend' in the data.
        
        Parameters
        ----------
        
        data: dataframe
        pandas dataframe containing 'date', 'linMean' which is the average
        runtime and 'linSD' which is the standard deviation.
        
        col: string
        the color in which the plot the data
        '''
    #extract the results from the dataframe
    Year = np.array(data[u'date'].tolist())
    Mean = np.array(data[u'linMean'].tolist())
    SD = np.array(data[u'linSD'].tolist())
    
    #initialize the gaussian process. Note that the process is calculated with a
    #length scale of 10 years to give the 'slow trend' in the results.
    length_scale = 10.
    kernel = 1.* RBF(length_scale)
    #per-sample noise variance is passed through the `alpha` parameter
    gp = GaussianProcessRegressor(kernel=kernel, alpha=SD ** 2, \
                                  normalize_y=True)

    #now fit the data and get the predicted mean and standard deviation
    #Note: GaussianProcessRegressor expects 2D feature arrays of shape
    #(n_samples, n_features), so Year is reshaped; the 1D target can stay as is
    gp.fit(np.atleast_2d(Year).T, Mean)
    Year_array = np.atleast_2d(np.linspace(min(Year)-2, max(Year)+2, 100)).T
    Mean_prediction, SD_prediction = gp.predict(Year_array, return_std=True)
    Year_array=Year_array.ravel()
    Mean_prediction=Mean_prediction.ravel()
    
    #plot the predicted best fit
    plt.plot(Year_array, Mean_prediction, col, alpha=1)
    #plot the 95% confidence interval
    plt.fill_between(Year_array, (Mean_prediction - 1.9600 * SD_prediction), \
                     y2=(Mean_prediction + 1.9600 * SD_prediction), alpha=0.5, \
                     color=col)
    plt.draw()
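A minimal usage sketch for plot_gaussian, assuming a small synthetic dataframe with the expected 'date', 'linMean' and 'linSD' columns (hypothetical values):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.RandomState(0)
years = np.arange(1980, 2020)
data = pd.DataFrame({'date': years,
                     'linMean': 150 - 0.2 * (years - 1980) + rng.randn(len(years)),
                     'linSD': np.full(len(years), 1.5)})

plot_gaussian(data, 'b')
plt.show()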
def test_K_inv_reset(kernel):
    y2 = f(X2).ravel()

    # Test that self._K_inv is reset after a new fit
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    assert hasattr(gpr, '_K_inv')
    assert gpr._K_inv is None
    gpr.predict(X, return_std=True)
    assert gpr._K_inv is not None
    gpr.fit(X2, y2)
    assert gpr._K_inv is None
    gpr.predict(X2, return_std=True)
    gpr2 = GaussianProcessRegressor(kernel=kernel).fit(X2, y2)
    gpr2.predict(X2, return_std=True)
    # the value of K_inv should be independent of the first fit
    assert_array_equal(gpr._K_inv, gpr2._K_inv)
Example #15
class GP(BaseTuner):
    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector needs in
                order to use gaussian process for prediction. If not enough
                results are present during a fit(), subsequent calls to
                propose() will revert to uniform selection.
        """
        super(GP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):
        """ Use X and y to train a Gaussian process. """
        super(GP, self).fit(X, y)

        # skip training the process if there aren't enough samples
        if X.shape[0] < self.r_minimum:
            return

        self.gp = GaussianProcessRegressor(normalize_y=True)
        self.gp.fit(X, y)

    def predict(self, X):
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            logger.warn('GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        y, stdev = self.gp.predict(X, return_std=True)
        return np.array(list(zip(y, stdev)))

    def _acquire(self, predictions):
        """
        Predictions from the GP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
def integrated_sigma(alpha, n_samples, n_restarts_optimizer=16, f=f):
    print("integrated_sigma(n_samples={n_samples}, alpha={alpha})".format(
        n_samples=n_samples,
        alpha=alpha,
    ))
    X = np.atleast_2d(
        np.linspace(1, 9, n_samples)
    ).T
    y = f(X).ravel()
    x = np.atleast_2d(np.linspace(0, 10, 16 * 1024)).T

    kernel = kernels.Matern() + (kernels.WhiteKernel(noise_level=alpha) if alpha is not None else 0.0)
    gp = GaussianProcessRegressor(
        kernel=kernel,
        n_restarts_optimizer=n_restarts_optimizer,
    )
    gp.fit(X, y)

    y_pred, sigma = gp.predict(x, return_std=True)

    return simps(
        x=x.ravel(),
        y=sigma,
    )
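integrated_sigma assumes numpy, scipy's simps, the sklearn kernels module and a module-level objective f are already available (its signature uses f as a default). A minimal usage sketch with a toy f (hypothetical):

import numpy as np
from scipy.integrate import simps
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process import kernels

def f(x):
    # toy objective used only for illustration
    return x * np.sin(x)

total_sigma = integrated_sigma(alpha=1e-2, n_samples=12)
print(total_sigma)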
Example #17
def classifier_GPR(data, session):    
    
    y = data['DM']
    X = data['Data']
    length = []
    correct_list_within = []
    correct_list_between = []
    all_y_within_1 = []
    all_y_between_1 = []
    all_Ys = []
   

    for s, sess in enumerate(X):
        
        # Design matrix for the session
        DM = y[s]
     
        firing_rates_all_time = X[s]
        
        if firing_rates_all_time.shape[1]>20:
            # Task indices
            choices =  DM[:,1]
            task = DM[:,4]
            
            
            task_1 = np.where(task == 1)[0] #& (choices == 1))[0]
            task_2 = np.where(task == 2)[0] #& (choices == 1))[0]
            task_3 = np.where(task == 3)[0] #& (choices == 1))[0]
            
            # Find the maximum length of any of the tasks in a session
            length.append(len(task_1))
            length.append(len(task_2))
            length.append(len(task_3))     
            min_trials_in_task = int(np.min(length)/2)
            
            #firing_rates_all_time = (firing_rates_all_time-firing_rates_all_time_mean)/firing_rates_all_time_std
            
            # Select the first min_trials_in_task in task one
            firing_rates_mean_task_1_1 = firing_rates_all_time[task_1]
            firing_rates_mean_task_1_1 = firing_rates_mean_task_1_1[:min_trials_in_task,:]
            # Select the last min_trials_in_task in task one
            firing_rates_mean_task_1_2 = firing_rates_all_time[task_1]
            firing_rates_mean_task_1_2 = firing_rates_all_time[task_2[0]-1-min_trials_in_task:task_2[0]-1,:]
           
            # Select the first min_trials_in_task in task two
            firing_rates_mean_task_2_1 = firing_rates_all_time[task_2]
            firing_rates_mean_task_2_1 = firing_rates_all_time[task_2[0]:task_2[0]+min_trials_in_task,:]
            firing_rates_mean_task_2_2 = firing_rates_all_time[task_2]
            firing_rates_mean_task_2_2 = firing_rates_all_time[task_3[0]-1-min_trials_in_task:task_3[0]-1,:]
    
            # Select the first min_trials_in_task in task three
            firing_rates_mean_task_3_1 = firing_rates_all_time[task_3]
            firing_rates_mean_task_3_1 = firing_rates_all_time[task_3[0]:task_3[0]+min_trials_in_task,:]
            firing_rates_mean_task_3_2 = firing_rates_all_time[task_3]
            firing_rates_mean_task_3_2 = firing_rates_all_time[task_3[-1]-min_trials_in_task:task_3[-1],:]
    
            
            # Finding the angle between initiation and every ms          
            # C = 2πr;  Circumference of a circle
            
            C = session.aligned_rates.shape[2]
            p = math.pi
            r =  C/(2*p)
            
            angle_sin_list = []
            angle_cos_list = []
            for i in range(C):
                L = 0+ (i+1)
                ang = (180*L)/(p*r)
                ang_sin = np.sin(np.deg2rad(ang))
                ang_cos = np.cos(np.deg2rad(ang))
            
                angle_sin_list.append(ang_sin)
                angle_cos_list.append(ang_cos)
         
            firing_rates_mean_1_1 = np.concatenate(firing_rates_mean_task_1_1, axis = 1)
            firing_rates_mean_1_2 = np.concatenate(firing_rates_mean_task_1_2, axis = 1)
            firing_rates_mean_2_1 = np.concatenate(firing_rates_mean_task_2_1, axis = 1)
            firing_rates_mean_2_2 = np.concatenate(firing_rates_mean_task_2_2, axis = 1)
            firing_rates_mean_3_1 = np.concatenate(firing_rates_mean_task_3_1, axis = 1)
            firing_rates_mean_3_2 = np.concatenate(firing_rates_mean_task_3_2, axis = 1)
    
            l = firing_rates_mean_1_1.shape[1]
            
            # Creating a vector which identifies trial stage in the firing rate vector
            Y_cos = np.tile(angle_cos_list,int(l/len(angle_cos_list)))
            Y_sin = np.tile(angle_sin_list,int(l/len(angle_sin_list)))
            Y = np.vstack((Y_cos,Y_sin))
            
            #kernel = RBF(length_scale = 2)
            kernel = Matern(nu = 3/2)
            model_nb = GPR(kernel = kernel)
            #model_nb = LinearRegression()
            
            model_nb.fit(np.transpose(firing_rates_mean_1_2), np.transpose(Y))
            y_pred_class_between_t_1_2 = model_nb.predict(np.transpose(firing_rates_mean_2_1))
            correct_between_t_1 = model_nb.score(np.transpose(firing_rates_mean_2_1),np.transpose(Y))
            
            model_nb.fit(np.transpose(firing_rates_mean_1_1),np.transpose(Y))     
            y_pred_class_within_t_1_2 = model_nb.predict(np.transpose(firing_rates_mean_1_2))
            correct_within_t_1 = model_nb.score(np.transpose(firing_rates_mean_1_2),np.transpose(Y))

            model_nb.fit(np.transpose(firing_rates_mean_2_2),np.transpose(Y))
            y_pred_class_between_t_2_3 = model_nb.predict(np.transpose(firing_rates_mean_3_1))
            correct_between_t_2 =  model_nb.score(np.transpose(firing_rates_mean_3_1),np.transpose(Y))

            model_nb.fit(np.transpose(firing_rates_mean_2_1),np.transpose(Y))
            y_pred_class_within_t_2_3 = model_nb.predict(np.transpose(firing_rates_mean_2_2))
            correct_within_t_2 =  model_nb.score(np.transpose(firing_rates_mean_2_2),np.transpose(Y))
            
            model_nb.fit(np.transpose(firing_rates_mean_3_1),np.transpose(Y))
            y_pred_class_within_t_3 = model_nb.predict(np.transpose(firing_rates_mean_3_2))
            correct_within_t_3 =  model_nb.score(np.transpose(firing_rates_mean_3_2),np.transpose(Y))

            correct_list_within.append(correct_within_t_1)
            correct_list_within.append(correct_within_t_2)
            correct_list_within.append(correct_within_t_3)
    
            correct_list_between.append(correct_between_t_1)
            correct_list_between.append(correct_between_t_2)
            
            all_y_within_1.append(y_pred_class_within_t_1_2)
            all_y_between_1.append(y_pred_class_between_t_1_2)
            all_Ys.append(Y)
            
    print(correct_list_within)
    print(correct_list_between)

    return correct_list_within, correct_list_between,all_y_within_1,all_y_between_1,all_Ys
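The angle construction above simply maps the C time bins of a trial onto a circle (bin i goes to 360*(i+1)/C degrees), so the sine/cosine pair forms a continuous 2-D regression target with no wrap-around jump. A small worked example of the same encoding, assuming a toy value of C:

import numpy as np

C = 8                                            # toy number of time bins per trial
angles = 2 * np.pi * np.arange(1, C + 1) / C     # same angles as the loop above, in radians
Y = np.vstack((np.cos(angles), np.sin(angles)))  # shape (2, C): cosine row, sine row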
Example #18
    def fit(self, X, y):
        """Build an ensemble of individual TDE classifiers.

        Using the training set (X, y), randomise over the parameter space a set
        number of times, then select new parameters using Gaussian processes.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, coerce_to_numpy=True)

        time_limit = self.time_limit_in_minutes * 60
        self.n_instances, self.n_dims, self.series_length = X.shape
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self.classifiers = []
        self.weights = []
        self.prev_parameters_x = []
        self.prev_parameters_y = []

        # Window length parameter space dependent on series length
        max_window_searches = self.series_length / 4
        max_window = int(self.series_length * self.max_win_len_prop)
        win_inc = int((max_window - self.min_window) / max_window_searches)
        if win_inc < 1:
            win_inc = 1
        if self.min_window > max_window + 1:
            raise ValueError(
                f"Error in TemporalDictionaryEnsemble, min_window = "
                f"{self.min_window} is bigger"
                f" than max_window = {max_window},"
                f" series length is {self.series_length}."
                f" Try setting min_window smaller than the series length in "
                f"the constructor, but the classifier may not work at "
                f"all with very short series.")

        possible_parameters = self._unique_parameters(max_window, win_inc)
        num_classifiers = 0
        start_time = time.time()
        train_time = 0
        subsample_size = int(self.n_instances * 0.7)
        lowest_acc = 1
        lowest_acc_idx = 0

        if time_limit > 0:
            self.n_parameter_samples = 0

        rng = check_random_state(self.random_state)

        if self.bigrams is None:
            if self.n_dims > 1:
                use_bigrams = False
            else:
                use_bigrams = True
        else:
            use_bigrams = self.bigrams

        # use time limit or n_parameter_samples if limit is 0
        while (train_time < time_limit or num_classifiers <
               self.n_parameter_samples) and len(possible_parameters) > 0:
            if num_classifiers < self.randomly_selected_params:
                parameters = possible_parameters.pop(
                    rng.randint(0, len(possible_parameters)))
            else:
                gp = GaussianProcessRegressor(random_state=self.random_state)
                gp.fit(self.prev_parameters_x, self.prev_parameters_y)
                preds = gp.predict(possible_parameters)
                parameters = possible_parameters.pop(
                    rng.choice(np.flatnonzero(preds == preds.max())))

            subsample = rng.choice(self.n_instances,
                                   size=subsample_size,
                                   replace=False)
            X_subsample = X[subsample]
            y_subsample = y[subsample]

            tde = IndividualTDE(
                *parameters,
                alphabet_size=self.alphabet_size,
                bigrams=use_bigrams,
                dim_threshold=self.dim_threshold,
                max_dims=self.max_dims,
                random_state=self.random_state,
            )
            tde.fit(X_subsample, y_subsample)
            tde.subsample = subsample

            tde.accuracy = self._individual_train_acc(
                tde,
                y_subsample,
                subsample_size,
                -999999
                if num_classifiers < self.max_ensemble_size else lowest_acc,
            )
            weight = math.pow(tde.accuracy, 4)

            if num_classifiers < self.max_ensemble_size:
                if tde.accuracy < lowest_acc:
                    lowest_acc = tde.accuracy
                    lowest_acc_idx = num_classifiers
                self.weights.append(weight)
                self.classifiers.append(tde)
            elif tde.accuracy > lowest_acc:
                self.weights[lowest_acc_idx] = weight
                self.classifiers[lowest_acc_idx] = tde
                lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

            self.prev_parameters_x.append(parameters)
            self.prev_parameters_y.append(tde.accuracy)

            num_classifiers += 1
            train_time = time.time() - start_time

        self.n_estimators = len(self.classifiers)
        self.weight_sum = np.sum(self.weights)

        self._is_fitted = True
        return self
Example #19
class GCP(BaseTuner):
    def __init__(self, tunables, gridding=0, r_minimum=2):
        """
        Extra args:
            r_minimum: the minimum number of past results this selector needs in
                order to use gaussian process for prediction. If not enough
                results are present during a fit(), subsequent calls to
                propose() will revert to uniform selection.
        """
        super(GCP, self).__init__(tunables, gridding=gridding)
        self.r_minimum = r_minimum

    def fit(self, X, y):

        def jitter(x, range):
            y = np.copy(x)
            scale_exp_min = np.abs(np.ceil(np.log10(range[0])))
            scale_exp_max = np.abs(np.ceil(np.log10(range[1])))
            scale_exp = (scale_exp_max + scale_exp_min) / 2.
            r = np.random.rand(y.size) / (10**scale_exp)
            y = y + r
            return y

        # Print msg. when going into gcp.fit
        strMessage = "rows in X = %d, r_minimum = %d" % (X.shape[0], self.r_minimum)
        logger.debug(strMessage)

        # Use X and y to train a Gaussian Copula Process.
        super(GCP, self).fit(X, y)

        # skip training the process if there aren't enough samples
        if X.shape[0] < self.r_minimum:
            return

        # -- Non-parametric model of 'y', estimated with kernel density
        kernel_pdf = st.gaussian_kde(y)
        kernel_cdf = make_cdf(kernel_pdf)
        kernel_ppf = make_ppf(kernel_pdf)
        y_kernel_model = {'pdf': kernel_pdf, 'cdf': kernel_cdf, 'ppf': kernel_ppf}
        self.y_kernel_model = y_kernel_model

        # - Transform y-->F-->vF-->norm.ppf-->v
        vF = y_kernel_model['cdf'](y)
        v = st.norm.ppf(vF)

        # -- Non-parametric model of each feature in 'X', estimated with kernel density
        X_kernel_model = []
        for ki in range(X.shape[1]):
            columnX = X[:, ki]
            if self.tunables[ki][1].is_integer:
                columnX = jitter(columnX, self.tunables[ki][1].range)
            kernel_pdf = st.gaussian_kde(columnX)
            kernel_cdf = make_cdf(kernel_pdf)
            kernel_ppf = make_ppf(kernel_pdf)
            kernel_model = {'pdf': kernel_pdf, 'cdf': kernel_cdf, 'ppf': kernel_ppf}
            X_kernel_model.append(kernel_model)
        self.X_kernel_model = X_kernel_model

        # -- Transform X-->F-->uF-->norm.ppf-->U
        U = np.empty_like(X)
        for ki in range(X.shape[1]):
            uF = X_kernel_model[ki]['cdf'](X[:, ki])
            U[:, ki] = st.norm.ppf(uF)

        # - Instantiate a GP and fit it with (U, v)
        self.gcp = GaussianProcessRegressor(normalize_y=True)
        self.gcp.fit(U, v)

    def predict(self, X):
        if self.X.shape[0] < self.r_minimum:
            # we probably don't have enough
            logger.warn('GP: not enough data, falling back to uniform sampler')
            return Uniform(self.tunables).predict(X)

        def get_valid_row(U):
            ind_OK = np.full(U.shape[0], 1, dtype=bool)
            for ki in range(U.shape[1]):
                ind_OK = np.logical_and(ind_OK, np.logical_not(np.isinf(U[:, ki])))
            V = np.copy(U[ind_OK, :])
            return V, ind_OK

        # -- Load non-parametric model
        x_kernel_model = self.X_kernel_model
        y_kernel_model = self.y_kernel_model

        # -- Transform X into U before using the GP learned
        U = np.empty_like(X)
        for ki in range(X.shape[1]):
            uF = x_kernel_model[ki]['cdf'](X[:, ki])
            U[:, ki] = st.norm.ppf(uF)

        # -- Get U_safe and print msg. to inform of how many rows are valid
        U_safe, ind_OK = get_valid_row(U)
        strMessage = "Num. of valid rows in X = %d" % (np.sum(ind_OK))
        logger.debug(strMessage)

        # -- use GP to estimate mean and stdev only of safe U's
        mu_v, stdev_v = self.gcp.predict(U_safe, return_std=True)

        # -- Transform back mu_u-->NormStd-->mu_uF
        mu_vF = st.norm.cdf(mu_v)
        stdev_vF = st.norm.cdf(stdev_v)

        # -- Transform back mu_uF-->F.ppf-->mu_y
        # VERSION 1:
        # It should be used when mu_y and stdev_y can be shorter than X.
        # Otherwise, switch to version 2.
        mu_y = y_kernel_model['ppf'](mu_vF)
        stdev_y = y_kernel_model['ppf'](stdev_vF)
        '''
        # VERSION 2:
        # It should be used when mu_y and stdev_y must have the same length
        # as X. Otherwise, Version 1 is faster.
        # -- Transform back mu_uF-->F.ppf-->mu_y
        #    mu_y has the same length as U, but is positive only for safe rows
        mu_y = np.zeros([U.shape[0]])
        stdev_y = np.zeros([U.shape[0]])
        mu_y[ind_OK] = y_kernel_model['ppf'](mu_vF)
        stdev_y[ind_OK] = y_kernel_model['ppf'](stdev_vF)
        '''

        return np.array(list(zip(mu_y, stdev_y)))

    def _acquire(self, predictions):
        """
        Predictions from the GCP will be in the form (prediction, error).
        The default acquisition function returns the index with the highest
        predicted value, not factoring in error.
        """
        return np.argmax(predictions[:, 0])
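The copula transform above relies on make_cdf and make_ppf helpers that are not shown. A minimal sketch of what they might look like (hypothetical implementations that numerically integrate and invert a scipy gaussian_kde):

import numpy as np
import scipy.stats as st

def make_cdf(kernel_pdf):
    """Numerical CDF of a 1-D gaussian_kde (illustrative)."""
    def cdf(values):
        values = np.atleast_1d(values)
        return np.array([kernel_pdf.integrate_box_1d(-np.inf, v) for v in values])
    return cdf

def make_ppf(kernel_pdf, grid_size=1000):
    """Numerical inverse CDF of a 1-D gaussian_kde via interpolation (illustrative)."""
    data = kernel_pdf.dataset.ravel()
    grid = np.linspace(data.min() - 3 * data.std(), data.max() + 3 * data.std(), grid_size)
    cdf_vals = np.array([kernel_pdf.integrate_box_1d(-np.inf, g) for g in grid])
    def ppf(q):
        return np.interp(q, cdf_vals, grid)
    return ppf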
Example #20
class GPTS_Learner(Learner):
    '''Gaussian Process Thompson Sampling Learner inheriting from the Learner class.'''
    def __init__(self, n_arms, arms, kernel=None, plain_gp=False):
        '''Initialize the Gaussian Process Thompson Sampling Learner with a number of arms, the arms and a kernel.'''

        super(GPTS_Learner,
              self).__init__(n_arms)  # calls the base Learner's __init__

        # Assignments and Initializations
        self.arms = arms
        self.means = np.linspace(0, 40, self.n_arms)
        self.sigmas = np.ones(self.n_arms) * 10
        self.pulled_arms = []
        alpha = 10.0

        # When no kernel is given, a radial-basis function kernel with 9 optimizer restarts is used; otherwise no restarts are needed
        if not kernel:
            # The kernel is set as the product of a constant and a Radial-basis with values 1 and range 1e-3 to 1e3
            kernel = C(1.0, (1e-3, 1e3)) * RBF(1.0, (1e-3, 1e3))
            n_restarts = 9
        else:
            n_restarts = 0

        if plain_gp:
            self.gp = GaussianProcessRegressor()
        else:
            # Sets the Gaussian Process Regressor from the given kernel
            self.gp = GaussianProcessRegressor(kernel=kernel,
                                               alpha=alpha**2,
                                               normalize_y=True,
                                               n_restarts_optimizer=n_restarts)

    def update_observations(self, pulled_arm, reward):
        '''Updates the information on the rewards, keeping track of the pulled arm (extends update_observations in Learner).'''

        super(GPTS_Learner, self).update_observations(
            pulled_arm,
            reward)  # calls update_observations from the base Learner

        # Keeps track of the pulled arm
        self.pulled_arms.append(self.arms[pulled_arm])

    def update_model(self):
        '''Updates the model with the new means and sigmas.'''

        # Sets the trimmed pulled arms vs rewards
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_rewards

        # Fits the Gaussian process
        self.gp.fit(x, y)

        # Evaluates current means and sigmas with a lower bound on the standard deviation of 0.01 (for convergence)
        self.means, self.sigmas = self.gp.predict(np.atleast_2d(self.arms).T,
                                                  return_std=True)
        self.sigmas = np.maximum(self.sigmas, 1e-2)

    def update(self, pulled_arm, reward):
        '''Proceeds of 1 time step updating both the observations and the model.'''
        self.t += 1
        self.update_observations(pulled_arm, reward)
        self.update_model()

    def pull_arm(self, budget):
        '''Pulls the arm from the current multidimensional random normal distribution, returning the index of the best arm satisfying the budget allocation.'''

        sampled_values = np.random.normal(
            self.means, self.sigmas
        )  # pulls some random arms basing on current means and sigmas
        feasible_idxs = np.argwhere(
            self.arms <=
            budget)  # finds the indices which satisfy the budget allocation
        return np.argmax(
            sampled_values[feasible_idxs]
        )  # returns the index of the best arm satisfying the budget allocation
Example #21
import numpy as np
import matplotlib.pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel


rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 20)[:, np.newaxis]
y = 0.5 * np.sin(3 * X[:, 0]) + rng.normal(0, 0.5, X.shape[0])

# First run
plt.figure(0)
kernel = 1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \
    + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=0.0).fit(X, y)
X_ = np.linspace(0, 5, 100)
y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
plt.plot(X_, y_mean, 'k', lw=3, zorder=9)
plt.fill_between(X_, y_mean - np.sqrt(np.diag(y_cov)),
                 y_mean + np.sqrt(np.diag(y_cov)),
                 alpha=0.5, color='k')
plt.plot(X_, 0.5*np.sin(3*X_), 'r', lw=3, zorder=9)
plt.scatter(X[:, 0], y, c='r', s=50, zorder=10)
plt.title("Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s"
          % (kernel, gp.kernel_,
             gp.log_marginal_likelihood(gp.kernel_.theta)))
plt.tight_layout()

# Second run
plt.figure(1)
kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3)) \
    + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e+1))
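The second run presumably mirrors the first with this new starting kernel; a sketch of the remaining fit-and-plot steps under that assumption:

gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=0.0).fit(X, y)
y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
plt.plot(X_, y_mean, 'k', lw=3, zorder=9)
plt.fill_between(X_, y_mean - np.sqrt(np.diag(y_cov)),
                 y_mean + np.sqrt(np.diag(y_cov)),
                 alpha=0.5, color='k')
plt.plot(X_, 0.5*np.sin(3*X_), 'r', lw=3, zorder=9)
plt.scatter(X[:, 0], y, c='r', s=50, zorder=10)
plt.title("Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s"
          % (kernel, gp.kernel_,
             gp.log_marginal_likelihood(gp.kernel_.theta)))
plt.tight_layout()
plt.show()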
Example #22
gp = GaussianProcessRegressor(kernel=RBF(l, (l, l)))

#Fit --> Training
g = gp.fit(a[:, np.newaxis], wde - base(a))

#Plotting points (if log use np.logspace)
a_sampling = np.linspace(inia, enda, ODEsteps)
print(a_sampling)

#transforming a_sampling into z_sampling
z_sampling = np.zeros(ODEsteps)
for i in range(ODEsteps):
    z_sampling[i] = -1 + 1 / a_sampling[i]
print(z_sampling)
#Predict points
w_pred, sigma = gp.predict(a_sampling[:, np.newaxis], return_std=True)
w_pred = w_pred + base(a_sampling)

#Plot the result: remove it from final versions
fig = plt.figure(figsize=(14, 12))
plt.plot(a_sampling, w_pred, label='l=%s' % l)
plt.legend(fontsize=20)
plt.scatter(a, wde)
fig.savefig('test_figure.png')

# print to file
#f = open(filename,'w')
# print len(z_sampling)
#for i in range(0, ODEsteps):
#    print >>f, z_sampling[i], w_pred[i]
np.savetxt(filename, np.array([z_sampling, w_pred]).T, fmt="%15.8e")
Example #23
    X = np.atleast_2d([1., 9.]).T
    y = blackbox_func(X).ravel()

    # Gaussian Process Upper Confidence Bound (GP-UCB) algorithm
    # --> iterate until convergence (the convergence criterion is a tuning point)
    n_iteration = 50
    for i in range(n_iteration):

        # Fit a Gaussian process to the values observed so far
        # --> the kernel function and its parameters are left at their defaults (a tuning point)
        gp = GaussianProcessRegressor()
        gp = KNeighborsRegressor(n_neighbors=2)
        gp.fit(X, y)

        # Obtain the posterior distribution
        posterior_mean = gp.predict(x_grid)
        #        posterior_sig = dist_knn(X, x_grid)
        posterior_sig = dist_knn(X, x_grid, i + 1)

        # Select the x that maximizes the acquisition function as the next parameter
        # --> a larger beta favors exploration (e.g. start large and decay it over iterations to shift toward exploitation; a tuning point)
        idx = acq_ucb(posterior_mean, posterior_sig, beta=100.0)
        x_next = x_grid[idx]

        plot(x_grid,
             y,
             X,
             posterior_mean,
             posterior_sig,
             title='Iteration=%2d,  x_next = %f' % (i + 2, x_next))
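This loop depends on helpers that are not shown (x_grid, blackbox_func, dist_knn, acq_ucb, plot). A minimal sketch of the acquisition helper acq_ucb in the usual GP-UCB form, plus a toy distance-based uncertainty proxy dist_knn (both hypothetical):

import numpy as np

def acq_ucb(mean, sig, beta=100.0):
    # index of the grid point maximizing the UCB score mean + sqrt(beta) * sigma
    return np.argmax(mean + np.sqrt(beta) * sig)

def dist_knn(X, x_grid, k=1):
    # crude uncertainty proxy: distance from each grid point to its k-th nearest observation
    d = np.abs(x_grid.reshape(-1, 1) - X.reshape(1, -1))
    return np.sort(d, axis=1)[:, min(k, d.shape[1]) - 1]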
Example #24
class GPUCB:
    def create_theta(self, x):
        theta = dict()
        for i in range(self.dim):
            theta[self.space.continuous_space[i].
                  label] = self.space.continuous_space[i].convert(x[i])
        return theta

    def log_generation(self, x, y):
        self.evals += 1
        self.fval = y
        theta = self.create_theta(x)
        print("theta:{}, fval:{}".format(theta, self.fval))
        if self.fval < self.best_fval:
            self.best_fval = self.fval
            self.best_params = theta

        self.log["evals"].append(self.evals)
        self.log["fval"].append(self.fval)
        self.log["best_fval"].append(self.best_fval)
        for i in range(self.dim):
            self.log[self.space.continuous_space[i].label].append(
                self.space.continuous_space[i].convert(x[i]))

    def __init__(self, **params):
        self.seed = params["seed"]
        np.random.seed(self.seed)
        self.B = params["B"]
        self.obj_func = params["obj_func"]
        self.dim = params["obj_func"].dim
        # GP
        self.xs = np.zeros((0, self.dim))
        self.ys = np.zeros((0, 1))
        # kernel
        matern = Matern(nu=2.5, length_scale=[1.0 for _ in range(self.dim)])
        white = WhiteKernel(noise_level=1.0)
        constant = ConstantKernel(constant_value=1.0)
        self.gp = GaussianProcessRegressor(kernel=constant * matern + white,
                                           normalize_y=True,
                                           n_restarts_optimizer=15)
        # BO
        self.beta = 2.0
        self.num_initial_samples = 5
        self.acq_initial_points = 50

        # for logging
        self.evals = 0
        self.fval = None
        self.best_params = None
        self.best_fval = np.inf
        self.space = params["space"]
        self.log = opt_util.basic_log_setup(self.space)

    def ucb(self, x):
        mean, std = self.gp.predict(x, return_std=True)
        # Note that our implementation targets minimization, so this is lcb, in fact.
        return mean - self.beta * std

    def argmin_acq(self):
        x_initials = [
            np.random.uniform(0.0, 1.0, size=self.dim)
            for _ in range(self.acq_initial_points)
        ]
        x_best = None
        fval_best = np.inf
        for xp in x_initials:
            res = minimize(
                lambda x: self.ucb(x.reshape(
                    1, -1)),  # (1, -1) means it contains a single sample
                xp.reshape(1, -1),
                bounds=[(0.0, 1.0) for _ in range(self.dim)],
                method="L-BFGS-B",
            )
            if res.fun < fval_best:
                x_best = res.x
                fval_best = res.fun
        return x_best

    def optimize(self):
        for t in range(self.B):
            # Step 1. select sample to evaluate
            if t < self.num_initial_samples:
                x = np.random.uniform(0.0, 1.0, size=self.dim)
            else:
                x = self.argmin_acq()

            # Step 2. evaluate the sample selected in Step 1.
            y = self.obj_func.evaluate(x)
            self.log_generation(x, y)

            # Step 3. accumulate the observed data
            self.xs = np.concatenate(
                (self.xs, np.array(x).reshape(1, self.dim)))
            self.ys = np.concatenate((self.ys, np.array(y).reshape(1, 1)))
Example #25
def fit_gp(x,
           y,
           x0=None,
           symmetry_list=None,
           y_errors=None,
           hypercube_rescale=False,
           fname_export="gp_fit"):
    """
    x = array so x[0] , x[1], x[2] are points.
    """

    # If we are loading a fit, override everything else
    if opts.fit_load_gp:
        print(
            " WARNING: Do not re-use fits across architectures or versions : pickling is not transferrable "
        )
        my_gp = joblib.load(opts.fit_load_gp)
        return lambda x: my_gp.predict(x)

    # Amplitude:
    #   - We are fitting lnL.
    #   - We know the scale more or less: more than 2 in the log is bad
    # Scale
    #   - because of strong correlations with chirp mass, the length scales can be very short
    #   - they are rarely very long, but at high mass can be long
    #   - I need to allow for a RANGE

    length_scale_est = []
    length_scale_bounds_est = []
    for indx in np.arange(len(x[0])):
        # These length scales have been tuned by experience
        length_scale_est.append(
            2 *
            np.std(x[:, indx]))  # auto-select range based on sampling retained
        length_scale_min_here = np.max(
            [1e-3, 0.2 * np.std(x[:, indx] / np.sqrt(len(x)))])
        length_scale_bounds_est.append(
            (length_scale_min_here, 5 * np.std(x[:, indx]))
        )  # auto-select range based on sampling *RETAINED* (i.e., passing cut).  Note that for the coordinates I usually use, it would be nonsensical to make the range in a coordinate too small, as can occasionally happen

    print(" GP: Input sample size ", len(x), len(y))
    print(" GP: Estimated length scales ")
    print(length_scale_est)
    print(length_scale_bounds_est)

    if not (hypercube_rescale):
        # These parameters have been hand-tuned by experience to try to set to levels comparable to typical lnL Monte Carlo error
        kernel = WhiteKernel(
            noise_level=0.1, noise_level_bounds=(1e-2, 1)) + C(
                0.5,
                (1e-3, 1e1)) * RBF(length_scale=length_scale_est,
                                   length_scale_bounds=length_scale_bounds_est)
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=8)

        gp.fit(x, y)

        print(" Fit: std: ", np.std(y - gp.predict(x)),
              "using number of features ", len(y))

        if opts.fit_save_gp:
            print(" Attempting to save fit ", opts.fit_save_gp + ".pkl")
            joblib.dump(gp, opts.fit_save_gp + ".pkl")

        return lambda x: gp.predict(x)
    else:
        x_scaled = np.zeros(x.shape)
        x_center = np.zeros(len(length_scale_est))
        x_center = np.mean(x)
        print(" Scaling data to central point ", x_center)
        for indx in np.arange(len(x)):
            x_scaled[indx] = (x[indx] - x_center) / length_scale_est  # resize

        kernel = WhiteKernel(noise_level=0.1, noise_level_bounds=(
            1e-2, 1)) + C(0.5, (1e-3, 1e1)) * RBF(len(x_center), (1e-3, 1e1))
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=8)

        gp.fit(x_scaled, y)
        print(" Fit: std: ", np.std(y - gp.predict(x_scaled)),
              "using number of features ", len(y))  # should NOT be perfect

        return lambda x, x0=x_center, scl=length_scale_est: gp.predict(
            (x - x0) / scl)
Example #26
class AugmentedGaussianProcess:
    """A pair of Gaussian processes: one for the samples and another for the error.

    Arguments:
     * sample_kernel: the kernel used for the sample predictor
     * error_kernel: the kernel used for the error predictor; if not specified,
       defaults to the sample kernel plus a white noise term.

    Keyword-only arguments:
     * fit_white_noise: if True, add a white noise term to the kernel and include
       the white noise term in the sample error rather than the model error.
    
    Attributes:
     * submodel_samples: the sample predictor (replaced by each call to fit())
     * submodel_errors: the error predictor

    Note that direct access to the submodels doesn't include the corrections
    applied by fit_white_noise.
    """

    def __init__(self, sample_kernel, error_kernel=None, *, fit_white_noise=False):
        if fit_white_noise:
            sample_kernel = sample_kernel + kernels.WhiteKernel()

        if error_kernel is None:
            if fit_white_noise:
                error_kernel = sample_kernel
            else:
                error_kernel = sample_kernel + kernels.WhiteKernel()

        self.fit_white_noise = fit_white_noise
        self.sample_kernel = sample_kernel
        self.submodel_samples = GaussianProcessRegressor(self.sample_kernel)
        self.submodel_errors = GaussianProcessRegressor(error_kernel)

    def fit(self, X, Y, Y_err):
        """Fit the model to a set of data with errors."""

        self.submodel_samples = GaussianProcessRegressor(self.sample_kernel, alpha=Y_err**2)

        self.submodel_samples.fit(X, Y)
        self.submodel_errors.fit(X, Y_err)

    def predict(self, X, return_std=False, return_efficiency=False):
        """Predict the mean, possibly also the standard error and sampling efficiency.

        If return_std is False, returns the predicted mean.
        If return_std is True, also returns the standard error of the prediction.
        If return_efficiency is also True, also returns the sampling
        efficicency, defined as the portion of the total sampling error
        attributable to the model uncertainty.
        """

        if return_std:
            mean, std = self.submodel_samples.predict(X, return_std=True)
            sigma = self.predict_sample_error(X)
            if self.fit_white_noise:
                white_noise_level = self.submodel_samples.kernel_.k2.noise_level
                std = np.sqrt(std**2 - white_noise_level)
            if return_efficiency:
                efficiency = 1 - sigma / np.sqrt(sigma**2 + std**2)
                return mean, std, efficiency
            else:
                return mean, std
        else:
            return self.submodel_samples.predict(X)

    def predict_sample_error(self, X):
        """Predict the sample error."""

        sigma = self.submodel_errors.predict(X)
        if self.fit_white_noise:
            white_noise_level = self.submodel_samples.kernel_.k2.noise_level
            sigma = np.sqrt(sigma**2 + white_noise_level)
        return sigma
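A minimal usage sketch for AugmentedGaussianProcess, using toy noisy data (hypothetical values): fit on samples with per-point errors, then query the mean, model error and sampling efficiency.

import numpy as np
from sklearn.gaussian_process import kernels

rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 30)[:, None]
Y_err = np.full(30, 0.1)
Y = np.sin(X[:, 0]) + rng.normal(0, Y_err)

model = AugmentedGaussianProcess(kernels.RBF(length_scale=1.0))
model.fit(X, Y, Y_err)
mean, std, eff = model.predict(X, return_std=True, return_efficiency=True)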
Example #27
class SW_GPTS_Learner:
    def __init__(self, arms, sigma=5, window_length=0):
        self.arms = arms
        self.n_arms = len(arms)
        self.means = np.zeros(self.n_arms)
        self.sigmas = np.ones(self.n_arms) * sigma

        self.pulled_arms = np.array([])
        self.collected_rewards = np.array([])

        self.window_length = window_length

        alpha = 1.5

        theta = 1
        l = 1

        kernel = C(theta, (1e-5, 1e5)) * RBF(l, (1e-10, 1e10))

        self.gp = GaussianProcessRegressor(kernel=kernel,
                                           alpha=alpha**2,
                                           normalize_y=True,
                                           n_restarts_optimizer=10)

    def sample_values(self):
        """
        Sample the value of all the arms from the learner Gaussian Process
        :return:
        (numpy.array) An array containing the sampled values
        """
        sampled_values = np.random.normal(self.means, self.sigmas)
        return sampled_values

    def update_observations(self, arm_index, reward):
        """
        Update the parameters based on an external sampling on the real function

        Parameters:
        arm_index (int): the index of the arm that has been pulled
        reward (float): the reward received by pulling the arm
        """
        self.pulled_arms = np.append(self.pulled_arms, arm_index)
        self.collected_rewards = np.append(self.collected_rewards, reward)

        if len(self.pulled_arms) > self.window_length:
            self.pulled_arms = self.pulled_arms[-self.window_length:]
            self.collected_rewards = self.collected_rewards[-self.
                                                            window_length:]

    def update_model(self):
        """
        Update the gaussian process based on the recent observations
        """
        x = np.atleast_2d(self.pulled_arms).T
        y = self.collected_rewards

        if len(y) == 1:
            return

        self.gp.fit(x, y)
        self.means = self.gp.predict(np.array(list(np.ndindex(len(
            self.arms)))))
        self.sigmas = np.maximum(self.sigmas, 1e-2)

    def update(self, arm_index, reward):
        """
        Update the parameters of the model and the gaussian process based on an external sampling
        of the real function

        Parameters:
            arm_index (int): the index of the arm that has been pulled
            reward (float): the reward received by pulling the arm
        """
        self.update_observations(arm_index, reward)
        self.update_model()

    def pull_arm(self):
        """
        Choose which arm to pull based on the result of a sampling of the gaussian process
        
        Returns:
        (int): the index of the arm that maximizes the observation
        """
        sampled_values = np.random.normal(self.means, self.sigmas)
        return np.argmax(sampled_values)

    # FOR TESTING PURPOSES
    def plot(self, ax, function):
        ax.plot(self.arms, function(self.arms))

        ax.plot(self.pulled_arms, self.collected_rewards, 'ro')

        ax.plot(self.arms, self.means)

    def predict(self, point):
        return self.gp.predict([[point]])
Example #28
class BayesianLearning(BasePredictor):
    def __init__(self, surrogate=None, n_features=None):
        # initializing some of the default values
        # The default surrogate function is gaussian_process with matern kernel
        if surrogate is None:
            if n_features is not None:
                # anisotropic kernel
                length_scale = [2.] * n_features
            else:
                length_scale = 2.

            self.surrogate = GaussianProcessRegressor(
                kernel=Matern(nu=2.5,
                              length_scale=length_scale,
                              length_scale_bounds=(0.1, 1024)),
                n_restarts_optimizer=3,
                # random_state=1,
                # optimizer=None,
                normalize_y=False)
        else:
            self.surrogate = surrogate

        # keep track of the iteration counts
        self.iteration_count = 0

        # The size of the exploration domain, default to 1000
        self.domain_size = 1000

    """
    This is based on the upper confidence bound algorithm used for the aquision function
    Input: X is the values from which we need to select the best value using the aquision function.
    return: The value which is best bases on the UCB and also the mean of this value.
    """

    def Upper_Confidence_Bound(self, X):
        ''' Compute the upper confidence bound as per the GP-BUCB paper,
        algorithm 2: C used here is the C1 value, which empirically works well'''
        mu, sigma = self.surrogate.predict(X, return_std=True)
        mu = mu.reshape(mu.shape[0], 1)

        sigma = sigma.reshape(sigma.shape[0], 1)

        tolerance = 1e-6

        sigma_inv_sq = 1.0 / (
            tolerance + (sigma * sigma)
        )  # tolerance is used to avoid the divide by zero error

        C = 8 / (np.log(1 + sigma_inv_sq))

        alpha_inter = self.domain_size * (self.iteration_count) * (
            self.iteration_count) * math.pi * math.pi / (6 * 0.1)

        if alpha_inter == 0:
            print('Error: alpha_inter is zero in Upper_Confidence_Bound')

        alpha = 2 * math.log(alpha_inter)  # We have set delta = 0.1
        alpha = math.sqrt(alpha)

        beta = np.exp(2 * C) * alpha
        beta = np.sqrt(beta)
        Value = mu + (beta) * sigma
        x_index = np.argmax(Value)
        mu_value = mu[x_index]

        return X[x_index], mu_value

    """
    Check if the returned index value is already present in X_Sample
    """

    def Upper_Confidence_Bound_Remove_Duplicates(self, X, X_Sample,
                                                 batch_size):
        mu, sigma = self.surrogate.predict(X, return_std=True)
        mu = mu.reshape(mu.shape[0], 1)

        sigma = sigma.reshape(sigma.shape[0], 1)

        tolerance = 1e-6

        sigma_inv_sq = 1.0 / (
            tolerance + (sigma * sigma)
        )  # tolerance is used to avoid the divide by zero error

        C = 8 / (np.log(1 + sigma_inv_sq))

        alpha_inter = self.domain_size * (self.iteration_count) * (
            self.iteration_count) * math.pi * math.pi / (6 * 0.1)

        if alpha_inter == 0:
            print('Error: alpha_inter is zero in Upper_Confidence_Bound')

        alpha = 2 * math.log(alpha_inter)  # We have set delta = 0.1
        alpha = math.sqrt(alpha)

        beta = np.exp(2 * C) * alpha
        beta = np.sqrt(beta)

        if batch_size == 1:
            exploration_factor = alpha
        else:
            exploration_factor = beta

        Value = mu + exploration_factor * sigma

        return self.remove_duplicates(X, X_Sample, mu, Value)

    """
    Returns the acquisition function
    """

    def Get_Upper_Confidence_Bound(self, X):
        mu, sigma = self.surrogate.predict(X, return_std=True)
        mu = mu.reshape(mu.shape[0], 1)

        sigma = sigma.reshape(sigma.shape[0], 1)
        alpha_inter = self.domain_size * (self.iteration_count) * (
            self.iteration_count) * math.pi * math.pi / (6 * 0.1)

        if alpha_inter == 0:
            print('Error: alpha_inter is zero in Upper_Confidence_Bound')

        alpha = 2 * math.log(alpha_inter)  # We have set delta = 0.1
        alpha = math.sqrt(alpha)

        Value = mu + (alpha) * sigma

        return Value

    """
    Returns the most optimal x, along with its mean value, from the domain of x, making sure it is not a duplicate (based on closeness).
    Used in the batch setting, as the mean is also returned.
    """

    def remove_duplicates(self, X, X_Sample, mu, Value):
        # print('*'*200)
        v_sorting_index = np.argsort(-Value, axis=0)
        index = 0
        # go through all the values in X_Sample and check if any value is close
        # to the optimal x value; if yes, don't consider this optimal x value

        while index < v_sorting_index.shape[0]:
            x_optimal = X[v_sorting_index[index]]

            # check if x_optimal is in X_Sample
            check_closeness = self.closeness(x_optimal, X_Sample)

            if check_closeness == False:  # No close element to x_optimal in X_Sample
                break

                # we will look for next optimal value to try
            else:
                index = index + 1

        # If the entire domain is the same as the already selected samples, just pick the best by value
        if (index == v_sorting_index.shape[0]):
            index = 0

        return X[v_sorting_index[index]], mu[v_sorting_index[index]]

    """
    Returns the most optmal x only from the domain of x and making sure it is not a Duplicate (depending on closeness)
    Intended for usage in serial and clustering setting: As no mean is also returned, and no hullicination is considered
    """

    def remove_duplicates_serial(self, X, X_Sample, Value):
        # print('*'*200)
        v_sorting_index = np.argsort(-Value, axis=0)
        index = 0
        # go through all the values in X_Sample and check whether any value is
        # close to the optimal x value; if so, don't consider this optimal x value

        while index < v_sorting_index.shape[0]:
            x_optimal = X[v_sorting_index[index]]

            # check if x_optimal is in X_Sample
            check_closeness = self.closeness(x_optimal, X_Sample)

            if not check_closeness:  # no close element to x_optimal in X_Sample
                break
            else:
                # otherwise, look for the next best candidate
                index = index + 1

        # If the entire domain is close to the already selected samples, just pick the best by value
        if (index == v_sorting_index.shape[0]):
            index = 0

        return X[v_sorting_index[index]]

    def closeness(self, x_optimal, X_Sample):
        # check if x_optimal is close to X_Sample
        tolerance = 1e-3

        for i in range(X_Sample.shape[0]):
            diff = np.sum(np.absolute(X_Sample[i] - x_optimal))
            if (diff < tolerance):
                # print('Removed Duplicate')
                return True

        return False

    """
    This is the main function which returns the next batch to try along with the mean values for this batch
    """

    def get_next_batch(self, X, Y, X_tries, batch_size=3):
        # print('In get_next_batch')

        X_temp = X
        Y_temp = Y

        batch = []

        for i in range(batch_size):
            self.iteration_count = self.iteration_count + 1
            self.surrogate.fit(X_temp, Y_temp)

            X_next, u_value = self.Upper_Confidence_Bound_Remove_Duplicates(
                X_tries, X_temp, batch_size)
            u_value = u_value.reshape(-1, 1)
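            # "hallucinate" the outcome of the pending evaluation with the GP
            # posterior mean returned above, so the next pick in this batch
            # accounts for the point that was just selected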
            Y_temp = np.vstack((Y_temp, u_value))
            X_temp = np.vstack((X_temp, X_next))

            batch.append([X_next])

        batch = np.array(batch)

        batch = batch.reshape(-1, X.shape[1])
        return batch

    """
    Using clustering to select next batch
    """

    def get_next_batch_clustering(self, X, Y, X_tries, batch_size=3):
        # print('In get_next_batch')

        X_temp = X
        Y_temp = Y

        self.surrogate.fit(X_temp, Y_temp)
        self.iteration_count = self.iteration_count + 1

        Acquition = self.Get_Upper_Confidence_Bound(X_tries)

        if batch_size > 1:
            kmeans = KMeans(n_clusters=4, random_state=0).fit(Acquition)
            cluster_pred = kmeans.labels_.reshape(kmeans.labels_.shape[0])
            # select the best cluster of the acquisition function, then cluster in the domain space itself
            acq_cluster_max_index = np.argmax(kmeans.cluster_centers_)

            # select the points in acq_cluster_max_index
            x_best_acq_domain = []
            x_best_acq_value = []

            for i in range(X_tries.shape[0]):
                if cluster_pred[i] == acq_cluster_max_index:
                    x_best_acq_domain.append(X_tries[i])
                    x_best_acq_value.append(Acquition[i])

            x_best_acq_domain = np.array(x_best_acq_domain)
            x_best_acq_value = np.array(x_best_acq_value)

            # Do the domain space based clustering on the best points
            kmeans = KMeans(n_clusters=batch_size,
                            random_state=0).fit(x_best_acq_domain)
            cluster_pred_domain = kmeans.labels_.reshape(
                kmeans.labels_.shape[0])

            # partition the space into the cluster in X and select the best X from each space
            partitioned_space = dict()
            partitioned_acq = dict()
            for i in range(batch_size):
                partitioned_space[i] = []
                partitioned_acq[i] = []

            for i in range(x_best_acq_domain.shape[0]):
                partitioned_space[cluster_pred_domain[i]].append(
                    x_best_acq_domain[i])
                partitioned_acq[cluster_pred_domain[i]].append(
                    x_best_acq_value[i])

            batch = []

            for i in partitioned_space:
                x_local = partitioned_space[i]
                acq_local = partitioned_acq[i]
                acq_local = np.array(acq_local)
                x_index = np.argmax(acq_local)
                x_final_selected = x_local[x_index]
                batch.append([x_final_selected])

        else:  # batch_size ==1
            batch = []
            x_index = np.argmax(Acquition)
            x_final_selected = self.remove_duplicates_serial(
                X_tries, X_temp, Acquition)
            # x_final_selected = X_tries[x_index]
            batch.append([x_final_selected])

        batch = np.array(batch)
        batch = batch.reshape(-1, X.shape[1])
        return batch

    """
    Get the predictions from the surrogate function
    along with the variance
    """

    def predict(self, X):
        pred_y, sigma = self.surrogate.predict(X, return_std=True)
        return pred_y, sigma

    """
    fit the optimizer on the X and Y values
    """

    def fit(self, X, Y):
        self.surrogate.fit(X, Y)
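
# A minimal standalone sketch of the batch strategy implemented by
# get_next_batch above, using only scikit-learn. The function below is an
# illustration, not part of the original class: a fixed kappa stands in for the
# iteration-dependent alpha/beta schedule, and the Matern kernel is an assumption.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern


def ucb_batch_sketch(X_obs, Y_obs, X_candidates, batch_size=3, kappa=2.0):
    X_tmp = np.asarray(X_obs, dtype=float)
    Y_tmp = np.asarray(Y_obs, dtype=float).ravel()
    batch = []
    for _ in range(batch_size):
        gp = GaussianProcessRegressor(kernel=Matern(nu=2.5),
                                      n_restarts_optimizer=5)
        gp.fit(X_tmp, Y_tmp)
        mu, sigma = gp.predict(X_candidates, return_std=True)
        idx = np.argmax(mu + kappa * sigma)
        batch.append(X_candidates[idx])
        # hallucinate the outcome with the posterior mean (kriging believer)
        X_tmp = np.vstack([X_tmp, X_candidates[idx]])
        Y_tmp = np.append(Y_tmp, mu[idx])
    return np.array(batch)
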
Example #29
try:
    data = pd.read_csv(data_file)
    list(data.columns)
    Y = data["target"]
    data = data.drop(columns=['ID', 'target'])
except Exception as e:
    print(e)

# Instantiate a Gaussian Process model
kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(data, Y)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(data, return_std=True)

from sklearn.metrics import mean_squared_error
from math import sqrt

y_pred = np.around(y_pred, decimals=2)
ypredDF = pd.DataFrame(y_pred.round(), columns=["YHat"])

ypredDF["Y"] = Y

ypredDF.shape

trainrms = sqrt(mean_squared_error(ypredDF['Y'], ypredDF['YHat']))
print("RBF : trainrms {}".format(trainrms))

print(ypredDF.loc[ypredDF['Y'] == ypredDF['YHat']])
Example #30
class R_MODEL(object):
    def __init__(self, env, context_dim, pd_dim=12):
        self.env = env
        self.context_dim = context_dim
        self.pd_dim = pd_dim
        self.observation_dim = 12  # 12 for single-hole assembly: Px, Py, Pz, Ox, Oy, Oz, Fx, Fy, Fz, Tx, Ty, Tz
        self.action_dim = 6  # dimension of the DDPG output action; 6 in this task: Px, Py, Pz, Ox, Oy, Oz
        self.action_bound = 1  # upper/lower bound of the DDPG output action
        self.MAX_EP = 5  # number of DDPG training episodes for one context
        self.MAX_EP_STEPS = 400  # maximum number of DDPG steps per episode
        self.var = 0.6  # control exploration

        # New gpr model
        self.kernel = DotProduct() + WhiteKernel()
        # the GP model between dA and dX
        self.state_transfer_model = GaussianProcessRegressor(
            kernel=self.kernel, random_state=0)
        # the GP model between POS and F
        self.contact_model = GaussianProcessRegressor(kernel=self.kernel,
                                                      random_state=0)

        # New DDPG
        self.ddpg = DDPG(self.observation_dim, self.action_dim,
                         self.action_bound)

    # run DDPG training to collect data and train state transfer model
    def train_state_model(self, gpreps, n):
        # run DDPG training and collect data for model learning
        memory = self.__run_ddpg(gpreps, n)

        # process the memory for forward training
        I, J, X, Y, U = [], [], [], [], []
        dA, dX, POS, F = [], [], [], []
        # dA: action in real space of each step
        # dX: position change during each step
        # POS: position of each step
        # F: contact force of each step
        uk = np.array([0, 0, 0, 0, 0, 0])
        uk_1 = np.array([0, 0, 0, 0, 0, 0])

        for t in range(len(memory)):
            i, j, kp, kd, x, y, u, r, d = memory[
                t]  # i: episode number, j: step number
            I.append(i)
            J.append(j)
            X.append(x)
            Y.append(y)
            U.append(u)
            pos = X[t][:6]
            force = X[t][-6:]
            POS.append(pos)
            F.append(force)

            if t >= 2:
                # check that these 3 consecutive entries come from 3 consecutive
                # steps of a single episode
                if (I[t] == I[t - 1] == I[t - 2]
                        and J[t] == J[t - 1] + 1 and J[t - 1] == J[t - 2] + 1):
                    ds = (Y[t] - X[t])[:6]
                    dX.append(ds)
                    # reproduce the PD control
                    rk = np.array([0, 0, 15, 0, 0, 0])
                    yk = np.array(X[t][-6:])
                    ek = rk - yk
                    yk = np.array(X[t - 1][-6:])
                    ek_1 = rk - yk
                    yk = np.array(X[t - 2][-6:])
                    ek_2 = rk - yk
                    # discrete PD algorithm
                    uk = uk_1 + kp * (ek - ek_1) + kd * (ek - 2 * ek_1 + ek_2)
                    uk_1 = uk
                    da = uk
                    for i in range(6):
                        da[i] = round(da[i], 4)
                    da = da + da * U[t]
                    dA.append(da)
                else:  # if the episode ends
                    # renew variables
                    uk = np.array([0, 0, 0, 0, 0, 0])
                    uk_1 = np.array([0, 0, 0, 0, 0, 0])
        dA, dX, POS, F = np.array(dA), np.array(dX), np.array(POS), np.array(F)

        # reward model training
        self.state_transfer_model = GaussianProcessRegressor(
            kernel=self.kernel, random_state=0).fit(dA, dX)
        self.contact_model = GaussianProcessRegressor(kernel=self.kernel,
                                                      random_state=0).fit(
                                                          POS, F)

    def __run_ddpg(self, gpreps, n):
        # n training cycles
        for j in range(n):
            self.env.reset()
            # Choose pd parameters
            s = [1, 2]  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            w = gpreps.choose_action(
                s)  # Kp = action[:, :6], Kd = action[:, 6:]
            kp = w[:, :6][0]
            kd = w[:, 6:][0]
            self.env.pd_control(kd, kp)

            # Start DDPG training
            for i in range(self.MAX_EP):
                self.env.restart()
                observation = self.env.init_state
                ep_reward = 0
                for j in range(self.MAX_EP_STEPS):
                    # Add exploration noise
                    action = self.ddpg.select_action(observation)
                    # print(j, 'th step: ', action)
                    action = np.clip(np.random.normal(action, self.var),
                                     -self.action_bound, self.action_bound)

                    # Observe and store
                    observation_, uncode_observation, reward, done, safe = self.env.step(
                        action)
                    self.ddpg.store_transition(i, j, kp, kd, observation,
                                               observation_, action, reward,
                                               done)

                    # Sample and learn
                    if self.ddpg.pointer > 200:
                        self.ddpg.train()
                        self.var *= .9995  # decay the action randomness

                    # update data
                    observation = observation_
                    ep_reward += reward

                    # determine the kind of outcome (failed / finished / unfinished)
                    if not safe:
                        print('Episode', i + 1, 'Assembly Failed', 'step', j,
                              'reward', ep_reward)
                        break
                    if done:
                        print('Episode', i + 1, 'Assembly Finished', 'step', j,
                              'reward', ep_reward)
                        break
                    if j == self.MAX_EP_STEPS - 1:
                        print('Episode:', i + 1, ' Assembly Unfinished',
                              'reward', ep_reward)
                        # if ep_reward > -300:RENDER = True
                        break
        return self.ddpg.replay_buffer

    # get an artificial trajectory and compute the reward
    def trajectory(self, context, w):
        # set pd parameters
        kp = w[:, :6][0]
        kd = w[:, 6:][0]
        self.env.pd_control(kd, kp)

        # Start artificial trajectory
        observation = np.array([
            0., -0.327, -53.77, 0., 0., 0., -0.001, 0., -0.604, 0., 0.001, 0.
        ])  # init observation
        ep_reward = 0
        for j in range(self.MAX_EP_STEPS):
            action = self.ddpg.select_action(observation)
            action = np.clip(np.random.normal(action, self.var),
                             -self.action_bound, self.action_bound)
            action = cal.actions(observation, action, True)
            ds = self.state_transfer_model.predict(np.array([action]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            new_pos = ds + observation[:6]
            new_force = self.contact_model.predict(np.array([new_pos]),
                                                   return_std=0,
                                                   return_cov=0)[0]
            observation_ = np.hstack((new_pos, new_force))

            # judge the safety and done, then calculate the reward
            reward = -0.01
            ep_reward += reward
            if observation_[6] >= 50 or observation_[7] >= 50 or observation_[
                    8] >= 200 or observation_[9] >= 3 or observation_[
                        10] >= 3 or observation_[11] >= 3:
                reward = (-1 + (observation_[2] + 52.7) / 40)
                ep_reward += reward
                break
            if observation_[2] > -12:
                reward = 1 - j / self.MAX_EP_STEPS
                ep_reward += reward
                break
        return ep_reward
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.metrics import mean_squared_error

kernel = DotProduct() + WhiteKernel()
kernel_2 = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-1, 1e-1))

gp = GaussianProcessRegressor(kernel=kernel,
                              alpha=2,
                              n_restarts_optimizer=5,
                              random_state=0,
                              normalize_y=True)

gp.fit(x_train[:, 0].reshape((-1, 1)), y_train)

y_pred, sigma = gp.predict(x_valid[:, 0].reshape((-1, 1)), return_std=True)
print("Mean squared Error: ", mean_squared_error(y_pred, y_valid))
#print(gp.score(x_valid,y_valid))

#Plotting the graph with confidence interval
plt.figure()
plt.title("XGBoost Regression")
plt.plot(np.arange(len(y_valid)), y_valid, 'r:', label='original')
plt.plot(np.arange(len(y_valid)), y_pred, label='Prediction')
x_v = np.arange(len(y_valid))
plt.fill(np.concatenate([x_v, x_v[::-1]]),
         np.concatenate(
             [y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]),
         alpha=.5,
         fc='b',
         ec='None',
         label='95% confidence interval')
Example #32
gp_matern.fit(X, y)
gp_lls.fit(X, y)

print "Learned kernel Matern: %s" % gp_matern.kernel_
print "Log-marginal-likelihood Matern: %s" \
    % gp_matern.log_marginal_likelihood(gp_matern.kernel_.theta)


print "Learned kernel LLS: %s" % gp_lls.kernel_
print "Log-marginal-likelihood LLS: %s" \
    % gp_lls.log_marginal_likelihood(gp_lls.kernel_.theta)

# Compute GP mean and standard deviation on test data
X_ = np.linspace(-1, 1, 500)

y_mean_lls, y_std_lls = gp_lls.predict(X_[:, np.newaxis], return_std=True)
y_mean_matern, y_std_matern = \
    gp_matern.predict(X_[:, np.newaxis], return_std=True)

plt.figure(figsize=(7, 7))
plt.subplot(2, 1, 1)
plt.plot(X_, f(X_), c='k', label="true function")
plt.scatter(X[:, 0], y, color='k', label="samples")
plt.plot(X_, y_mean_lls, c='r', label="GP LLS")
plt.fill_between(X_, y_mean_lls - y_std_lls, y_mean_lls + y_std_lls,
                 alpha=0.5, color='r')
plt.plot(X_, y_mean_matern, c='b', label="GP Matern")
plt.fill_between(X_, y_mean_matern - y_std_matern, y_mean_matern + y_std_matern,
                 alpha=0.5, color='b')
plt.legend(loc="best")
plt.title("Comparison of learned models")
Example #33
score=reg.score(X_test, y_test)
print(score)
score=reg.score(X_test,y_test)
pred=reg.predict(X_test)
print(mean_squared_error(y_test,pred))
visualize_scatterplot(pred,y_test,score,method='linear')


#MLP
regr = MLPRegressor(random_state=1,max_iter=10000).fit(X_train, y_train)
pred=regr.predict(X_test)
score=regr.score(X_test,y_test)
print(mean_squared_error(y_test,pred))
print(score)
visualize_scatterplot(pred,y_test,score,method="MLP")


#Gaussian Process
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
         random_state=0).fit(X_train, y_train)

pred=gpr.predict(X_test)
score=gpr.score(X_test, y_test)
print(mean_squared_error(y_test,pred))
print(score)
visualize_scatterplot(pred,y_test,score,method="Gaussian")



Example #34
    def test_GP_example(self):
        from sklearn.gaussian_process import GaussianProcessRegressor
        from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

        np.random.seed(1)

        def f(x):
            """The function to predict."""
            return x * np.sin(x)

        # ----------------------------------------------------------------------
        #  First the noiseless case
        X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T

        # Observations
        y = f(X).ravel()

        # Mesh the input space for evaluations of the real function, the prediction and
        # its MSE
        x = np.atleast_2d(np.linspace(0, 10, 1000)).T

        # Instantiate a Gaussian Process model
        kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
        gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

        # Fit to data using Maximum Likelihood Estimation of the parameters
        gp.fit(X, y)

        # Make the prediction on the meshed x-axis (ask for MSE as well)
        y_pred, sigma = gp.predict(x, return_std=True)

        # Plot the function, the prediction and the 95% confidence interval based on
        # the MSE
        fig = plt.figure()
        plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
        plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
        plt.plot(x, y_pred, 'b-', label=u'Prediction')
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                 (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None', label='95% confidence interval')
        plt.xlabel('$x$')
        plt.ylabel('$f(x)$')
        plt.ylim(-10, 20)
        plt.legend(loc='upper left')

        # ----------------------------------------------------------------------
        # now the noisy case
        X = np.linspace(0.1, 9.9, 20)
        X = np.atleast_2d(X).T

        # Observations and noise
        y = f(X).ravel()
        dy = 0.5 + 1.0 * np.random.random(y.shape)
        noise = np.random.normal(0, dy)
        y += noise

        # Instantiate a Gaussian Process model
        gp = GaussianProcessRegressor(kernel=kernel, alpha=(dy / y) ** 2,
                                      n_restarts_optimizer=10)

        # Fit to data using Maximum Likelihood Estimation of the parameters
        gp.fit(X, y)

        # Make the prediction on the meshed x-axis (ask for MSE as well)
        y_pred, sigma = gp.predict(x, return_std=True)

        # Plot the function, the prediction and the 95% confidence interval based on
        # the MSE
        plt.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
        plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations')
        plt.plot(x, y_pred, 'b-', label=u'Prediction')
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                 (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None', label='95% confidence interval')
        plt.xlabel('$x$')
        plt.ylabel('$f(x)$')
        plt.ylim(-10, 20)
        plt.legend(loc='upper left')

        plt.show()
Example #35
autoscaled_x = (x - x.mean()) / x.std()
autoscaled_x_for_prediction = (x_for_prediction - x.mean()) / x.std()
autoscaled_y = (y - y.mean()) / y.std()
mean_of_y = y.mean()
std_of_y = y.std()

# Gaussian process regression
estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
std_of_estimated_y_for_prediction = np.zeros(
    [x_for_prediction.shape[0], number_of_y_variables])
plt.rcParams['font.size'] = 18
for y_number in range(number_of_y_variables):
    model = GaussianProcessRegressor(ConstantKernel() * RBF() + WhiteKernel())
    model.fit(autoscaled_x, autoscaled_y.iloc[:, y_number])
    estimated_y_for_prediction_tmp, std_of_estimated_y_for_prediction_tmp = model.predict(
        autoscaled_x_for_prediction, return_std=True)
    estimated_y_for_prediction[:, y_number] = estimated_y_for_prediction_tmp
    std_of_estimated_y_for_prediction[:,
                                      y_number] = std_of_estimated_y_for_prediction_tmp

    estimated_y = model.predict(autoscaled_x)
    estimated_y = estimated_y * std_of_y.iloc[y_number] + mean_of_y.iloc[
        y_number]
    plt.figure(figsize=figure.figaspect(1))
    plt.scatter(y.iloc[:, y_number], estimated_y)
    y_max = max(y.iloc[:, y_number].max(), estimated_y.max())
    y_min = min(y.iloc[:, y_number].min(), estimated_y.min())
    plt.plot([y_min - 0.05 * (y_max - y_min), y_max + 0.05 * (y_max - y_min)],
             [y_min - 0.05 * (y_max - y_min), y_max + 0.05 * (y_max - y_min)],
             'k-')
    plt.ylim(y_min - 0.05 * (y_max - y_min), y_max + 0.05 * (y_max - y_min))
                              normalize_y=False,
                              optimizer='fmin_l_bfgs_b',
                              random_state=None)
'''
OKish Parameter Values 
gp = GaussianProcessRegressor(alpha=1e-7, copy_X_train=True,
kernel=1**2 + Matern(length_scale=0.2, nu=0.5) + WhiteKernel(noise_level=0.1),
n_restarts_optimizer=10, normalize_y=False,
optimizer='fmin_l_bfgs_b', random_state=None)
'''

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(x_train, y_train)

#x = np.linspace(min(X),max(X),len(X))[:,np.newaxis]
y_pred, sigma = gp.predict(x_test, return_std=True)

## Percentage within the specified factor
Fac = 5
IDX = y_pred / y_test >= 1
K = y_pred[IDX]
Q = y_test[IDX]
L = y_pred[~IDX]
M = y_test[~IDX]
Upper_indices = [i for i, x in enumerate(K <= Fac * Q) if x == True]
Lower_indices = [i for i, x in enumerate(L >= M / Fac) if x == True]
Percent_within_Fac = (len(Upper_indices) +
                      len(Lower_indices)) / len(y_pred) * 100
print("Percentage captured within a factor of {} = {:.2f}".format(
    Fac, Percent_within_Fac))
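
# A loop-free equivalent of the factor-of-Fac check above (sketch only; the
# boolean sums count the same indices as the two list comprehensions):
# Percent_within_Fac = (np.sum(K <= Fac * Q) + np.sum(L >= M / Fac)) / len(y_pred) * 100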
Example #37
plt.show()

#################

# SKLearn equivalent method
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
gpr = GaussianProcessRegressor(kernel=rbf, alpha=noise**2)

# Reuse training data from previous 1D example
gpr.fit(X_train, Y_train)

# Compute posterior predictive mean and covariance
mu_s, cov_s = gpr.predict(X, return_cov=True)

# Obtain optimized kernel parameters
l = gpr.kernel_.k2.get_params()['length_scale']
sigma_f = np.sqrt(gpr.kernel_.k1.get_params()['constant_value'])

# Compare with previous results
assert(np.isclose(l_opt, l))
assert(np.isclose(sigma_f_opt, sigma_f))

# Plot the results
plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)


############
# https://sheffieldml.github.io/GPy/
Example #38
def test_predict_cov_vs_std(kernel):
    # Test that predicted std.-dev. is consistent with cov's diagonal.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
    y_mean, y_cov = gpr.predict(X2, return_cov=True)
    y_mean, y_std = gpr.predict(X2, return_std=True)
    assert_almost_equal(np.sqrt(np.diag(y_cov)), y_std)
# print("Log-marginal-likelihood: %.3f"
#       % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))

gp_test.fit(XT, y)

print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
      % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))


X_ = []
for i in range(15):
    X_.append([i+0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
XT_ = scaler.transform(X_)
print 'X_ ', XT_
y_pred, y_std = gp_test.predict(XT_, return_std=True)

# Plot the predict result
X = np.array(X)
y = np.array(y)
X_ = np.array(X_)
plt.scatter(X[:, 0], y, c='k')
plt.plot(X_[:, 0], y_pred)
plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std, alpha = 0.5, color='k')

plt.xlim(X_[:, 0].min(), X_[:, 0].max())
plt.xlabel("x")
plt.ylabel(r"u")
plt.title(r"Test SquareExpWithBool Kernel")
plt.tight_layout()
plt.show()
Example #40
# :class:`~sklearn.gaussian_process.kernels.RBF` will serve to fit the
# non-linearity between the data and the target.
#
# However, we will show that the hyperparameter space contains several local
# minima. This highlights the importance of initial hyperparameter values.
#
# We will create a model using a kernel with a high noise level and a large
# length scale, which will explain all variations in the data by noise.
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(
    1e-2, 1e3)) + WhiteKernel(noise_level=1, noise_level_bounds=(1e-5, 1e1))
gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
gpr.fit(X_train, y_train)
y_mean, y_std = gpr.predict(X, return_std=True)

# %%
plt.plot(X, y, label="Expected signal")
plt.scatter(x=X_train[:, 0],
            y=y_train,
            color="black",
            alpha=0.4,
            label="Observations")
plt.errorbar(X, y_mean, y_std)
plt.legend()
plt.xlabel("X")
plt.ylabel("y")
_ = plt.title(
    f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: "
    f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}",
#plt.ylim(0, 2000)
#plt.legend(loc='upper right') 
#plt.show()

x_T = np.atleast_2d(np.linspace(-0.07, 0.01, 1000)).T
x_n = np.atleast_2d(np.linspace(-0.075, 0.01, 1000)).T
kernel_T = C(10.0, (1e-5, 1e5)) * RBF(10.0, (1e-5, 1e5))
kernel_n = C(0.01, (1e-5, 1e-1)) * RBF(0.01, (1e-5, 1e5))

gp_L_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(L_mode_T_x))**2.,
                              n_restarts_optimizer=10)
L_mode_T_X = np.atleast_2d(L_mode_T_x).T
# Fit to data using Maximum Likelihood Estimation of the parameters
gp_L_mode_T.fit(L_mode_T_X, L_mode_T_y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
L_mode_T_yp, L_mode_T_sigma = gp_L_mode_T.predict(x_T, return_std=True)

gp_H_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(H_mode_T_x))**2.,
                              n_restarts_optimizer=10)
H_mode_T_X = np.atleast_2d(H_mode_T_x).T
# Fit to data using Maximum Likelihood Estimation of the parameters
gp_H_mode_T.fit(H_mode_T_X, H_mode_T_y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
H_mode_T_yp, H_mode_T_sigma = gp_H_mode_T.predict(x_T, return_std=True)

gp_I_mode_T = GaussianProcessRegressor(kernel=kernel_T, alpha=(200.*noise(I_mode_T_x))**2.,
                              n_restarts_optimizer=10)
I_mode_T_X = np.atleast_2d(I_mode_T_x).T
# Fit to data using Maximum Likelihood Estimation of the parameters
gp_I_mode_T.fit(I_mode_T_X, I_mode_T_y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
gp_kernel = ExpSineSquared(1.0, 5.0,
                           periodicity_bounds=(1e-2, 1e1)) + WhiteKernel(1e-1)
gpr = GaussianProcessRegressor(kernel=gp_kernel)
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
print("Time for KRR prediction: %.3f" % (time.time() - stime))

# Predict using gaussian process regressor
stime = time.time()
y_gpr = gpr.predict(X_plot, return_std=False)
print("Time for GPR prediction: %.3f" % (time.time() - stime))

stime = time.time()
y_gpr, y_std = gpr.predict(X_plot, return_std=True)
print("Time for GPR prediction with standard-deviation: %.3f" %
      (time.time() - stime))

# Plot results
plt.figure(figsize=(10, 5))
lw = 2
plt.scatter(X, y, c="k", label="data")
plt.plot(X_plot, np.sin(X_plot), color="navy", lw=lw, label="True")
plt.plot(X_plot,
         y_kr,
         color="turquoise",
Example #43
    def test_scikit_GP_SNRefsdal(self):
        from sklearn.gaussian_process import GaussianProcessRegressor
        from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

        # add data
        dm = -29.38  # D = 7.5e6 pc
        # dm = -30.4  # D = 12.e6 pc
        image = "S1"
        bname = 'F160W'
        curves = snrefsdal.read_curves(snrefsdal.path_data, image)
        lc = curves.get(bname)
        # lc.mshift = dm
        t = lc.Time
        y = lc.Mag
        yerr = lc.MagErr
        #
        # Instantiate a Gaussian Process model
        # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
        # gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
        gp = GaussianProcessRegressor()

        # Fit to data using Maximum Likelihood Estimation of the parameters
        X = np.atleast_2d(t).T
        gp.fit(X, y)

        # gp = GaussianProcessRegressor()

        # Fit to data using Maximum Likelihood Estimation of the parameters
        # gp.fit(t, y)

        # Make the prediction on the meshed x-axis (ask for MSE as well)
        # y_star, err_y_star = gp.predict(t, return_std=True)
        # Make the prediction on the meshed x-axis (ask for MSE as well)
        y_pred, sigma = gp.predict(t, return_std=True)

        # k = gptools.SquaredExponentialKernel(param_bounds=[(min(np.abs(y)), max(np.abs(y))),
        #                                                    (0, np.std(t))])
        # # k = gptools.SquaredExponentialKernel(param_bounds=[(min(np.abs(y)), max(np.abs(y))),
        # #                                                    (0, np.std(t))])
        # gp = gptools.GaussianProcess(k)
        # # gp = gptools.GaussianProcess(k, mu=gptools.LinearMeanFunction())
        # gp.add_data(t, y, err_y=yerr)
        #
        # gp.optimize_hyperparameters()
        # y_star, err_y_star = gp.predict(t)

        fig = plt.figure()
        ax = fig.add_axes((0.1, 0.3, 0.8, 0.65))
        ax.invert_yaxis()

        ax.plot(t, y, color='blue', label='L bol', lw=2.5)
        ax.errorbar(t, y, yerr=yerr, fmt='o', color='blue', label='%s obs.' % bname)

        #
        # ax.plot(t, y_star, color='red', ls='--', lw=1.5, label='GP')
        ax.plot(t, y_pred, '-', color='gray')
        # ax.fill_between(t, y_star - 2 * err_y_star, y_star + 2 * err_y_star, color='gray', alpha=0.3)
        ax.fill(np.concatenate([t, t[::-1]]),
                np.concatenate([y_pred - 1.9600 * sigma,
                                (y_pred + 1.9600 * sigma)[::-1]]),
                alpha=.5, fc='b', ec='None', label='95% confidence interval')

        plt.show()
Example #44
print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
      % gp_test.log_marginal_likelihood(gp_test.kernel_.theta))
print("GPML kernel: %s" % gp_test.kernel_)
print("Log-marginal-likelihood: %.3f"
       % gp_test.log_marginal_likelihood_data(XT, y))


start_time = time()
X_ = []
for i in range(3):
    X_.append([i+0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
XT_ = scaler.transform(X_)
print 'X_ ', XT_
y_pred, y_std = gp_test.predict(XT_, return_std=True)
print 'y_pred: ', y_pred
print 'time used for prediction: ', time() - start_time

# Plot the predict result
X = np.array(X)
y = np.array(y)
X_ = np.array(X_)
plt.scatter(X[:, 0], y, c='k')
plt.plot(X_[:, 0], y_pred)
plt.fill_between(X_[:, 0], y_pred - y_std, y_pred + y_std, alpha = 0.5, color='k')

plt.xlim(X_[:, 0].min(), X_[:, 0].max())
plt.xlabel("x")
plt.ylabel(r"u")
plt.title(r"Test SquareExpWithBool Kernel")
gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \
    + WhiteKernel(1e-1)
gpr = GaussianProcessRegressor(kernel=gp_kernel)
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
print("Time for KRR prediction: %.3f" % (time.time() - stime))

# Predict using kernel ridge
stime = time.time()
y_gpr = gpr.predict(X_plot, return_std=False)
print("Time for GPR prediction: %.3f" % (time.time() - stime))

stime = time.time()
y_gpr, y_std = gpr.predict(X_plot, return_std=True)
print("Time for GPR prediction with standard-deviation: %.3f"
      % (time.time() - stime))

# Plot results
plt.figure(figsize = (10,5))
plt.scatter(X, y, c='k', label='data')
plt.plot(X_plot, np.sin(X_plot), c='k', label='True')
plt.plot(X_plot, y_kr, c='g', label='KRR (%s)' % kr.best_params_)
plt.plot(X_plot, y_gpr, c='r', label='GPR (%s)' % gpr.kernel_)
plt.fill_between(X_plot[:, 0], y_gpr - y_std, y_gpr + y_std, color='r',
                 alpha=0.2)
# heteroscedastic    
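# (HeteroscedasticKernel is not part of scikit-learn; it most likely comes from
# the gp_extras package, which provides input-dependent noise kernels)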
prototypes = KMeans(n_clusters=8).fit(X).cluster_centers_
kernel = C(1.0, (1e-10, 1000)) * RBF(length_scale = [10., 100.], length_scale_bounds=[(1e-3, 1e3),(1e-4, 1e4)]) \
    + HeteroscedasticKernel.construct(prototypes, 1e-3, (1e-10, 50.0),
                                      gamma=1.0, gamma_bounds="fixed")
#gp.fit(X[:, np.newaxis], y)


gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=100)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y.reshape(-1,1)) #removing reshape results in a different error

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(inputs_x_array, return_std=True)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(inputs_x_array[:,0],inputs_x_array[:,1],y_pred)
ax.scatter(X[:,0],X[:,1],y,color='orange')
ax.set_xlabel('X Label (radius)')
ax.set_ylabel('Y Label (time)')
ax.set_zlabel('Z Label (density)')
plt.show()

index_y1 = 3 #only valid until len_x2
print("Time is "+str(inputs_x_array[:,1][index_y1::len_x2][0])+"s")
plt.figure()
plt.scatter(inputs_x_array[:,0][index_y1::len_x2],y_pred[index_y1::len_x2]) #from x1_min to x1_max
plt.xlabel('X Label (radius)')
def run(args, bkghist, trainHisto, optKernel):


    GPh = GPHisto(bkghist)

    # The distributions with no window removed.
    X_t = GPh.getXArr()
    y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()
    if args.noWindow:
        X = X_t
        y = y_t
        dy = dy_t


    x = GPh.getXArr()
    gp = None

    kernel = optKernel
    gp = GaussianProcessRegressor(kernel=kernel
                                    ,optimizer=None
                                    ,alpha=dy**2
                                    )


    gp.fit(X,y)
    print gp.kernel_

    length = float(re.search('length_scale=(\d+(\.\d+)?)', gp.kernel_.__repr__()).group(1))

    y_pred, sigma = gp.predict(x, return_std=True)

    outhist = GPh.getHisto(y_pred, sigma, 'GP Fit')
    #bkg = GPh.getHisto(y, dy, 'Full Background')


    ###  RooFit part
    myy = RooRealVar('myy','myy',105,160)
    #nSig = RooRealVar('nSig','nSig',-200,1000)
    sigMass = 125
    #pdf = RooGP.RooGP("mypdf", "CustomPDF",myy ,nSig, sigMass, trainHisto, dataHisto)
    pdf = RooGPBkg.RooGPBkg("bkgPDF", "bkg only PDF",myy, trainHisto, bkghist)

    data = RooDataHist("dh", "dh", RooArgList(myy), bkghist)


    c1 = TCanvas('c1','c1')
    frame = myy.frame()
    data.plotOn(frame, RooFit.MarkerColor(kRed))
    fitResult = pdf.fitTo(data, RooFit.Save())
#    pdf.gpHisto.Draw()
#    data.Draw('same')
#    outhist.Draw('samehist')
#    outhist.SetFillColorAlpha(kWhite, 0)
#    bkghist.Draw('same')
#    bkghist.SetMarkerColor(kBlack)
#    outhist.Divide(bkghist)
#    outhist.Draw()
    pdf.plotOn(frame)
    #fitResult.plotOn(frame)
    frame.Draw()
    #nSig.Print()
    c1.Print(args.outDir+'/test_GP.pdf')


    pass  #Run
dy = GPh.getErrArr()

x = np.atleast_2d(np.linspace(start=0., stop=10, num=1000)).T  # Predict at each data point

#kernel = C(1.0, (sigmaMin, 1e5)) * RBF(2.0, (lMin, 1e3)) #squared exponential kernel
kernel = C(1.0, (1e-3, 1e5)) * FallExp() * Gibbs()

gp = GaussianProcessRegressor(kernel=kernel
                                #,optimizer=None
                                ,alpha=(dy**2)
                                ,n_restarts_optimizer=15
                                )

gp.fit(X,y)
print gp.kernel_
y_pred, sigma = gp.predict(x, return_std=True)

outhist = TH1F('GP','GP', 1000,0,10)
for index,cont in enumerate(y_pred):
    outhist.SetBinContent(index+1, cont)
    outhist.SetBinError(index+1, 1.96*sigma[index])


canv = TCanvas('plot')
canv.cd()

funcHist = func.GetHistogram()
funcHist.Scale(hist.Integral()*hist.GetBinWidth(2)/(funcHist.Integral()*funcHist.GetBinWidth(2)))

hist.SetLineColor(kBlue)
hist.SetMarkerStyle(20)
Example #49
class SurrogateACESOptimizer(ACESOptimizer):
    def __init__(self, context_boundaries, n_context_samples, kappa,
                 active=True, **kwargs):
        super(SurrogateACESOptimizer, self).__init__(
            context_boundaries=context_boundaries, active=active, **kwargs)
        self.n_context_samples = n_context_samples
        self.kappa = kappa

    def init(self, n_params, n_context_dims):
        super(SurrogateACESOptimizer, self).init(n_params, n_context_dims)

    def _determine_contextparams(self, optimizer):
        """Select context and params jointly using ACES."""
        # Choose the first samples uniform randomly
        if len(optimizer.X_) < optimizer.initial_random_samples:
            cx = np.random.uniform(self.cx_boundaries[:, 0],
                                   self.cx_boundaries[:, 1])
            return cx[:self.context_dims], cx[self.context_dims:]

        # Prepare entropy search objective
        self._init_es_ensemble()
        # Generate data for function mapping
        # query_context x query_parameters x eval_context -> entropy reduction
        n_query_points = 500
        n_data_dims = 2 * self.context_dims + self.dimension
        X = np.empty((n_query_points, n_data_dims))
        y = np.empty(n_query_points)
        for i in range(n_query_points):
            # Select query point and evaluation context randomly
            query = np.random.uniform(self.cx_boundaries[:, 0],
                                      self.cx_boundaries[:, 1])
            ind = np.random.choice(self.n_context_samples)
            # Store query point in X and value of entropy-search in y
            X[i, :self.context_dims + self.dimension] = query
            X[i, self.context_dims + self.dimension:] = \
                self.context_samples[ind] - query[:self.context_dims]
            y[i] = self.entropy_search_ensemble[ind](query)[0]

        # Fit GP model to this data
        kernel = C(1.0, (1e-10, 100.0)) \
            * RBF(length_scale=(1.0,)*n_data_dims,
                  length_scale_bounds=[(0.01, 10.0),]*n_data_dims) \
            + WhiteKernel(1.0, (1e-10, 100.0))
        self.es_surrogate = GaussianProcessRegressor(kernel=kernel)
        self.es_surrogate.fit(X, y)

        # Select query based on mean entropy reduction in surrogate model
        # predictions
        contexts = np.random.uniform(self.context_boundaries[:, 0],
                                     self.context_boundaries[:, 1],
                                     (250, self.context_dims))
        def objective_function(cx):
            X_query = np.empty((250, n_data_dims))
            X_query[:, :self.context_dims + self.dimension] = cx
            X_query[:, self.context_dims + self.dimension:] = \
                contexts - cx[:self.context_dims]
            es_pred, es_cov = \
                self.es_surrogate.predict(X_query, return_cov=True)
            return es_pred.mean() + self.kappa * np.sqrt(es_cov.mean())

        cx = global_optimization(
                objective_function, boundaries=self.cx_boundaries,
                optimizer=self.optimizer, maxf=optimizer.maxf)
        return cx[:self.context_dims], cx[self.context_dims:]


    def _init_es_ensemble(self):
        # Determine samples at which CES will be evaluated by
        # 1. uniform random sampling
        self.context_samples = \
            np.random.uniform(self.context_boundaries[:, 0],
                              self.context_boundaries[:, 1],
                              (self.n_context_samples*25, self.context_dims))
        # 2. subsampling via k-means clustering
        kmeans = KMeans(n_clusters=self.n_context_samples, n_jobs=1)
        self.context_samples = \
            kmeans.fit(self.context_samples).cluster_centers_

        # 3. Create entropy search ensemble
        self.entropy_search_ensemble = []
        for i in range(self.n_context_samples):
            cx_boundaries_i = np.copy(self.cx_boundaries)
            cx_boundaries_i[:self.context_dims] = \
                self.context_samples[i][:, np.newaxis]
            entropy_search_fixed_context = deepcopy(self.acquisition_function)
            entropy_search_fixed_context.set_boundaries(cx_boundaries_i)

            self.entropy_search_ensemble.append(entropy_search_fixed_context)

    def _create_acquisition_function(self, name, model, **kwargs):
        if name not in ["EntropySearch", "MinimalRegretSearch"]:
            raise ValueError("%s acquisition function not supported."
                             % name)
        return create_acquisition_function(name, model, **kwargs)
Example #50
# Generating sample randomly
data_x = [[4.0 * (-0.5 + random.random()), 4.0 * (-0.5 + random.random())]
          for i in range(200)]
data_y = [[x[0] * math.sin(3.0 * x[1])] for x in data_x]

# Training GPR (Gaussian Process for Regression) so that GPR can map from x to y.
# You can play with different kernels
#kernel= C(1.0, (1e-3, 1e3)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1.0, 1.0)) * RBF(1.0, (0.1, 10.0))
#kernel= C(1.0, (1e-3, 1e3)) * RBF(3.0, (3.0, 3.0))
#kernel= RBF(1.0, (0.1, 10.0))
kernel = RBF(3.0, (3.0, 3.0))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)
gp.fit(data_x, data_y)
f = lambda x: gp.predict([x])[0, 0]

# Now we can compute y=f(x) for any x
print('f([0.0,0.0])=', f([0.0, 0.0]))
print('f([1.0,1.0])=', f([1.0, 1.0]))
print('f([1.5,2.0])=', f([1.5, 2.0]))

#Plot gp.predict(x)
plot, plot3d = PlotF(f, xmin=[-2, -2], xmax=[2, 2], dx=0.1, show=False)
#Plot data points
plot3d.scatter(np.array(data_x).T[0],
               np.array(data_x).T[1],
               data_y,
               marker='*',
               color='red')
plot.show()
Example #51
def main(_):
  num_parallel_thetas = FLAGS.num_parallel_thetas
  num_theta_batches = FLAGS.num_theta_batches
  num_steps_autoencoder = 0 if FLAGS.uniform_weights else TRAINING_STEPS

  input_dim = len(FEATURES)

  training_df = pd.read_csv(FLAGS.training_data_path, header=0, sep=',')
  testing_df = pd.read_csv(FLAGS.testing_data_path, header=0, sep=',')
  validation_df = pd.read_csv(FLAGS.validation_data_path, header=0, sep=',')

  add_price_quantiles(training_df)
  add_price_quantiles(testing_df)
  add_price_quantiles(validation_df)

  train_labels = np.log(training_df['price'])
  validation_labels = np.log(validation_df['price'])
  test_labels = np.log(testing_df['price'])
  train_features = training_df[FEATURES]
  validation_features = validation_df[FEATURES]
  test_features = testing_df[FEATURES]
  validation_price = validation_df['price']
  test_price = testing_df['price']

  tf.reset_default_graph()
  x = tf.placeholder(tf.float32, shape=(None, input_dim), name='x')
  y = tf.placeholder(tf.float32, shape=(None, 1), name='y')

  xy = tf.concat([x, y], axis=1)
  autoencoder_layer1 = tf.layers.dense(
      inputs=xy, units=100, activation=tf.sigmoid)
  autoencoder_embedding_layer = tf.layers.dense(
      inputs=autoencoder_layer1,
      units=FLAGS.embedding_dim,
      activation=tf.sigmoid)
  autoencoder_layer3 = tf.layers.dense(
      inputs=autoencoder_embedding_layer, units=100, activation=tf.sigmoid)
  autoencoder_out_x = tf.layers.dense(
      inputs=autoencoder_layer3, units=input_dim)
  autoencoder_out_y = tf.layers.dense(inputs=autoencoder_layer3, units=1)

  autoencoder_y_loss = tf.losses.mean_squared_error(
      labels=y, predictions=autoencoder_out_y)
  autoencoder_x_loss = tf.losses.mean_squared_error(
      labels=x, predictions=autoencoder_out_x)
  autoencoder_loss = autoencoder_x_loss + autoencoder_y_loss
  autoencoder_optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
      autoencoder_loss)

  parallel_outputs = []
  parallel_losses = []
  parallel_optimizers = []

  parallel_thetas = tf.placeholder(
      tf.float32,
      shape=(num_parallel_thetas, FLAGS.embedding_dim),
      name='parallel_thetas')
  unstack_parallel_thetas = tf.unstack(parallel_thetas, axis=0)
  embedding = tf.placeholder(
      tf.float32, shape=(None, FLAGS.embedding_dim), name='embedding')

  with tf.variable_scope('regressors'):
    for theta_index in range(num_parallel_thetas):
      output = regressor(x)
      theta = tf.reshape(
          unstack_parallel_thetas[theta_index], shape=[FLAGS.embedding_dim, 1])
      optimizer, loss = optimization(output, y, embedding, theta, LEARNING_RATE)

      parallel_outputs.append(output)
      parallel_losses.append(loss)
      parallel_optimizers.append(optimizer)

  init = tf.global_variables_initializer()
  regressors_init = tf.variables_initializer(
      tf.global_variables(scope='regressors'))

  kernel = RBF(
      length_scale=FLAGS.sampling_radius,
      length_scale_bounds=(FLAGS.sampling_radius * 1e-3, FLAGS.sampling_radius *
                           1e3)) * ConstantKernel(1.0, (1e-3, 1e3))

  thetas = np.zeros(shape=(0, FLAGS.embedding_dim))
  validation_metrics = []
  test_metrics = []

  with tf.Session() as sess:
    sess.run(init)

    # Training autoencoder
    for _ in range(num_steps_autoencoder):
      batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
      batch_x = train_features.iloc[batch_index, :].values
      batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
      _, _ = sess.run([autoencoder_optimizer, autoencoder_loss],
                      feed_dict={
                          x: batch_x,
                          y: batch_y,
                      })

    # GetCandidatesAlpha (Algorithm 2 in paper)
    for theta_batch_index in range(num_theta_batches):
      sess.run(regressors_init)
      if FLAGS.uniform_weights:
        theta_batch = np.zeros(shape=(num_parallel_thetas, FLAGS.embedding_dim))
      elif theta_batch_index == 0:
        # We first start uniformly.
        theta_batch = sample_from_ball(
            size=(num_parallel_thetas, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
      else:
        # Use UCB to generate candidates.
        theta_batch = np.zeros(shape=(0, FLAGS.embedding_dim))
        sample_thetas = np.copy(thetas)
        sample_validation_metrics = validation_metrics[:]
        candidates = sample_from_ball(
            size=(10000, FLAGS.embedding_dim),
            sampling_radius=FLAGS.sampling_radius)
        for theta_index in range(num_parallel_thetas):
          gp = GaussianProcessRegressor(
              kernel=kernel, alpha=1e-4).fit(sample_thetas,
                                             sample_validation_metrics)

          metric_mles, metric_stds = gp.predict(candidates, return_std=True)
          metric_lcbs = metric_mles - FLAGS.p_q_value * metric_stds

          best_index = np.argmin(metric_lcbs)
          best_theta = [candidates[best_index]]
          best_theta_metric_ucb = metric_mles[best_index] \
            + FLAGS.p_q_value * metric_stds[best_index]
          theta_batch = np.concatenate([theta_batch, best_theta])

          # Add candidate to the GP, assuming the metric observation is the LCB.
          sample_thetas = np.concatenate([sample_thetas, best_theta])
          sample_validation_metrics.append(best_theta_metric_ucb)

      # Training regressors
      for _ in range(TRAINING_STEPS):
        batch_index = random.sample(range(len(train_labels)), BATCH_SIZE)
        batch_x = train_features.iloc[batch_index, :].values
        batch_y = train_labels.iloc[batch_index].values.reshape(BATCH_SIZE, 1)
        batch_embedding = sess.run(
            autoencoder_embedding_layer, feed_dict={
                x: batch_x,
                y: batch_y,
            })
        _, _ = sess.run(
            [parallel_optimizers, parallel_losses],
            feed_dict={
                x: batch_x,
                y: batch_y,
                embedding: batch_embedding,
                parallel_thetas: theta_batch,
            })

      parallel_validation_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: validation_features.values,
              y: validation_labels.values.reshape(len(validation_labels), 1),
          })
      parallel_validation_metrics = [
          metric(validation_labels, validation_output, validation_price)
          for validation_output in parallel_validation_outputs
      ]
      thetas = np.concatenate([thetas, theta_batch])
      validation_metrics.extend(parallel_validation_metrics)

      parallel_test_outputs = sess.run(
          parallel_outputs,
          feed_dict={
              x: test_features.values,
              y: test_labels.values.reshape(len(test_labels), 1),
          })
      parallel_test_metrics = [
          metric(test_labels, test_output, test_price)
          for test_output in parallel_test_outputs
      ]
      test_metrics.extend(parallel_test_metrics)

  best_observed_index = np.argmin(validation_metrics)
  print('[metric] validation={}'.format(
      validation_metrics[best_observed_index]))
  print('[metric] test={}'.format(test_metrics[best_observed_index]))

  return 0
Example #52
best_model_dict = gp_hyper.cv_results_['params'][gp_hyper.best_index_]
best_kernel = best_model_dict["kernel"]

# Define the kernel as product with the constant kernel
kernel = C(1.0, (1e-3, 1e3)) * best_kernel
best_model = GaussianProcessRegressor(kernel=best_kernel,
                                      alpha=1e-4 * sigma,
                                      n_restarts_optimizer=args.restarts)

# Fit to data using the Maximum Likelihood Estimation of the parameters
print("\nFitting Gaussian Process: {}".format(best_kernel))
best_model.fit(tau, sigma)

# Make the prediction on the meshed x-axis (ask for MSE as well)
print("\nMaking predictions")
sigma_pred2, std_dev2 = best_model.predict(tau_exact, return_std=True)

# Computing chi2 estimator
num = pow(sigma_exact - sigma_pred2, 2)
# Correct negative variances
corrected_std = np.maximum(pow(std_dev2, 2), 1e-10)
chi2 = np.sum(num / corrected_std) / len(sigma_pred2)

# Computing ratio GP vs exact
ratio_GP2 = abs(sigma_pred2 / sigma_exact)
print("sigma pred: {}".format(sigma_pred2.shape))
print("sigma exact: {}".format(sigma_exact.shape))
print("ratio: {}".format(ratio_GP2.shape))
print("chi2: {}".format(chi2))

# Plot the function, the prediction and the confidence intervals for the best model
class GPRPredictor:
    def __init__(self, res, **kwargs):
        """
        Class to evaluate GPR fits constructed by GPRFitter class in
        pySurrogate/fit_gpr.py
        """

        self.data_mean = res['data_mean']
        self.data_std = res['data_std']

        # load GPR fit
        self.GPR_obj = GaussianProcessRegressor()
        GPR_params = res['GPR_params']
        self._set_gpr_params(self.GPR_obj, GPR_params)

        # load LinearRegression fit
        lin_reg_params = res['lin_reg_params']
        if lin_reg_params is not None:
            self.linearModel = linear_model.LinearRegression()
            self._set_lin_reg_params(self.linearModel, lin_reg_params)
        else:
            self.linearModel = None

    def _set_kernel_params(self, kernel_params):
        """ Recursively sets paramters for a kernel and returns the final
            kernel.
        """

        # copy so as to not overwrite
        kernel_params = kernel_params.copy()

        # initialize kernel
        name = kernel_params['name']
        kernel = getattr(gaussian_process.kernels, name)
        del kernel_params['name']

        params = {}
        for key in kernel_params.keys():
            if type(kernel_params[key]) == dict:
                # recursively set kernels
                params[key] = self._set_kernel_params(kernel_params[key])
            else:
                params[key] = kernel_params[key]

        if name == 'Sum' or name == 'Product':
            kernel = kernel(params['k1'], params['k2'])
        else:
            kernel = kernel()

        kernel.set_params(**params)
        return kernel

    def _set_gpr_params(self, gp_obj, gp_params):
        """ Sets the fitted hyperparameter for a GPR object.
            This can be used to load a previously constructed fit.

            NOTE: If you get errors like:
            "AttributeError: 'GaussianProcessRegressor' object has
            no attribute -----",
            try adding that attribute to GPR_SAVE_ATTRS_DICT
        """
        for attr in GPR_SAVE_ATTRS_DICT:
            if attr == 'kernel_':
                param = self._set_kernel_params(gp_params[attr])
            elif attr == '_y_train_std':
                # In scikit-learn versions before 0.23, there was no
                # _y_train_std, which is the same as saying _y_train_std=1. If
                # this fit was constructed using an earlier version of
                # scikit-learn, it would not have an attribute called
                # _y_train_std, so we just set it to 1. This way, the fit can
                # be evaluated with any version of scikit-learn.
                if '_y_train_std' not in gp_params.keys():
                    param = 1
                else:
                    param = gp_params[attr]
            else:
                param = gp_params[attr]
            setattr(gp_obj, attr, param)

    def _set_lin_reg_params(self, lr_obj, lr_params):
        """ Sets the fitted parameters for a LinearRegression object.
            This can be used to load a previously constructed fit.

            NOTE: If you get errors like:
            "AttributeError: 'LinearRegression' object has
            no attribute -----",
            try adding that attribute to LINREG_SAVE_ATTRS_DICT
        """
        for attr in LINREG_SAVE_ATTRS_DICT:
            param = lr_params[attr]
            setattr(lr_obj, attr, param)

    def _reconstruct_normalized_data(self, data_normed, data_normed_err):
        """
        The inverse operation of 'GPRFitter._normalize()'
        Returns the reconstructed data and error estimate.
        """
        return data_normed * self.data_std + self.data_mean, \
            data_normed_err * self.data_std

    def GPR_predict(self, x, estimate_err=False):
        """
        Evaluates a GPR fit.
        First evaluates the GPR fit to get the prediction for the normalized
        data. Then reconstructs the un-normalized data.
        Finally adds the linear model fit if it was done in GPRFitter.
        """

        # Get fit prediction and error estimate for normalized data
        fit_res = self.GPR_obj.predict(x, return_cov=estimate_err)

        if estimate_err:
            y_normalized_pred, cov_normalized_pred = fit_res
            err_normalized_pred = np.sqrt(cov_normalized_pred.flatten())
        else:
            y_normalized_pred = fit_res
            err_normalized_pred = fit_res * 0

        # Reconstruct to get un-normalized prediction
        y_pred, err_pred = self._reconstruct_normalized_data(
            y_normalized_pred, err_normalized_pred)

        if self.linearModel is not None:
            # Add the linear prediction that was subtracted before
            # doing the fit
            y_pred = y_pred + self.linearModel.predict(x)

        val_dict = {
            'y': y_pred,
        }

        if estimate_err:
            val_dict['y_gprErr'] = err_pred

        return val_dict
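# A small, self-contained sketch (not part of the original class) of the
# mechanism _set_kernel_params relies on: kernel classes can be looked up by
# name on sklearn.gaussian_process.kernels and re-hydrated from a plain
# parameter dict via set_params(). The dict below is a hypothetical example.
from sklearn import gaussian_process

saved_kernel_params = {'length_scale': 0.5, 'length_scale_bounds': (1e-2, 1e2)}
kernel_cls = getattr(gaussian_process.kernels, 'RBF')
rebuilt_kernel = kernel_cls()
rebuilt_kernel.set_params(**saved_kernel_params)
print(rebuilt_kernel)  # RBF(length_scale=0.5)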
Example #54
        x_pr_grid,
        B_postsamples,
        T_fwdsamples,
        seed=200)

    jnp.save('plot_files/ccopula_lidar_logpdf_pr{}'.format(x_pr_val),
             logpdf_pr)
    jnp.save('plot_files/ccopula_lidar_logcdf_pr{}'.format(x_pr_val),
             logcdf_pr)

    #Convergence plot
    seed = 200
    T_fwdsamples = 10000
    logcdf_pr_conv, logpdf_pr_conv, pdiff, cdiff = check_convergence_pr_cregression(
        copula_cregression_obj, x, y_pr_grid, x_pr_grid, 1, T_fwdsamples, seed)
    jnp.save('plot_files/ccopula_lidar_pr_pdiff_pr{}'.format(x_pr_val), pdiff)

#Gaussian Process
print('Method: GP')
from sklearn.gaussian_process.kernels import RBF, ConstantKernel, WhiteKernel
from sklearn.gaussian_process import GaussianProcessRegressor

kernel = ConstantKernel() * RBF() + WhiteKernel()
gp = GaussianProcessRegressor(kernel=kernel,
                              n_restarts_optimizer=10,
                              normalize_y=True)
gp.fit(x, y)
mean_gp, std_gp = gp.predict(x_plot.reshape(-1, 1), return_std=True)
jnp.save('plot_files/gp_lidar_mean', mean_gp)
jnp.save('plot_files/gp_lidar_std', std_gp)
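# Hedged addition (not in the original snippet): the optimized hyperparameters
# can be read back from the fitted regressor. With kernel = ConstantKernel() *
# RBF() + WhiteKernel(), the WhiteKernel term is the second operand of the sum,
# gp.kernel_.k2.
print('Optimized kernel:', gp.kernel_)
print('Log-marginal-likelihood:', gp.log_marginal_likelihood(gp.kernel_.theta))
print('Fitted noise level:', gp.kernel_.k2.noise_level)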
Example #55
# Observations
y = f(X).ravel()

# Mesh the input space for evaluations of the real function, the prediction and
# its MSE
x = np.atleast_2d(np.linspace(0, 10, 1000)).T

# Instantiate a Gaussian Process model
kernel = C(1.0, (0.001, 1000)) * RBF(10, (0.01, 100))
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X, y)

# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred, sigma = gp.predict(x, return_std=True)

# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
fig = plt.figure()
plt.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$')
plt.plot(X, y, 'r.', markersize=10, label=u'Observations')
plt.plot(x, y_pred, 'b-', label=u'Prediction')
plt.fill(np.concatenate([x, x[::-1]]),
         np.concatenate(
             [y_pred - 1.9600 * sigma, (y_pred + 1.9600 * sigma)[::-1]]),
         alpha=.5,
         fc='b',
         ec='None',
         label='95% confidence interval')
plt.xlabel('$x$')
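# Hedged completion (an assumption; the excerpt is truncated at this point):
# the standard form of this example typically closes out the figure like this.
plt.ylabel('$f(x)$')
plt.ylim(-10, 20)
plt.legend(loc='upper left')
plt.show()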
kernel = RBF(length_scale=1, length_scale_bounds=(1e-1,1e1))
#ESS(length_scale=2, periodicity=10, length_scale_bounds=(5e-3, 5e3),periodicity_bounds=(5e-3, 5e3)) 

gp = GaussianProcessRegressor(kernel=kernel, normalize_y=False,\
                              n_restarts_optimizer=15)


# Fit to data using Maximum Likelihood Estimation of the parameters
gp.fit(X_avg1, y_avg1)
#print "Fitting done"
#print gp.get_params()

print "Marginal likelihood: ", gp.log_marginal_likelihood()
#print gp.score(X, y)
# Make the prediction on the meshed x-axis (ask for MSE as well)
y_pred0, sigma0 = gp.predict(X_avg1, return_std=True)
y_pred, sigma = gp.predict(x_avg1, return_std=True)
print "Predicting done"
#print "__________\n", gp.score(y_pred.reshape(-1, 1), y_test)

# Plot the function, the prediction and the 95% confidence interval based on
# the MSE
fig = plt.figure()
nounou_minute = []

plt.plot(X_avg1, y_pred0, "g--")
plt.plot(x_avg1, y_pred, 'b--')
plt.fill(np.concatenate([X_avg1, X_avg1[::-1]]),
         np.concatenate([y_pred0-1.96*sigma0,
                         (y_pred0+1.96*sigma0)[::-1]]),
        alpha=0.5, fc="g", ec="None")
def run(args, mass, winLow, winHigh):
    f = TFile(args.input)
    bkghist_template = f.Get('hmgg_c0')
    bkghist_template.Rebin(8)

    stats = 100000
    seed = 10

    bkghist = toyModel(bkghist_template, stats, seed)

    if args.doSig:
        #get signal hist
        sighist = buildSignal(125,1000, bkghist.GetNbinsX())

        #inject signal into background
        bkghist.Add(sighist)

    GPh = GPHisto(bkghist)
    GPh.setWindow(winLow,winHigh)
    X = GPh.getXWindowArr()
    y = GPh.getYWindowArr()
    dy = GPh.getErrWindowArr()

    X_t = GPh.getXArr()
    y_t = GPh.getYArr()
    dy_t = GPh.getErrArr()

    if args.noWindow:
        X = X_t
        y = y_t
        dy = dy_t

    #X, y, dy = histoToArrayTest(bkghist, 120, 140)
    #X, y, dy = histoToArrayCut(bkghist, 120, 125)
    #X_t, y_t, dy_t = histoToArray(bkghist)

    #X = np.atleast_2d(X).T
    #y = y.ravel()
    #dy = dy.ravel()

    #x = np.atleast_2d(np.linspace(start=105, stop=160, num=1000)).T  # Predict a relatively smooth function
    #x = np.atleast_2d(np.linspace(start=105, stop=160, num=219)).T  # Predict at each data point

    x = GPh.getXArr()

    #kernel = C(800.0, (1e-3, 1e3)) * RBF(100.0, (1e-3, 1e3)) #squared exponential kernel
    #kernel = C(10.0, (1e-3, 1e15)) * RBF(np.sqrt(2)*(7**2), (1e-3,1e5 )) #squared exponential kernel
    kernel = C(1000.0, (1e-3, 1e15)) * FallExp(1.0, (1e-5, 1e2), 1.0, (1e-3,1e15)) * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3,1e5))
    #kernel = C(10.0, (1e-3, 1e6)) * Gibbs(1.0, (1e-3, 1e5), 1.0, (1e-3,1e5))


    print "dy[5] =",dy[5]
    print "err =", bkghist.GetBinError(5), "Original =", bkghist_template.GetBinError(5)
    gp = GaussianProcessRegressor(kernel=kernel
                                    ,optimizer='fmin'
                                    ,alpha=dy**2
                                    ,n_restarts_optimizer=15
                                    )

    gp.fit(X,y)
    print(gp.kernel_)
    y_pred, sigma = gp.predict(x, return_std=True)


    if args.mplot:
        fig = plt.figure()
        #plt.plot(X, y, 'r.', markersize=10, label=u'Background')
        plt.errorbar(X.ravel(), y, dy, fmt='r.', markersize=8, label=u'Training Points', zorder=2)
        plt.errorbar(X_t.ravel(), y_t, dy_t, fmt='k.', markersize=7, label=u'Background', zorder=1)
        plt.plot(x, y_pred, 'b-', label=u'Prediction', zorder=3)
        plt.fill(np.concatenate([x, x[::-1]]),
                 np.concatenate([y_pred - 1.9600 * sigma,
                                (y_pred + 1.9600 * sigma)[::-1]]),
                 alpha=.5, fc='b', ec='None', label='95% confidence interval', zorder=3)
        plt.xlabel(r'$M_{\gamma \gamma}$')
        plt.ylabel('$events$')
        plt.title('Optimized Kernel: {}'.format(gp.kernel_))
        #plt.yscale('log')
        #plt.ylim(-10, 20)
        plt.legend(loc='upper right')
        plt.savefig(args.tag+'GPFit.pdf')
        #plt.show()
    else:
        outfile = TFile('out.root','RECREATE')
        #outhist = arrayToHisto('GP Fit', 105, 160, y_pred, sigma)
        outhist = GPh.getHisto(y_pred, 1.96*sigma, 'GP Fit')
        if args.noWindow:
            bkgWindow = GPh.getHisto(y, dy, 'Full Background')
        else:
            bkgWindow = GPh.getWinHisto(y, dy, 'Full Background')
        bkgSubtracted = bkghist.Clone('bkgSubtracted')
        bkgSubtracted.Add(outhist,-1)  #Subtract background prediction from background with injected signal.

        canv = TCanvas('canv', 'canv')
        pad1 = TPad("pad1", "pad1", 0, 0.3, 1, 1.0)
        pad1.SetBottomMargin(0)
        pad1.SetGridx()
        pad1.Draw()
        pad1.cd()
        outhist.SetStats(0)
        bkghist_template.SetStats(0)
        bkgWindow.SetStats(0)
        bkgWindow.SetMarkerColor(kBlue)
        bkgWindow.SetLineColor(kBlue)
        outhist.SetMarkerColor(kBlack)
        outhist.SetLineColor(kBlack)
        print(outhist.GetBinError(10))

        #bkgNorm = bkgWindow.Integral(1, bkgWindow.FindBin(winLow))
        #tmpNorm = bkghist_template.Integral(1,bkghist_template.FindBin(winLow))
        bkgNorm = bkgWindow.Integral()
        tmpNorm = bkghist_template.Integral()
        bkghist_template.Scale(bkgNorm/tmpNorm)
        bkghist_template.SetTitle(str(gp.kernel_)+" nToys: "+str(stats))
        print "Bin 24:  {0} : {1} : {2}".format((outhist.GetBinContent(24)-outhist.GetBinError(24)), bkghist_template.GetBinContent(24), (outhist.GetBinContent(24)+outhist.GetBinError(24))    )

        ####### Poly2 fit
        #canv4 = TCanvas('c4','c4')
        expPol_func = TF1("expPol","[0]*exp((x-100)/100 * ([1] + [2]*(x-100)/100))",105,160)
        expPol_func.SetParameters(0,0,0)
        expPol_func.SetParLimits(1,-10.,10.)
        expPol_func.SetParLimits(2,-10.,10.)
        bkgWindow.Fit("expPol","","",105,160)
        expFitResult = bkgWindow.GetFunction("expPol")
        expPolHist = expFitResult.GetHistogram()
        print(expPolHist.GetNbinsX())
        #expPolHist.Divide(outhist)
        #expPolHist.Draw()
        #canv4.Print(args.tag+'/expPol_GP_ratio.pdf')

        bkghist_template.Draw('')
        bkgWindow.Draw('same')
        outhist.Draw('histsame')

        #outhist.GetYaxis().SetLabelSize(0.)
        axis = TGaxis( -5, 20, -5, 220, 20,220,510,"")
        axis.SetLabelFont(43)
        axis.SetLabelSize(15)
        axis.Draw()

        canv.cd()
        pad2 = TPad("pad2", "pad2", 0, 0.02, 1, 0.3)
        pad2.SetTopMargin(0)
        pad2.SetBottomMargin(0.28)
        pad2.SetGridx()
        pad2.Draw()
        pad2.cd()


        h3 = bkghist_template.Clone("h3")
        h3.SetLineColor(kBlack)
        h3.SetMinimum(0.95)
        h3.SetMaximum(1.05)
        h3.Sumw2()
        h3.SetStats(0)
        h3.Divide(outhist)
        h3.SetMarkerColor(kBlack)
        h3.SetMarkerStyle(20)
        h3.SetMarkerSize(0.5)
        h3.Draw("ep")

        h4 = bkghist_template.Clone("h4")
        h4.SetLineColor(kRed)
        h4.SetMinimum(0.95)
        h4.SetMaximum(1.05)
        h4.Sumw2()
        h4.SetStats(0)
        h4.Divide(expFitResult)
        h4.SetMarkerColor(kRed)
        h4.SetMarkerStyle(20)
        h4.SetMarkerSize(0.5)
        h4.Draw("epsame")

        line = TLine(105,1,160,1)
        line.Draw('same')


        # outhist settings
        outhist.SetLineColor(kBlack);
        outhist.SetFillColorAlpha(33, 0.5)
        outhist.SetLineWidth(2);

        # Y axis outhist plot settings
        outhist.GetYaxis().SetTitleSize(20);
        outhist.GetYaxis().SetTitleFont(43);
        outhist.GetYaxis().SetTitleOffset(1.55);

        # bkghist settings
        bkghist.SetLineColor(kBlack);
        bkghist.SetMarkerSize(0.7)
        bkghist.SetLineWidth(2);

        # Ratio plot (h3) settings
        h3.SetTitle(""); # Remove the ratio title

        # Y axis ratio plot settings
        h3.GetYaxis().SetTitle("data/fit ");
        h3.GetYaxis().SetNdivisions(505);
        h3.GetYaxis().SetTitleSize(20);
        h3.GetYaxis().SetTitleFont(43);
        h3.GetYaxis().SetTitleOffset(1.);
        h3.GetYaxis().SetLabelFont(43); # Absolute font size in pixel (precision 3)
        h3.GetYaxis().SetLabelSize(15);

        # X axis ratio plot settings
        h3.GetXaxis().SetTitleSize(20);
        h3.GetXaxis().SetTitleFont(43);
        h3.GetXaxis().SetTitleOffset(3.);
        h3.GetXaxis().SetLabelFont(43); # Absolute font size in pixel (precision 3)
        h3.GetXaxis().SetLabelSize(15)

        canv.SetBottomMargin(0)

        canv.Write()
        #canv.Print(winLow+'_'+winHigh+'_GPFit.pdf')
        #canv.Print(args.tag+'/GPFit_'+str(winLow)+'_'+str(winHigh)+'.pdf')
        canv.Print(args.tag+'/GPFit_'+str(seed)+'.pdf')

        if args.doSig:
            ###  Plot signal stuff
            canv2 = TCanvas('c2','c2')
            canv2.cd()
            sighist.SetMarkerColor(kBlack)
            sighist.SetMarkerStyle(20)
            bkgSubtracted.GetXaxis().SetRangeUser(105,158)
            bkgSubtracted.Draw('hist')
            sighist.Draw('samep')
            #canv2.Write()
            canv2.Print(args.tag+'/SigYield_root.pdf')

            canv3 = TCanvas('c3', 'c3')
            canv3.cd()
            ratio = sighist.Clone('ratio')
            ratio.Divide(bkgSubtracted)
            ratio.GetYaxis().SetRangeUser(-5,5)
            ratio.Draw()
            #canv3.Write()
            canv3.Print(args.tag+'/SigYield_Ratio_root.pdf')

        """
        dscb_func = TF1("dscb", DSCB, 105, 160, 7)
        dscb_func.SetParameters(1  # Normalization
                            ,mass  # mu
                            ,1.475   # alpha_low
                            ,1.902   # alpha_high
                            ,12.1   # n_low
                            ,11.6   # n_high
                            ,1.86  ) # sigma

        #dscb_func.FixParameter(0,1)  #Normalization Dont want to fix this
        dscb_func.FixParameter(1,mass) #Mass Fixed to middle of window
        dscb_func.FixParameter(2, 1.475) #alpha_low
        dscb_func.FixParameter(3, 1.902) #alpha_high
        dscb_func.FixParameter(4, 12.1) #n_low
        dscb_func.FixParameter(5, 11.6) #n_high
        dscb_func.FixParameter(6, 1.68) # sigma
        bkgSubtracted.Fit("dscb","","", winLow, winHigh)
        fitResult = bkgSubtracted.GetFunction("dscb")
        norm = fitResult.GetParameter(0)
        ss = fitResult.Integral(winLow,winHigh)
        """

        #canv.cd()
        #bkgSubtracted.GetXaxis().SetRangeUser(120,130)
        #bkgSubtracted.Draw()
        #fitResult.Draw('same')
        #canv.Print(args.tag+'/fitResult.pdf')
        #print fitResult.Integral(120,130)


        """
        canv4 = TCanvas('c4','c4')
        gp_pred_full = GPh.getHisto(y_pred_full, sigma_full, 'GP Fit full')
        gp_pred_full.Divide(outhist)
        gp_pred_full.GetYaxis().SetRangeUser(0.95,1.05)
        gp_pred_full.Draw()
        canv4.Print(args.tag+'/Full_window_ratio.pdf')
        """
        f.Close()
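# A minimal, self-contained sketch (an assumption, not from the source) of the
# key idea used in run() above: bin-by-bin uncertainties dy are passed to
# GaussianProcessRegressor via alpha=dy**2, so each training point carries its
# own noise variance instead of a single homoscedastic noise term.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, RBF

rng = np.random.RandomState(1)
X_demo = np.linspace(105., 160., 55).reshape(-1, 1)
y_demo = 1000. * np.exp(-(X_demo.ravel() - 105.) / 20.)
dy_demo = np.sqrt(np.maximum(y_demo, 1.0))       # Poisson-like bin errors
y_demo = y_demo + rng.normal(0., dy_demo)        # smear the toy spectrum

gp_demo = GaussianProcessRegressor(kernel=ConstantKernel(1e3) * RBF(10.),
                                   alpha=dy_demo ** 2,   # per-point noise variance
                                   n_restarts_optimizer=5)
gp_demo.fit(X_demo, y_demo)
y_fit, y_fit_err = gp_demo.predict(X_demo, return_std=True)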
Example #58
from matplotlib.colors import LogNorm

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 20)[:, np.newaxis]
y = 0.5 * np.sin(3 * X[:, 0]) + rng.normal(0, 0.5, X.shape[0])

# First run
plt.figure(0)
kernel = 1.0 * RBF(length_scale=100.0, length_scale_bounds=(1e-2, 1e3)) \
    + WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e+1))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0).fit(X, y)
X_ = np.linspace(0, 5, 100)
y_mean, y_cov = gp.predict(X_[:, np.newaxis], return_cov=True)
plt.plot(X_, y_mean, 'k', lw=3, zorder=9)
plt.fill_between(X_,
                 y_mean - np.sqrt(np.diag(y_cov)),
                 y_mean + np.sqrt(np.diag(y_cov)),
                 alpha=0.5,
                 color='k')
plt.plot(X_, 0.5 * np.sin(3 * X_), 'r', lw=3, zorder=9)
plt.scatter(X[:, 0], y, c='r', s=50, zorder=10, edgecolors=(0, 0, 0))
plt.title("Initial: %s\nOptimum: %s\nLog-Marginal-Likelihood: %s" %
          (kernel, gp.kernel_, gp.log_marginal_likelihood(gp.kernel_.theta)))
plt.tight_layout()
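# Hedged addition (not in the original example): the estimated noise variance
# can be read back from the WhiteKernel operand of the optimized sum kernel.
print("Estimated noise level (first run):", gp.kernel_.k2.noise_level)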

# Second run
plt.figure(1)
kernel = 1.0 * RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e3))
def bo(X, y):

    data = list(zip(X, y))

    x = np.atleast_2d(np.linspace(0, 10, 1024)).T
    x_= np.atleast_2d(np.linspace(0, 10, 1024)).T


    kernel = kernels.Matern() + kernels.WhiteKernel()

    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=16, )#normalize_y=True)

    gp.fit(X, y)
    # FIXME: is it possible for mu(x) to fall below the minimum observed y?
    # Is this because the GaussianProcess prior assumes mu(x) = 0?
    # Will this affect the performance of the global optimization? Points not yet
    # observed get an extra boost wherever the prior dominates, and the loss we
    # are ultimately optimizing lies in [0, 1].
    y_pred, sigma = gp.predict(x, return_std=True)


    #http://www.scipy-lectures.org/advanced/mathematical_optimization/

    # x_min = fmin(negate(silly_f), 5)  # TODO better maximizer
    # Strong points: it is robust to noise, as it does not rely on computing
    # gradients. Thus it can work on functions that are not locally smooth, such
    # as experimental data points, as long as they display a large-scale
    # bell-shape behavior. However it is slower than gradient-based methods on
    # smooth, non-noisy functions.


    #opt_result = minimize(negate(silly_f), 5, bounds=[(0, 10)])  # TODO better maximizer
    #print(opt_result)
    #assert(opt_result.success)


    #x_min = opt_result.x


    # x_min = brent(negate(silly_f), brack=(0, 10))  # NOTE 1D only, NOTE not guaranteed to be within range brack=(0, 10) (see documentation)

    # TODO getting the gradient the gaussian would unlock all gradient based optimization methods!! (including L_BFGS)


    a = a_EI(gp, x_obs=X, y_obs=y, theta=0.01)
    a_x = np.apply_along_axis(a, 1, x)

    (x_min_,) = max(x, key=a)

    # TODO have a reasonable optimization (this doesn't scale well)
    #(x_min_,) = brute(
    #    negate(a),
    #    ranges=((0, 10),),
    #    Ns=64,
    #    finish=fmin,
    #)
    # FIXME brute can return numbers outside of the range! X = np.linspace(0, 10, 32), Ns=64, ranges=((0, 10)  (x_min_ = 10.22...)
    # I think it occurs when the function is pretty flat (but not constant)
    # TODO verify that finish function gets the same range as brute and don't wonder off (perhaps this is intended behaviour?)
    # TODO check https://github.com/scipy/scipy/blob/master/scipy/optimize/optimize.py#L2614 to see if it's possible for x_min to end up outside of the range (and if then when)

    print(x_min_)


    #plot_2d(x=x, x_=x_, y_pred=y_pred, sigma = sigma, a_x=a_x)
    #plot(x=x, y_pred=y_pred, x_obs=X, y_obs=y, x_min_=x_min_, sigma=sigma, a_x=a_x)
    #plt.show()

    # evaluate
    fx_min_ = f(x_min_)
    bo(
        X=np.vstack(
            (X,[x_min_,])
        ),
        y=np.hstack(
            (y,[fx_min_,])
        ),
    )
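# A possible shape for the a_EI helper used above (an assumption; its original
# definition is not shown in this excerpt). It returns a closure that evaluates
# the expected-improvement acquisition at a single point, which is the call
# pattern np.apply_along_axis(a, 1, x) and max(x, key=a) rely on.
import numpy as np
from scipy.stats import norm

def a_EI(gp, x_obs, y_obs, theta=0.01):
    y_best = np.max(y_obs)  # best observation so far (maximization)

    def acquisition(x_point):
        mu, sigma = gp.predict(np.atleast_2d(x_point), return_std=True)
        mu, sigma = float(mu[0]), float(sigma[0])
        if sigma == 0.0:
            return 0.0
        z = (mu - y_best - theta) / sigma
        return (mu - y_best - theta) * norm.cdf(z) + sigma * norm.pdf(z)

    return acquisition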
regressor = GaussianProcessRegressor(copy_X_train=False, alpha=0.01778279410038923,
        kernel=kernels.RationalQuadratic(alpha=1, length_scale=1), n_restarts_optimizer=4, normalize_y=False)
"""
regressor = GaussianProcessRegressor(copy_X_train=False)
parameters = {'kernel':(kernels.RationalQuadratic(),  kernels.RBF(), kernels.WhiteKernel()),
    'alpha': np.logspace(-10, 1, 5),
    'n_restarts_optimizer': range(1,5),
    'normalize_y': [True, False]}
clf = GridSearchCV(regressor, parameters, scoring=rmsle_scorer, verbose=10)
X_train, y_train = resample(X, y, n_samples=500)
clf.fit(X_train, y_train)
print("best_estimator_:", clf.best_estimator_)
print("best_score_:", clf.best_score_)
print("best_params_:", clf.best_params_)
print("best_score_:", clf.best_score_)

regressor.set_params(**clf.best_params_)
"""
X_train, y_train = resample(X, y, n_samples=5000)
regressor.fit(X_train, y_train)

print("Training done, testing...")
# Since we can't load the whole dataset, do batch testing
batch_size = 5000
X_test, y_test = resample(X, y, n_samples=100000)
y_pred = np.ndarray((0,))
for i in range(0, X_test.shape[0], batch_size):
    y_pred = np.hstack((y_pred, regressor.predict(X_test[i: i + batch_size])))
print("RMSLE =", root_mean_squared_log_error(y_test, y_pred)) # Last result: 0.469685