def init_with_data(self, init_X, init_Y):
        """      
        Input parameters
        ----------
        gp_params:            Gaussian Process structure      
        x,y:        # init data observations (in original scale)
        """

        # Turn the inputs into np arrays and store them.
        self.X_original = np.asarray(init_X)

        # Rescale X into the 0-1 cube.
        temp_init_point = np.divide((init_X - self.bounds[:, 0]),
                                    self.max_min_gap)
        self.X = np.asarray(temp_init_point)

        self.Y_original = np.asarray(init_Y)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        self.NumPoints = np.append(self.NumPoints, len(init_Y))

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])
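
A minimal, self-contained sketch of the rescaling that init_with_data performs.
The bounds and data below are made-up examples; in the class, max_min_gap is
bounds[:, 1] - bounds[:, 0]:

import numpy as np

bounds = np.array([[0.0, 2.0], [1.0, 5.0]])        # shape (dim, 2)
max_min_gap = bounds[:, 1] - bounds[:, 0]
init_X = np.array([[0.5, 2.0], [1.5, 3.0]])        # observations, original scale
init_Y = np.array([0.3, 0.7])

X_scaled = (init_X - bounds[:, 0]) / max_min_gap   # rows now lie in [0, 1]^dim
Y_scaled = (init_Y - np.mean(init_Y)) / (np.max(init_Y) - np.min(init_Y))
print(X_scaled, Y_scaled)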
Example #2

# Assumed imports for this test snippet: torch/torchvision/itertools follow
# directly from usage; acquisition_functions and mnist_model are project-local
# modules referenced by the test.
import itertools

import torch
import torch.utils.data
from torchvision import datasets, transforms

import acquisition_functions
import mnist_model


def test_acquisition_functions(acquisition_function: acquisition_functions.AcquisitionFunction):
    batch_size = 13

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            "../data",
            train=False,
            transform=transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]),
        ),
        batch_size=batch_size,
        shuffle=False,
    )

    bayesian_net = mnist_model.BayesianNet()

    estimator = acquisition_function.create(bayesian_net, k=1)
    estimator.eval()

    scores = torch.tensor([])

    num_iters = 5
    for data, _ in itertools.islice(test_loader, num_iters):
        output = estimator(data)
        scores = torch.cat((scores, output), dim=0)

    assert scores.shape == (batch_size * num_iters,)
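
These test functions receive the acquisition function as an argument, which
suggests pytest parametrization. A minimal sketch of what the missing decorator
could look like, assuming pytest and that AcquisitionFunction is an Enum (the
real fixtures are not shown in this excerpt):

import pytest

@pytest.mark.parametrize("acquisition_function", list(acquisition_functions.AcquisitionFunction))
def test_acquisition_functions_parametrized(acquisition_function):
    ...  # body as in test_acquisition_functions above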
Example #3

# Assumed imports for this test snippet; acquisition_functions is the same
# project-local module as above.
import torch

import acquisition_functions


def test_check_input_permutation(af_type: acquisition_functions.AcquisitionFunction):
    if af_type == acquisition_functions.AcquisitionFunction.random:
        return

    batch_size = 12

    test_data = torch.rand((batch_size, 10))

    mixture_a = test_data[::2, :]
    mixture_b = test_data[1::2, :]
    mixture_c = test_data

    class Forwarder(torch.nn.Module):
        def forward(self, batch):
            return batch

    forwarder = Forwarder()
    estimator = af_type.create(forwarder, k=1)
    estimator.eval()

    output_a = estimator(mixture_a)
    output_b = estimator(mixture_b)
    output_c = estimator(mixture_c)

    torch.testing.assert_allclose(
        torch.cat([output_a, output_b], dim=0), torch.cat([output_c[::2], output_c[1::2]], dim=0)
    )
Example #4

# Assumed imports for this excerpt: numpy, scipy, and time follow directly
# from usage. PradaGaussianProcess, AcquisitionFunction, acq_max,
# acq_max_global, unique_rows, and the constant max_bound_size come from
# elsewhere in this project. Note that this excerpt is Python 2 code (xrange,
# print statements), and the first few methods below are excerpted from a
# class body whose class statement is not part of the snippet.
import time

import numpy as np
from scipy import optimize, stats
from scipy.optimize import minimize
    def expandBoundsDDB_FB(self):
        """
        Description: Expands the search space with the full Bayesian 
        implementation of our DDB method

        """
        print('Attempting to expand search space with DDB-FB method')
        alpha = self.alpha
        beta = self.beta
        bound_samples = 100  # number of radius samples used to fit the log-logistic distribution
        # Find y^+ and x^+
        ymax = np.max(self.Y)
        # Generate test radii
        max_loc = np.argmax(self.Y)
        xmax = self.X[max_loc]
        test_bound = np.zeros(self.scalebounds.shape)
        bound_dist = np.zeros(bound_samples)
        bound_center = xmax
        test_bound[:, 1] = bound_center + 0.5
        test_bound[:, 0] = bound_center - 0.5
        max_radius = np.max(
            np.array([np.max(max_bound_size - test_bound[:, 1]),
                      np.max(test_bound[:, 0])]))
        step = max_radius / bound_samples
        packing_number = np.zeros(bound_samples)
        # Generate a Thompson sample maximum to estimate the internal maximum
        TS = AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x, tsb_y = acq_max_global(TS, self.gp, bounds=self.scalebounds)
        # Generate Gumbel samples to estimate the external maximum
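        # Assumed reading of the estimate below: with M = packing_number[i]
        # quasi-independent points outside the current box, the maximum of M
        # standard-normal values is approximately Gumbel with
        #     mu    = Phi^{-1}(1 - 1/M)
        #     sigma = Phi^{-1}(1 - 1/(e*M)) - Phi^{-1}(1 - 1/M),
        # so P(external max < y) ~ exp(-exp(-(y - mu)/sigma)); the loop
        # evaluates this CDF at the negated Thompson-sample maximum tsb_y.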
        for i in range(0, bound_samples):
            bound_length = test_bound[:, 1] - test_bound[:, 0]
            volume = np.power(max_bound_size, self.dim) - np.prod(bound_length)
            packing_number[i] = round(volume / (5 * self.gp.lengthscale))
            mu = stats.norm.ppf(1.0 - 1.0 / packing_number[i])
            sigma = stats.norm.ppf(1.0 - (1.0 / packing_number[i]) * np.exp(-1.0)) \
                - stats.norm.ppf(1.0 - 1.0 / packing_number[i])
            bound_dist[i] = np.exp(-np.exp(-(-tsb_y - mu) / sigma))
            test_bound[:, 1] = test_bound[:, 1] + step
            test_bound[:, 0] = test_bound[:, 0] - step
        bound_dist[np.isnan(bound_dist)] = 1
        # Fit the log-logistic parameters to the Gumbel samples
        xfit = np.arange(0, max_radius, max_radius / 100)
        popt, pcov = optimize.curve_fit(self.sufficientBoundPDF,
                                        xfit[0:100], bound_dist,
                                        bounds=np.array([[5, 1.1], [20, 5]]))
        print("popt={}".format(popt))
        b=ymax/popt[0]
        a=popt[1]
        print("b={}, ymax={}".format(b,ymax))
        # Sample for the optimal radius
        for d in range(0,self.dim):
            gamma=np.random.gamma(shape=alpha,scale=1/beta,size=100)
            loglog=stats.fisk.pdf(gamma,ymax/b,scale=a)
            scaled_weights=loglog/np.sum(loglog)
            multi=np.random.multinomial(1,scaled_weights)
            r_index=np.argmax(multi)
            print("Radius of {} selected".format(gamma[r_index]))
            self.scalebounds[d,1]=xmax[d]+gamma[r_index]
            self.scalebounds[d,0]=xmax[d]-gamma[r_index]

        print("seach space extended to {} with DDB".format(self.scalebounds))
    def maximize_expanding_volume_L(self, gp_params):
        """
        Expanding volume following L ~ MaxIter

        Input parameters
        ----------

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function

        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step

        # backup the previous bounds
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()

        # the region considered is computed as follows: NewVol~OldVol*T/t
        # alternatively, we compute the radius NewL~Oldl*pow(T/t,1/d)
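        # i.e. new_radius = l_radius * (T/t)^(1/d) with T = MaxIter and t the
        # number of evaluations so far, so extra_proportion = (T/t)^(1/d),
        # an expansion factor that decays toward 1 as t approaches T.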
        new_radius = self.l_radius * np.power(
            self.MaxIter / len(self.Y_original), 1.0 / self.dim)
        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius

        #extra_radius=(new_radius-self.l_radius)/2

        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (
            extra_proportion - 1) * 0.5
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (
            extra_proportion - 1) * 0.5

        # make sure it is within the limit
        if not (self.b_limit_lower is None):
            temp_max_bounds_lower = [
                np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 0] = temp_max_bounds_lower

        if not (self.b_limit_upper is None):
            temp_max_bounds_upper = [
                np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 /
                self.max_min_gap[d] for d in xrange(self.dim)]
        self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check whether the chosen point lies in the old or the expanded bound;
        # the cropping step below is disabled (set IsCropping = 1 to re-enable)
        IsCropping = 0
        if IsCropping == 1:
            flagOutside = 0
            for d in xrange(self.dim):
                if x_max_scale[d] > self.scalebounds_bk[d, 1] \
                        or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                    # outside the old bound
                    flagOutside = 1
                    self.scalebounds[d, 0] = np.minimum(
                        x_max_scale[d], self.scalebounds_bk[d, 0])
                    self.scalebounds[d, 1] = np.maximum(
                        x_max_scale[d], self.scalebounds_bk[d, 1])

                    # now the scalebounds is no longer 0-1

            if flagOutside == 0:  # not outside the old bound
                self.scalebounds = self.scalebounds_bk
                self.bounds = self.bounds_bk.copy()
            else:  # outside the old bound => recompute the original-scale bound
                temp = [
                    self.scalebounds[d, :] * self.max_min_gap[d] +
                    self.bounds_bk[d, 0] for d in xrange(self.dim)
                ]
                if self.dim > 1:
                    self.bounds = np.reshape(temp, (self.dim, 2))
                else:
                    self.bounds = np.array(temp)
        else:
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
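        # After re-normalizing X with the (possibly expanded) original-scale
        # bounds, the working scaled bounds are reset to the unit hypercube.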
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
    def maximize_volume_doubling(self, gp_params):
        """
        Volume Doubling: double the search volume (gamma = 2) every 3 evaluations

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process
        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Find unique rows of X to avoid GP from breaking

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds

        # consider the expansion step after 3 iterations

        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if not (self.b_limit_lower is None):
                temp_max_bounds_lower = [
                    np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 0] = temp_max_bounds_lower

            if not (self.b_limit_upper is None):
                temp_max_bounds_upper = [
                    np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds).copy()

            temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 /
                    self.max_min_gap[d] for d in xrange(self.dim)]
            self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)

        try:
            self.gp.fit(self.X[ur], self.Y[ur])
        except Exception:
            print("GP fit failed")

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
    def maximize_unbounded_regularizer(self, gp_params):
        """
        Unbounded Regularizer (Bobak Shahriari et al., AISTATS 2016)

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process
        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Find unique rows of X to avoid GP from breaking

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds
        # consider the expansion step after 3 iterations

        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if not (self.b_limit_lower is None):
                temp_max_bounds_lower = [
                    np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 0] = temp_max_bounds_lower

            if not (self.b_limit_upper is None):
                temp_max_bounds_upper = [
                    np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds)

            temp = [(max_bounds[d, :] - self.bounds[d, 0]) * 1.0 /
                    self.max_min_gap[d] for d in xrange(self.dim)]
            self.scalebounds = np.asarray(temp)

        # select the acquisition function
        self.acq['x_bar'] = np.mean(self.bounds)
        self.acq['R'] = np.power(self.l_radius, 1.0 / self.dim)

        self.acq_func = AcquisitionFunction(self.acq)

        # mean of the domain

        #acq['R']

        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new
        flagOutside = 0
        for d in xrange(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] \
                    or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                # outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 1])

                # now the scalebounds is no longer 0-1

        if flagOutside == 0:  # not outside the old bound
            self.scalebounds = self.scalebounds_bk
        else:  # outside the old bound => recompute the original-scale bound
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
class PradaBayOptFBO(object):
    def __init__(self,
                 gp_params,
                 f,
                 b_init_lower,
                 b_init_upper,
                 b_limit_lower,
                 b_limit_upper,
                 acq,
                 verbose=1,
                 opt_toolbox='nlopt'):
        """      
        Input parameters
        ----------
        f:              function to optimize:        
        pbounds0:       bounds on parameters predefined
                
        acq:            acquisition function, acq['name']=['ei','ucb','poi','lei']
                            ,acq['kappa'] for ucb, acq['k'] for lei
        opt:            optimization toolbox, 'nlopt','direct','scipy'
        
        Returns
        -------
        dim:            dimension
        bounds0:        initial bounds on original scale
        bounds_limit:   limit bounds on original scale
        bounds:         bounds on parameters (current)
        bounds_list:    bounds at all iterations
        bounds_bk:      bounds backup for computational purpose
        scalebounds:    bounds on normalized scale of 0-1 # be careful with scaling
        scalebounds_bk: bounds on normalized scale of 0-1 backup for computation
        time_opt:       will record the time spent on optimization
        gp:             Gaussian Process object
        
        MaxIter:        Maximum number of iterations
        """

        # Find number of parameters
        self.dim = len(b_init_lower)

        self.b_init_lower = b_init_lower
        self.b_init_upper = b_init_upper

        self.bounds0 = np.asarray([b_init_lower, b_init_upper]).T

        self.bounds = self.bounds0.copy()
        self.bounds_list = self.bounds0.copy()
        self.bounds_bk = self.bounds.copy()  # keep track

        # create a scalebounds 0-1
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        self.max_min_gap_bk = self.max_min_gap.copy()

        # Some function to be optimized
        self.f = f
        # optimization toolbox
        self.opt_toolbox = opt_toolbox
        # acquisition function type

        self.acq = acq

        # store X in original scale
        self.X_original = None

        # store X in 0-1 scale
        self.X = None

        # store y=f(x)
        # (y - mean)/(max-min)
        self.Y = None

        # y original scale
        self.Y_original = None

        self.time_opt = 0

        self.k_Neighbor = 2

        # Lipschitz constant
        self.L = 0

        # Gaussian Process class
        self.gp = PradaGaussianProcess(gp_params)

        # acquisition function
        self.acq_func = None

        # stop condition
        self.stop_flag = 0

        # volume of initial box

        # compute in log space

        #self.vol0=prod(self.max_min_gap)
        self.l_radius0 = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                                self.dim)
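        # i.e. the geometric mean of the side lengths (the d-th root of the
        # box volume), computed in log space for numerical stability.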

        self.l_radius = self.l_radius0

        self.MaxIter = gp_params['MaxIter']

        self.b_limit_lower = b_limit_lower
        self.b_limit_upper = b_limit_upper

        # visualization purpose
        self.X_invasion = []

    # will be later used for visualization
    def posterior(self, Xnew):
        self.gp.fit(self.X, self.Y)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def init(self, gp_params, n_init_points=3):
        """      
        Input parameters
        ----------
        gp_params:            Gaussian Process structure      
        n_init_points:        # init points
        """

        # Generate random points
        l = [
            np.random.uniform(x[0], x[1], size=n_init_points)
            for x in self.bounds
        ]

        # Concatenate new random points to possible existing
        # points from self.explore method.
        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))

        self.X_original = np.asarray(init_X)

        # Evaluate target function at all initialization
        y_init = self.f(init_X)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
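
        # Note: X is stored unscaled here; the maximize_* methods recompute
        # self.X on the 0-1 scale from X_original before fitting the GP.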

        self.X = self.X_original.copy()

    def max_volume(self, gp, max_bounds_scale, max_lcb):
        """
        Find points whose UCB exceeds max_lcb and grow the (scaled) search
        volume to contain them. self.scalebounds is updated in place.

        Input Parameters
        ----------
        gp:               a Gaussian process fitted to the relevant data
        max_bounds_scale: the (scaled) bounds limiting the candidate search
        max_lcb:          the maximum of the LCB over the current box

        Returns
        -------
        None; the expanded region is recorded in self.scalebounds, and the
        accepted outside points in self.X_invasion / self.Y_invasion.
        """
        def compute_utility_score_for_maximizing_volume_wrapper(
                x_tries, gp, dim, max_lcb):
            if len(x_tries.shape) == 1:
                return compute_utility_score_for_maximizing_volume(
                    x_tries, gp, dim, max_lcb)
            return np.apply_along_axis(
                compute_utility_score_for_maximizing_volume, 1, x_tries, gp,
                dim, max_lcb)

        def compute_utility_score_for_maximizing_volume(
                x_tries, gp, dim, max_lcb):
            new_bounds = self.scalebounds

            kappa = 2
            mean, var = gp.predict(x_tries, eval_MSE=True)

            var.flags['WRITEABLE'] = True
            #var=var.copy()
            var[var < 1e-10] = 0

            myucb = mean + kappa * np.sqrt(var)
            myucb = np.ravel(myucb)

            if np.asscalar(myucb) < np.asscalar(max_lcb):
                return myucb

            # store the points (outside the previous bound) that satisfy the
            # constraint (original scale)

            # convert to the original scale before appending
            x_tries_original = x_tries * self.max_min_gap + self.bounds_bk[:, 0]

            # check if it is outside the old bound
            flagOutside = 0
            for d in xrange(self.dim):
                if x_tries[d] > self.scalebounds_bk[d, 1] \
                        or x_tries[d] < self.scalebounds_bk[d, 0]:
                    # outside the old bound
                    flagOutside = 1
                    break

            if flagOutside == 1:  # append to the invasion set
                if len(self.X_invasion) == 0:
                    self.X_invasion = x_tries_original
                    self.Y_invasion = myucb

                else:
                    self.X_invasion = np.vstack(
                        (self.X_invasion, x_tries_original))
                    self.Y_invasion = np.vstack((self.Y_invasion, myucb))

            # expand the bound to include this point
            for d in xrange(dim):
                # expand lower bound
                if x_tries[d] < new_bounds[d, 0]:
                    new_bounds[d, 0] = x_tries[d]

                if x_tries[d] > new_bounds[d, 1]:
                    new_bounds[d, 1] = x_tries[d]

            self.scalebounds = new_bounds
            # update the utility score
            return myucb

        dim = max_bounds_scale.shape[0]
        # Start with the lower bound as the argmax
        #x_max = max_bounds[:, 0]
        max_acq = None

        myopts = {'maxiter': 1000, 'fatol': 0.001, 'xatol': 0.001}
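        # Note: 'fatol' and 'xatol' are Nelder-Mead options (cf. the
        # commented-out call below); L-BFGS-B ignores them, and scipy may
        # emit an "unknown solver options" warning.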

        # multi start
        for i in xrange(5 * dim):
            # Find the minimum of minus the acquisition function

            x_tries = np.random.uniform(max_bounds_scale[:, 0],
                                        max_bounds_scale[:, 1],
                                        size=(100 * dim, dim))

            # evaluate L(x)
            # estimate new L
            y_tries = compute_utility_score_for_maximizing_volume_wrapper(
                x_tries, gp, dim, max_lcb)

            #find x optimal for init
            idx_max = np.argmax(y_tries)
            x_init_max = x_tries[idx_max]

            res = minimize(
                lambda x: -compute_utility_score_for_maximizing_volume_wrapper(
                    x, gp, dim, max_lcb),
                #x_init_max.reshape(1, -1),bounds=bounds,options=myopts,method="nelder-mead")#L-BFGS-B
                x_init_max.reshape(1, -1),
                bounds=max_bounds_scale,
                options=myopts,
                method="L-BFGS-B")  #L-BFGS-B

            # value at the estimated point
            val = compute_utility_score_for_maximizing_volume(
                res.x, gp, dim, max_lcb)

            # Keep it if better than the previous maximum.
            if max_acq is None or val >= max_acq:
                x_max = res.x
                max_acq = val
                #print max_acq

        # Note: unlike acq_max, this routine is used only for its side effects;
        # the best point found (x_max, max_acq) is not returned to the caller.

    def run_FBO(self, gp_params):
        """
        Main optimization method for filtering strategy for BO.

        Input parameters
        ----------

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        # for random approach
        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        # obtain the maximum on the observed set (for EI)
        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function

        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step

        # finding the maximum over the lower bound
        # mu(x)-kappa x sigma(x)
        mu_acq = {}
        mu_acq['name'] = 'lcb'
        mu_acq['dim'] = self.dim
        mu_acq['kappa'] = 2
        acq_mu = AcquisitionFunction(mu_acq)

        # obtain the argmax of the LCB (computed on the scaled bounds, not the original ones)
        x_lcb_max = acq_max(ac=acq_mu.acq_kind,
                            gp=self.gp,
                            y_max=y_max,
                            bounds=self.scalebounds,
                            opt_toolbox=self.opt_toolbox)

        # obtain the max(lcb)
        max_lcb = acq_mu.acq_kind(x_lcb_max, gp=self.gp, y_max=y_max)
        max_lcb = np.ravel(max_lcb)

        # finding the region outside the box, that has the ucb > max_lcb
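        # Rationale of the filtering step: any point outside the current box
        # whose UCB still exceeds the best LCB inside the box could beat the
        # incumbent, so the box is grown to cover such points (see max_volume).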
        self.max_min_gap_bk = self.max_min_gap.copy()
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()
        self.X_invasion = []

        # the region considered is computed as follows: NewVol~OldVol*T/t
        # alternatively, we compute the radius NewL~Oldl*pow(T/t,1/d)
        new_radius = self.l_radius * np.power(
            self.MaxIter / len(self.Y_original), 1.0 / self.dim)

        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius

        #extra_radius=(new_radius-self.l_radius)/2

        # check if extra radius is negative
        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand the box on both sides to form X'_t
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (
            extra_proportion - 1)
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (
            extra_proportion - 1)

        #max_bounds[:,0]=max_bounds[:,0]-extra_radius
        #max_bounds[:,1]=max_bounds[:,1]+extra_radius

        # make sure the max_bounds is within the limit
        if not (self.b_limit_lower is None):
            temp_max_bounds_lower = [
                np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 0] = temp_max_bounds_lower

        if not (self.b_limit_upper is None):
            temp_max_bounds_upper = [
                np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [
            (max_bounds[d, :] - self.bounds[d, 0]) * 1.0 / self.max_min_gap[d]
            for d in xrange(self.dim)
        ]
        max_bounds_scale = np.asarray(temp)

        # find suitable candidates in new regions
        # ucb(x) > max_lcb st max L(x)

        # new bound in scale space
        # we note that the scalebound will be changed inside this function
        self.max_volume(self.gp, max_bounds_scale, max_lcb)

        #print "new bounds scale"
        #print self.scalebounds

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        val_acq = self.acq_func.acq_kind(x_max_scale, self.gp, y_max)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new
        flagOutside = 0
        for d in xrange(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] \
                    or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                # outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 1])
            else:
                self.scalebounds[d, :] = self.scalebounds_bk[d, :]

                # now the scalebounds is no longer 0-1

        if flagOutside == 0:  # not outside the old bound, use the old bound
            self.scalebounds = self.scalebounds_bk
            self.bounds = self.bounds_bk.copy()
        else:  # outside the old bound => expand the bound as the minimum bound containing the old bound and the selected point
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        self.bounds_list = np.hstack((self.bounds_list, self.bounds))

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
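
    # A minimal driver sketch (assumed usage; the gp_params contents, the
    # objective, and the bound arrays below are made-up examples):
    #
    #   bo = PradaBayOptFBO(gp_params, f=my_function,
    #                       b_init_lower=np.array([0.0]),
    #                       b_init_upper=np.array([1.0]),
    #                       b_limit_lower=np.array([-5.0]),
    #                       b_limit_upper=np.array([5.0]),
    #                       acq={'name': 'ei', 'dim': 1})
    #   bo.init(gp_params, n_init_points=3)
    #   for _ in range(gp_params['MaxIter']):
    #       bo.run_FBO(gp_params)
    #   best_x = bo.X_original[np.argmax(bo.Y_original)]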

#======================================================================================================

    def maximize_volume_doubling(self, gp_params):
        """
        Volume Doubling: double the search volume (gamma = 2) every 3 evaluations

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process
        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Find unique rows of X to avoid GP from breaking

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        # select the acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds

        # consider the expansion step after 3 iterations

        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if not (self.b_limit_lower is None):
                temp_max_bounds_lower = [
                    np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 0] = temp_max_bounds_lower

            if not (self.b_limit_upper is None):
                temp_max_bounds_upper = [
                    np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds).copy()

            temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 /
                    self.max_min_gap[d] for d in xrange(self.dim)]
            self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)

        try:
            self.gp.fit(self.X[ur], self.Y[ur])
        except Exception:
            print("GP fit failed")

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)

    def maximize_unbounded_regularizer(self, gp_params):
        """
        Unbounded Regularizer (Bobak Shahriari et al., AISTATS 2016)

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process
        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Find unique rows of X to avoid GP from breaking

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        self.scalebounds_bk = self.scalebounds.copy()
        self.bounds_bk = self.bounds
        # consider the expansion step after 3 iterations

        if (len(self.Y) % 3) == 0:
            new_radius = 2.0 * self.l_radius
            extra_radius = (new_radius - self.l_radius) / 2

            max_bounds = self.bounds.copy()
            max_bounds[:, 0] = max_bounds[:, 0] - extra_radius
            max_bounds[:, 1] = max_bounds[:, 1] + extra_radius

            # make sure it is within the limit
            if not (self.b_limit_lower is None):
                temp_max_bounds_lower = [
                    np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 0] = temp_max_bounds_lower

            if not (self.b_limit_upper is None):
                temp_max_bounds_upper = [
                    np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                    for idx in xrange(self.dim)
                ]
                max_bounds[:, 1] = temp_max_bounds_upper

            self.bounds = np.asarray(max_bounds)

            temp = [(max_bounds[d, :] - self.bounds[d, 0]) * 1.0 /
                    self.max_min_gap[d] for d in xrange(self.dim)]
            self.scalebounds = np.asarray(temp)

        # select the acquisition function
        self.acq['x_bar'] = np.mean(self.bounds)
        self.acq['R'] = np.power(self.l_radius, 1.0 / self.dim)

        self.acq_func = AcquisitionFunction(self.acq)

        # mean of the domain

        #acq['R']

        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new
        flagOutside = 0
        for d in xrange(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] \
                    or x_max_scale[d] < self.scalebounds_bk[d, 0]:
                # outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 1])

                # now the scalebounds is no longer 0-1

        if flagOutside == 0:  # not outside the old bound
            self.scalebounds = self.scalebounds_bk
        else:  # outside the old bound => recompute the original-scale bound
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # for plotting
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)

    def maximize_expanding_volume_L(self, gp_params):
        """
        Expanding volume following L ~ MaxIter

        Input parameters
        ----------

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function

        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step

        # backup the previous bounds
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()

        # the region considered grows as NewVol ~ OldVol * T/t;
        # equivalently, the radius grows as NewL ~ OldL * (T/t)^(1/d)
        new_radius = self.l_radius * np.power(
            1.0 * self.MaxIter / len(self.Y_original), 1.0 / self.dim)
        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius

        #extra_radius=(new_radius-self.l_radius)/2

        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (
            extra_proportion - 1) * 0.5
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (
            extra_proportion - 1) * 0.5

        # make sure it is within the limit
        if not (self.b_limit_lower is None):
            temp_max_bounds_lower = [
                np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 0] = temp_max_bounds_lower

        if not (self.b_limit_upper is None):
            temp_max_bounds_upper = [
                np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 /
                self.max_min_gap[d] for d in xrange(self.dim)]
        self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new for cropping
        IsCropping = 0  # set to 1 to crop the expanded bound back around the selected point
        if IsCropping == 1:
            flagOutside = 0
            for d in xrange(self.dim):
                if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[
                        d] < self.scalebounds_bk[d, 0]:  #outside the old bound
                    flagOutside = 1
                    self.scalebounds[d,
                                     0] = np.minimum(x_max_scale[d],
                                                     self.scalebounds_bk[d, 0])
                    self.scalebounds[d,
                                     1] = np.maximum(x_max_scale[d],
                                                     self.scalebounds_bk[d, 1])

                    # now the scalebounds is no longer 0-1

            if flagOutside == 0:  # not outside the old bound
                self.scalebounds = self.scalebounds_bk
                self.bounds = self.bounds_bk.copy()
            else:  # outside the old bound => recompute the bound to cover the new point
                temp = [
                    self.scalebounds[d, :] * self.max_min_gap[d] +
                    self.bounds_bk[d, 0] for d in xrange(self.dim)
                ]
                if self.dim > 1:
                    self.bounds = np.reshape(temp, (self.dim, 2))
                else:
                    self.bounds = np.array(temp)
        else:
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:,
                                                                              0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # refit the GP with the new observation (also used for plotting)
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)

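    # The schedule above grows the search volume like T/t: new_radius =
    # l_radius * (MaxIter/t)^(1/d), so extra_proportion decays towards 1 as
    # evaluations accumulate. Worked sketch with made-up numbers
    # (illustrative, not part of the class):
    #
    #   import numpy as np
    #   l_radius, MaxIter, t, d = 2.0, 100, 25, 2
    #   new_radius = l_radius * np.power(1.0 * MaxIter / t, 1.0 / d)  # 4.0
    #   extra_proportion = new_radius / l_radius                      # 2.0
    #   # each side is then widened by gap * (extra_proportion - 1) / 2
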
    def maximize_expanding_volume_L_Cropping(self, gp_params):
        """
        Expanding volume following L ~ MaxIter

        Input parameters
        ----------

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function

        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step

        # finding the region outside the box, that has the ucb > max_lcb

        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()

        # the region considered grows as NewVol ~ OldVol * T/t;
        # equivalently, the radius grows as NewL ~ OldL * (T/t)^(1/d)
        new_radius = self.l_radius * np.power(
            1.0 * self.MaxIter / len(self.Y_original), 1.0 / self.dim)
        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius

        #extra_radius=(new_radius-self.l_radius)/2

        # check if extra radius is negative
        #if extra_radius<0:
        #extra_radius=0

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound
        max_bounds[:,
                   0] = max_bounds[:, 0] - self.max_min_gap * extra_proportion
        max_bounds[:,
                   1] = max_bounds[:, 1] + self.max_min_gap * extra_proportion

        # make sure the max_bound is still within the limit
        if not (self.b_limit_lower is None):
            temp_max_bounds_lower = [
                np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 0] = temp_max_bounds_lower

        if not (self.b_limit_upper is None):
            temp_max_bounds_upper = [
                np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [(max_bounds[d, :] - self.bounds_bk[d, 0]) * 1.0 /
                self.max_min_gap[d] for d in xrange(self.dim)]
        self.scalebounds = np.asarray(temp)

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        #val_acq=self.acq_func.acq_kind(x_max_scale,self.gp,y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new for cropping
        IsCropping = 1  # crop the expanded bound to the minimal box containing the old bound and the selected point
        if IsCropping == 1:
            flagOutside = 0
            for d in xrange(self.dim):
                if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[
                        d] < self.scalebounds_bk[d, 0]:  #outside the old bound
                    flagOutside = 1
                    self.scalebounds[d,
                                     0] = np.minimum(x_max_scale[d],
                                                     self.scalebounds_bk[d, 0])
                    self.scalebounds[d,
                                     1] = np.maximum(x_max_scale[d],
                                                     self.scalebounds_bk[d, 1])
                else:
                    self.scalebounds[d, :] = self.scalebounds_bk[d, :]

                    # now the scalebounds is no longer 0-1

            if flagOutside == 0:  # not outside the old bound
                self.scalebounds = self.scalebounds_bk
                self.bounds = self.bounds_bk.copy()
            else:  # outside the old bound => recompute the bound to cover the new point
                temp = [
                    self.scalebounds[d, :] * self.max_min_gap[d] +
                    self.bounds_bk[d, 0] for d in xrange(self.dim)
                ]
                if self.dim > 1:
                    self.bounds = np.reshape(temp, (self.dim, 2))
                else:
                    self.bounds = np.array(temp)
        else:
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:,
                                                                              0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # refit the GP with the new observation (also used for plotting)
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
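
    # Unlike maximize_expanding_volume_L, the cropping variant above resets
    # each dimension of scalebounds back to its backup whenever the selected
    # point falls inside the old bound, so the final region is the smallest
    # box containing the old bound and the new point. Illustrative sketch
    # (hypothetical values, not part of the class):
    #
    #   old = [0.0, 1.0]; x_d = 1.3
    #   new = [min(x_d, old[0]), max(x_d, old[1])]   # -> [0.0, 1.3]
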
    def run_FBO(self, gp_params):
        """
        Main optimization method for filtering strategy for BO.

        Input parameters
        ----------

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        # for random approach
        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # scale the data before updating the GP
        # convert it to scaleX
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        # obtain the maximum on the observed set (for EI)
        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function

        self.acq_func = AcquisitionFunction(self.acq)

        # consider the expansion step

        # finding the maximum of the lower confidence bound
        # lcb(x) = mu(x) - kappa * sigma(x)
        mu_acq = {}
        mu_acq['name'] = 'lcb'
        mu_acq['dim'] = self.dim
        mu_acq['kappa'] = 2
        acq_mu = AcquisitionFunction(mu_acq)

        # obtain the argmax(lcb), make sure the scale bound vs original bound
        x_lcb_max = acq_max(ac=acq_mu.acq_kind,
                            gp=self.gp,
                            y_max=y_max,
                            bounds=self.scalebounds,
                            opt_toolbox=self.opt_toolbox)

        # obtain the max(lcb)
        max_lcb = acq_mu.acq_kind(x_lcb_max, gp=self.gp, y_max=y_max)
        max_lcb = np.ravel(max_lcb)

        # finding the region outside the box, that has the ucb > max_lcb
        self.max_min_gap_bk = self.max_min_gap.copy()
        self.bounds_bk = self.bounds.copy()
        self.scalebounds_bk = self.scalebounds.copy()
        self.X_invasion = []

        # the region considered grows as NewVol ~ OldVol * T/t;
        # equivalently, the radius grows as NewL ~ OldL * (T/t)^(1/d)
        new_radius = self.l_radius * np.power(
            1.0 * self.MaxIter / len(self.Y_original), 1.0 / self.dim)

        # extra proportion
        extra_proportion = new_radius * 1.0 / self.l_radius

        #extra_radius=(new_radius-self.l_radius)/2

        # guard against shrinking: the expansion proportion is floored at 1
        if extra_proportion < 1:
            extra_proportion = 1

        max_bounds = self.bounds.copy()

        # expand half to the lower bound and half to the upper bound, X'_t
        max_bounds[:, 0] = max_bounds[:, 0] - self.max_min_gap * (
            extra_proportion - 1)
        max_bounds[:, 1] = max_bounds[:, 1] + self.max_min_gap * (
            extra_proportion - 1)

        #max_bounds[:,0]=max_bounds[:,0]-extra_radius
        #max_bounds[:,1]=max_bounds[:,1]+extra_radius

        # make sure the max_bounds is within the limit
        if not (self.b_limit_lower is None):
            temp_max_bounds_lower = [
                np.maximum(max_bounds[idx, 0], self.b_limit_lower[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 0] = temp_max_bounds_lower

        if not (self.b_limit_upper is None):
            temp_max_bounds_upper = [
                np.minimum(max_bounds[idx, 1], self.b_limit_upper[idx])
                for idx in xrange(self.dim)
            ]
            max_bounds[:, 1] = temp_max_bounds_upper

        temp = [
            (max_bounds[d, :] - self.bounds[d, 0]) * 1.0 / self.max_min_gap[d]
            for d in xrange(self.dim)
        ]
        max_bounds_scale = np.asarray(temp)

        # find suitable candidates in new regions
        # ucb(x) > max_lcb st max L(x)

        # new bound in scale space
        # we note that the scalebound will be changed inside this function
        self.max_volume(self.gp, max_bounds_scale, max_lcb)

        #print "new bounds scale"
        #print self.scalebounds

        # perform standard BO on the new bound (scaled)
        x_max_scale = acq_max(ac=self.acq_func.acq_kind,
                              gp=self.gp,
                              y_max=y_max,
                              bounds=self.scalebounds,
                              opt_toolbox=self.opt_toolbox)

        val_acq = self.acq_func.acq_kind(x_max_scale, self.gp, y_max)

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max_scale).sum(axis=1) == 0):
            x_max_scale = np.random.uniform(self.scalebounds[:, 0],
                                            self.scalebounds[:, 1],
                                            size=self.scalebounds.shape[0])

        # check if the estimated data point is in the old bound or new
        flagOutside = 0
        for d in xrange(self.dim):
            if x_max_scale[d] > self.scalebounds_bk[d, 1] or x_max_scale[
                    d] < self.scalebounds_bk[d, 0]:  #outside the old bound
                flagOutside = 1
                self.scalebounds[d, 0] = np.minimum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 0])
                self.scalebounds[d, 1] = np.maximum(x_max_scale[d],
                                                    self.scalebounds_bk[d, 1])
            else:
                self.scalebounds[d, :] = self.scalebounds_bk[d, :]

                # now the scalebounds is no longer 0-1

        if flagOutside == 0:  # not outside the old bound, use the old bound
            self.scalebounds = self.scalebounds_bk
            self.bounds = self.bounds_bk.copy()
        else:  # outside the old bound => expand the bound as the minimum bound containing the old bound and the selected point
            temp = [
                self.scalebounds[d, :] * self.max_min_gap[d] +
                self.bounds_bk[d, 0] for d in xrange(self.dim)
            ]
            if self.dim > 1:
                self.bounds = np.reshape(temp, (self.dim, 2))
            else:
                self.bounds = np.array(temp)

        self.bounds_list = np.hstack((self.bounds_list, self.bounds))

        # compute X in original scale
        temp_X_new_original = x_max_scale * self.max_min_gap + self.bounds_bk[:,
                                                                              0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # clone the self.X for updating GP
        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]
        temp = np.divide((self.X_original - self.bounds[:, 0]),
                         self.max_min_gap)
        self.X = np.asarray(temp)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T
        # evaluate Y using original X

        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # refit the GP with the new observation (also used for plotting)
        self.gp = PradaGaussianProcess(gp_params)
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # update volume and radius
        #self.vol=prod(self.max_min_gap)
        #self.l_radius=np.power(self.vol,1/self.dim)
        self.l_radius = np.exp(1.0 * np.sum(np.log(self.max_min_gap)) /
                               self.dim)
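
# The methods above repeatedly convert between the original domain and the
# 0-1 domain via X = (X_original - lb) / (ub - lb) and back. A minimal
# round-trip sketch with made-up bounds (illustrative, not part of the code):
#
#   import numpy as np
#   bounds = np.array([[-5.0, 5.0], [0.0, 2.0]])
#   gap = bounds[:, 1] - bounds[:, 0]
#   x_orig = np.array([2.5, 0.5])
#   x_scaled = (x_orig - bounds[:, 0]) / gap            # [0.75, 0.25]
#   assert np.allclose(x_scaled * gap + bounds[:, 0], x_orig)

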
class PradaBayOptBatch(object):
    def __init__(self,
                 gp_params,
                 f,
                 pbounds,
                 acq,
                 verbose=1,
                 opt_toolbox='scipy'):
        """      
        Input parameters
        ----------
        f:              function to optimize
        pbounds:        bounds on parameters
        acq:            acquisition function, 'ei', 'ucb'
        opt_toolbox:    optimization toolbox, 'nlopt', 'direct', 'scipy'
        
        Returns
        -------
        dim:            dimension
        bounds:         bounds on original scale
        scalebounds:    bounds on normalized scale of 0-1
        time_opt:       will record the time spent on optimization
        gp:             Gaussian Process object
        """
        # Store the original dictionary
        self.pbounds = pbounds

        # Find number of parameters
        self.dim = len(pbounds)

        # Create an array with parameters bounds

        if isinstance(pbounds, dict):
            # Get the name of the parameters
            self.keys = list(pbounds.keys())

            self.bounds = []
            for key in self.pbounds.keys():
                self.bounds.append(self.pbounds[key])
            self.bounds = np.asarray(self.bounds)
        else:
            self.bounds = np.asarray(pbounds)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]

        # Some function to be optimized
        self.f = f

        # optimization tool: direct, scipy, nlopt
        self.opt_toolbox = opt_toolbox
        # acquisition function type
        self.acq = acq

        # store the batch size for each iteration
        self.NumPoints = []
        # Numpy array place holders
        self.X_original = None

        # scale the data to 0-1 to fit the GP better
        self.X = None  # X = (X_original - min(bounds)) / (max(bounds) - min(bounds))

        self.Y = None  # Y = (Y_original - mean(Y_original)) / (max(Y_original) - min(Y_original))
        self.Y_original = None
        self.opt_time = 0

        self.L = 0  # lipschitz

        self.gp = PradaGaussianProcess(gp_params)

        # Acquisition Function
        #self.acq_func = None
        self.acq_func = AcquisitionFunction(acq=self.acq)

    def posterior(self, Xnew):
        #xmin, xmax = -2, 10
        ur = unique_rows(self.X)

        self.gp.fit(self.X[ur], self.Y[ur])
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

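    # Hedged usage sketch for posterior() (assumes a fitted instance named
    # 'bo'; illustrative only): it returns the predictive mean and standard
    # deviation on the 0-1 scale, e.g.
    #
    #   Xnew = np.linspace(0, 1, 50).reshape(-1, 1)
    #   mu, sigma = bo.posterior(Xnew)
    #   # mu +/- 1.96 * sigma gives an approximate 95% credible band
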
    def init(self, n_init_points):
        """      
        Input parameters
        ----------
        n_init_points:        number of random initialization points
        """

        # Generate random points
        l = [
            np.random.uniform(x[0], x[1], size=n_init_points)
            for x in self.bounds
        ]

        # Concatenate new random points to possible existing
        # points from self.explore method.
        #self.init_points += list(map(list, zip(*l)))
        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))

        # Evaluate target function at all initialization
        y_init = self.f(init_X)

        # Turn it into np array and store.
        self.X_original = np.asarray(init_X)
        temp_init_point = np.divide((init_X - self.bounds[:, 0]),
                                    self.max_min_gap)

        self.X_original = np.asarray(init_X)
        self.X = np.asarray(temp_init_point)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        self.NumPoints = np.append(self.NumPoints, n_init_points)

        # Set parameters if any was passed
        #self.gp=PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        #ur = unique_rows(self.X)
        #self.gp.fit(self.X[ur], self.Y[ur])

        #print "#Batch={:d} f_max={:.4f}".format(n_init_points,self.Y.max())

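    # The normalization used above maps Y to zero mean and unit range:
    # Y = (Y_original - mean) / (max - min). Worked example (illustrative):
    #
    #   import numpy as np
    #   Y_original = np.array([1.0, 2.0, 5.0])     # mean 8/3, range 4
    #   Y = (Y_original - Y_original.mean()) / \
    #       (Y_original.max() - Y_original.min())
    #   # -> [-0.4167, -0.1667, 0.5833]; note Y.max() - Y.min() == 1
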
    def init_with_data(self, init_X, init_Y):
        """      
        Input parameters
        ----------
        init_X, init_Y:       initial observations (in original scale)
        """

        # Turn it into np array and store.
        self.X_original = np.asarray(init_X)
        temp_init_point = np.divide((init_X - self.bounds[:, 0]),
                                    self.max_min_gap)

        self.X_original = np.asarray(init_X)
        self.X = np.asarray(temp_init_point)

        self.Y_original = np.asarray(init_Y)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        self.NumPoints = np.append(self.NumPoints, len(init_Y))

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

    def smooth_the_peak(self, my_peak):

        # define the local bound around the estimated point
        local_bound = np.zeros((self.dim, 2))
        for dd in range(self.dim):
            try:
                local_bound[dd, 0] = my_peak[-1][dd] - 0.005
                local_bound[dd, 1] = my_peak[-1][dd] + 0.005
            except:
                local_bound[dd, 0] = my_peak[dd] - 0.005
                local_bound[dd, 1] = my_peak[dd] + 0.005

        local_bound = np.clip(local_bound, self.scalebounds[:, 0],
                              self.scalebounds[:, 1])

        dim = len(local_bound)
        num_data = 1000 * dim
        samples = np.zeros(shape=(num_data, dim))
        #for k in range(0,dim): samples[:,k] = np.random.uniform(low=local_bound[k][0],high=local_bound[k][1],size=num_data)
        for dd in range(0, dim):
            samples[:, dd] = np.linspace(local_bound[dd][0],
                                         local_bound[dd][1], num_data)

        # smooth the peak
        """
        n_bins =  100*np.ones(self.dim)
        mygrid = np.mgrid[[slice(row[0], row[1], n*1j) for row, n in zip(local_bound, n_bins)]]
        mygrid=mygrid.reshape(100**self.dim, self.dim)
        utility_grid=self.acq_func.acq_kind(mygrid,self.gp,self.Y.max())        
        
        mysamples=np.vstack((mygrid,utility_grid))
        samples_smooth=filters.uniform_filter(mysamples, size=[2,2], output=None, mode='reflect', cval=0.0, origin=0)
        """

        # get the utility after smoothing
        samples_smooth = samples
        utility_smooth = self.acq_func.acq_kind(samples_smooth, self.gp,
                                                self.Y.max())

        # get the peak value y
        #peak_y=np.max(utility_smooth)

        # get the peak location x
        #peak_x=samples_smooth[np.argmax(utility_smooth)]

        peak_x = my_peak
        # linear regression
        regr = linear_model.LinearRegression()

        regr.fit(samples_smooth, utility_smooth)
        #residual_ss=np.mean((regr.predict(samples_smooth) - utility_smooth) ** 2)
        mystd = np.std(utility_smooth)

        return peak_x, mystd

    def check_real_peak(self, my_peak, threshold=0.1):

        # define the local bound around the estimated point
        local_bound = np.zeros((self.dim, 2))
        for dd in range(self.dim):
            try:
                local_bound[dd, 0] = my_peak[-1][dd] - 0.01
                local_bound[dd, 1] = my_peak[-1][dd] + 0.01
            except:
                local_bound[dd, 0] = my_peak[dd] - 0.01
                local_bound[dd, 1] = my_peak[dd] + 0.01

        #local_bound=np.clip(local_bound,self.scalebounds[:,0],self.scalebounds[:,1])
        local_bound[:, 0] = local_bound[:, 0].clip(self.scalebounds[:, 0],
                                                   self.scalebounds[:, 1])
        local_bound[:, 1] = local_bound[:, 1].clip(self.scalebounds[:, 0],
                                                   self.scalebounds[:, 1])

        dim = len(local_bound)
        num_data = 100 * dim
        samples = np.zeros(shape=(num_data, dim))
        for dd in range(0, dim):
            samples[:, dd] = np.linspace(local_bound[dd][0],
                                         local_bound[dd][1], num_data)

        # get the utility after smoothing
        myutility = self.acq_func.acq_kind(samples, self.gp, self.Y.max())

        # linear regression
        #regr = linear_model.LinearRegression()
        #regr.fit(samples, myutility)
        #residual_ss=np.mean((regr.predict(samples_smooth) - utility_smooth) ** 2)

        #mystd=np.std(myutility)
        mystd = np.mean(myutility)

        IsPeak = 0
        if mystd > threshold / (self.dim**2):
            IsPeak = 1
        return IsPeak, mystd

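    # check_real_peak accepts a candidate only if the mean acquisition value
    # in a small box around it exceeds threshold / dim**2, so the bar drops
    # in higher dimensions where the acquisition surface flattens out.
    # Illustrative numbers (not part of the class):
    #
    #   threshold, dim, mean_utility = 0.1, 2, 0.04
    #   is_peak = mean_utility > threshold / dim ** 2   # 0.04 > 0.025 -> True
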
    def estimate_L(self, bounds):
        '''
        Estimate the Lipschitz constant of f by maximizing the norm of the expectation of the gradient of *f*.
        '''
        def df(x, model, x0):
            mean_derivative = gp_model.predictive_gradient(self.X, self.Y, x)

            temp = mean_derivative * mean_derivative
            if len(temp.shape) <= 1:
                res = np.sqrt(temp)
            else:
                res = np.sqrt(
                    np.sum(temp, axis=1)
                )  # simply take the norm of the expectation of the gradient
            return -res

        gp_model = self.gp

        dim = len(bounds)
        num_data = 1000 * dim
        samples = np.zeros(shape=(num_data, dim))
        for k in range(0, dim):
            samples[:, k] = np.random.uniform(low=bounds[k][0],
                                              high=bounds[k][1],
                                              size=num_data)

        #samples = np.vstack([samples,gp_model.X])
        pred_samples = df(samples, gp_model, 0)
        x0 = samples[np.argmin(pred_samples)]

        res = minimize(df,
                       x0,
                       method='L-BFGS-B',
                       bounds=bounds,
                       args=(gp_model, x0),
                       options={'maxiter': 100})

        try:
            minusL = res.fun[0][0]
        except:
            if len(res.fun.shape) == 1:
                minusL = res.fun[0]
            else:
                minusL = res.fun

        L = -minusL
        if L < 1e-6:
            L = 0.0001  ## to avoid problems in cases in which the model is flat.
        return L

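    # estimate_L approximates the Lipschitz constant by maximizing the norm
    # of the GP predictive gradient (L-BFGS-B on its negative, seeded with
    # the best of 1000*dim random samples). Hedged usage sketch, assuming a
    # fitted instance 'bo' (illustrative only):
    #
    #   L = bo.estimate_L(bo.scalebounds)
    #   # values below 1e-6 are bumped up to 1e-4 so acquisition functions
    #   # that divide by L stay well-defined on a flat surrogate
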
    def maximize_batch_PS(self, gp_params, B=5, kappa=2):
        """
        Finding a batch of points using Peak Suppression approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
              
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

        # lie with the worst observed value, on the same normalized scale as
        # the targets the GP is fitted on
        const_liar = self.Y.min()

        # Set acquisition function
        #self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)

        y_max = self.Y.max()

        # Set parameters if any was passed
        self.gp = PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        start_opt = time.time()

        # copy GP, X and Y
        temp_gp = self.gp
        temp_X = self.X
        temp_Y = self.Y

        #store new_x
        new_X = []
        stdPeak = [0] * B
        IsPeak = [0] * B
        for ii in range(B):

            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind,
                            gp=temp_gp,
                            y_max=y_max,
                            bounds=self.scalebounds,
                            opt_toolbox=self.opt_toolbox)

            # Test if x_max is repeated, if it is, draw another one at random
            if np.any((np.abs(temp_X - x_max)).sum(axis=1) < 0.002 *
                      self.dim) | np.isnan(x_max.sum()):
                #x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1],size=self.scalebounds.shape[0])

                IsPeak[ii] = 0
                stdPeak[ii] = 0
                print "reject"
            else:
                IsPeak[ii], stdPeak[ii] = self.check_real_peak(x_max)

            print "IsPeak={:d} std={:.5f}".format(IsPeak[ii], stdPeak[ii])

            if ii == 0:
                new_X = x_max
            else:
                new_X = np.vstack((new_X, x_max.reshape((1, -1))))

            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))
            temp_Y = np.append(temp_Y, const_liar)

            #temp_gp.fit(temp_X,temp_Y)
            temp_gp.fit_incremental(x_max, np.asarray([const_liar]))
            """
            toplot_bo=copy.deepcopy(self)
            toplot_bo.gp=copy.deepcopy(temp_gp)
            toplot_bo.X=temp_X
            toplot_bo.X_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(temp_X)]
            toplot_bo.X_original=np.asarray(toplot_bo.X_original)
            toplot_bo.Y=temp_Y
            toplot_bo.Y_original=temp_Y*(np.max(self.Y_original)-np.min(self.Y_original))+np.mean(self.Y_original)
            visualization.plot_bo(toplot_bo)
            """

        IsPeak = np.asarray(IsPeak)

        # check if there is no real peak, then pick up the top peak (highest std)

        # rank the peaks by their local deviation (indices; currently unused)
        idx = np.argsort(stdPeak)

        if np.sum(IsPeak) == 0:
            top_peak = np.argmax(stdPeak)
            new_X = new_X[top_peak]
        else:
            new_X = new_X[IsPeak == 1]

        print new_X

        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.opt_time = np.hstack((self.opt_time, elapse_opt))

        # Updating the GP.
        #new_X=new_X.reshape((-1, self.dim))

        # Test if x_max is repeated, if it is, draw another one at random
        temp_new_X = []
        for idx, val in enumerate(new_X):
            if np.all(
                    np.any(np.abs(self.X - val) > 0.02,
                           axis=1)):  # check if a data point is already taken
                temp_new_X = np.append(temp_new_X, val)

        if len(temp_new_X) == 0:
            temp_new_X = np.zeros((1, self.dim))
            for idx in range(0, self.dim):
                temp_new_X[0,
                           idx] = np.random.uniform(self.scalebounds[idx, 0],
                                                    self.scalebounds[idx,
                                                                     1], 1)
        else:
            temp_new_X = temp_new_X.reshape((-1, self.dim))

        self.X = np.vstack((self.X, temp_new_X))

        # convert back to original scale
        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(temp_new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        for idx, val in enumerate(temp_X_new_original):
            self.Y_original = np.append(self.Y_original, self.f(val))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
        self.NumPoints = np.append(self.NumPoints,
                                   temp_X_new_original.shape[0])

        print "#Batch={:d} f_max={:.4f}".format(temp_X_new_original.shape[0],
                                                self.Y_original.max())

    def maximize_batch_CL(self, gp_params, B=5):
        """
        Finding a batch of points using Constant Liar approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
              
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

        self.NumPoints = np.append(self.NumPoints, B)

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=B) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X

            #self.Y = np.append(self.Y, self.f(temp_X_new_original))
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.opt_time = np.hstack((self.opt_time, 0))
            return

        #const_liar=self.Y.mean()
        #const_liar=self.Y_original.mean()
        #const_liar=self.Y.max()

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        y_max = self.Y.max()

        # Set parameters if any was passed
        self.gp = PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        start_opt = time.time()

        # copy GP, X and Y
        temp_gp = self.gp
        temp_X = self.X
        temp_Y = self.Y
        #temp_Y_original=self.Y_original

        #store new_x
        new_X = []
        for ii in range(B):

            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind,
                            gp=temp_gp,
                            y_max=y_max,
                            bounds=self.scalebounds)
            val_acq = self.acq_func.acq_kind(x_max, temp_gp, y_max)
            print "CL alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])

            # Test if x_max is repeated, if it is, draw another one at random
            # If it is repeated, print a warning
            #if np.any((self.X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()):
            #x_max = np.random.uniform(self.scalebounds[:, 0], self.scalebounds[:, 1],size=self.scalebounds.shape[0])

            if ii == 0:
                new_X = x_max
            else:
                new_X = np.vstack((new_X, x_max.reshape((1, -1))))

            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))

            const_liar, const_liar_variance = temp_gp.predict(x_max,
                                                              eval_MSE=1)
            temp_Y = np.append(temp_Y, const_liar)

            temp_gp.fit(temp_X, temp_Y)

        # Updating the GP.
        new_X = new_X.reshape((B, -1))

        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.opt_time = np.hstack((self.opt_time, elapse_opt))

        #print new_X

        self.X = np.vstack((self.X, new_X))

        # convert back to original scale
        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        for idx, val in enumerate(temp_X_new_original):
            self.Y_original = np.append(self.Y_original, self.f(val))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
        #print "#Batch={:d} f_max={:.4f}".format(B,self.Y_original.max())

        return new_X, temp_X_new_original

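    # Constant Liar in a nutshell: after choosing x_1, pretend its outcome is
    # a fixed "lie" (here the GP predictive mean at x_1), refit, and choose
    # the next point; repeat B times before any real evaluation. Shape of the
    # loop (pseudocode-level sketch, not the exact implementation):
    #
    #   for ii in range(B):
    #       x = acq_max(ac, gp=temp_gp, y_max=y_max, bounds=scalebounds)
    #       lie, _ = temp_gp.predict(x, eval_MSE=1)  # predicted mean as lie
    #       temp_gp.fit(vstack((temp_X, x)), append(temp_Y, lie))
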
    def maximize_batch_CL_incremental(self, gp_params, B=5):
        """
        Finding a batch of points using Constant Liar approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
              
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

        self.NumPoints = np.append(self.NumPoints, B)

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=B) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X

            #self.Y = np.append(self.Y, self.f(temp_X_new_original))
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.opt_time = np.hstack((self.opt_time, 0))
            return

        #const_liar=self.Y.mean()
        #const_liar=self.Y_original.min()
        #const_liar=self.Y.max()

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        y_max = self.Y.max()

        # Set parameters if any was passed
        self.gp = PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        start_opt = time.time()

        # copy GP, X and Y
        temp_gp = copy.deepcopy(self.gp)
        temp_X = self.X
        temp_Y = self.Y
        #temp_Y_original=self.Y_original

        #store new_x
        new_X = []
        for ii in range(B):

            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind,
                            gp=temp_gp,
                            y_max=y_max,
                            bounds=self.scalebounds)

            # Test if x_max is repeated, if it is, draw another one at random
            if np.any(
                    np.any(np.abs(self.X - x_max) < 0.02,
                           axis=1)):  # check if a data point is already taken
                x_max = np.random.uniform(self.scalebounds[:, 0],
                                          self.scalebounds[:, 1],
                                          size=self.scalebounds.shape[0])

            if ii == 0:
                new_X = x_max
            else:
                new_X = np.vstack((new_X, x_max.reshape((1, -1))))

            # use the GP predictive mean at x_max as the constant lie
            const_liar, const_liar_variance = temp_gp.predict(x_max,
                                                              eval_MSE=True)

            #temp_X= np.vstack((temp_X, x_max.reshape((1, -1))))
            #temp_Y = np.append(temp_Y, const_liar )

            #temp_gp.fit(temp_X,temp_Y)

            # update the Gaussian Process and thus the acquisition function
            #temp_gp.compute_incremental_var(temp_X,x_max)
            temp_gp.fit_incremental(x_max, np.asarray([const_liar]))

        # Updating the GP.
        new_X = new_X.reshape((B, -1))

        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.opt_time = np.hstack((self.opt_time, elapse_opt))

        #print new_X

        self.X = np.vstack((self.X, new_X))

        # convert back to original scale
        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        for idx, val in enumerate(temp_X_new_original):
            self.Y_original = np.append(self.Y_original, self.f(val))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
        #print "#Batch={:d} f_max={:.4f}".format(B,self.Y_original.max())

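    # The incremental variant differs from maximize_batch_CL only in how the
    # lie is absorbed: instead of refitting the GP from scratch at each step
    # (cubic in the number of points), fit_incremental applies a rank-one
    # block-inverse update to the kernel matrix inverse:
    #
    #   # with K_new = [[K, k], [k.T, kxx]], only k = K(X, x) and the scalar
    #   # kxx = K(x, x) are new; K^{-1} can be updated in O(n^2)
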
    def fitIGMM(self, obs, IsPlot=0):
        """
        Fitting the Infinite Gaussian Mixture Model and GMM where applicable
        Input Parameters
        ----------
        
        obs:        samples generated under the acquisition function by BGSS
        
        IsPlot:     flag variable for visualization    
        
        
        Returns
        -------
        mean vector: mu_1,...mu_K
        """

        if self.dim <= 2:
            n_init_components = 3
        else:
            n_init_components = np.int(self.dim * 1.1)

        dpgmm = mixture.DPGMM(n_components=n_init_components,
                              covariance_type="full",
                              min_covar=10)
        dpgmm.fit(obs)

        # check if DPGMM fail, then use GMM.
        mydist = euclidean_distances(dpgmm.means_, dpgmm.means_)
        np.fill_diagonal(mydist, 99)
        if dpgmm.converged_ is False or np.min(mydist) < (0.01 * self.dim):
            dpgmm = mixture.GMM(n_components=n_init_components,
                                covariance_type="full",
                                min_covar=1e-3)
            dpgmm.fit(obs)

        if self.dim >= 5:
            # since kmeans does not provide weight and means, we will manually compute it
            try:
                dpgmm.weights_ = np.histogram(dpgmm.labels_,
                                              np.int(self.dim * 1.2))
                dpgmm.weights_ = np.true_divide(dpgmm.weights_[0],
                                                np.sum(dpgmm.weights_[0]))
                dpgmm.means_ = dpgmm.cluster_centers_
            except:
                pass

        # truncated for variational inference
        weight = dpgmm.weights_
        weight_sorted = np.sort(weight)
        weight_sorted = weight_sorted[::-1]
        temp_cumsum = np.cumsum(weight_sorted)

        cutpoint = 0
        for idx, val in enumerate(temp_cumsum):
            if val > 0.73:
                cutpoint = weight_sorted[idx]
                break

        ClusterIndex = [
            idx for idx, val in enumerate(dpgmm.weights_) if val >= cutpoint
        ]

        myMeans = dpgmm.means_[ClusterIndex]
        #dpgmm.means_=dpgmm.means_[ClusterIndex]
        dpgmm.truncated_means_ = dpgmm.means_[ClusterIndex]

        #myCov=dpgmm.covars_[ClusterIndex]

        if IsPlot == 1 and self.dim <= 2:
            visualization.plot_histogram(self, obs)
            visualization.plot_mixturemodel(dpgmm, self, obs)

        new_X = myMeans.reshape((len(ClusterIndex), -1))
        new_X = new_X.tolist()

        return new_X

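    # The 0.73 cut above keeps the smallest set of mixture components whose
    # sorted weights cover roughly 73% of the total mass. Worked sketch
    # (illustrative, not part of the class):
    #
    #   import numpy as np
    #   w = np.array([0.05, 0.5, 0.3, 0.15])
    #   w_sorted = np.sort(w)[::-1]                  # [0.5, 0.3, 0.15, 0.05]
    #   cut = w_sorted[np.argmax(np.cumsum(w_sorted) > 0.73)]   # 0.3
    #   keep = [i for i, v in enumerate(w) if v >= cut]         # [1, 2]
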
    def maximize_batch_B3O(self, gp_params, kappa=2, IsPlot=0):
        """
        Finding a batch of points using Budgeted Batch Bayesian Optimization approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
        
        IsPlot:             flag variable for visualization    
        
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # Step 2 in the Algorithm

        # Set parameters for Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        if len(self.gp.KK_x_x_inv) == 0:  # check if empty
            self.gp.fit(self.X, self.Y)
        #else:
        #self.gp.fit_incremental(self.X[ur], self.Y[ur])

        # record optimization time
        start_gmm_opt = time.time()

        if IsPlot == 1 and self.dim <= 2:  #plot
            visualization.plot_bo(self)

        # Step 4 in the Algorithm
        # generate samples from Acquisition function

        # check the bound 0-1 or original bound
        obs = acq_batch_generalized_slice_sampling_generate(
            self.acq_func.acq_kind,
            self.gp,
            self.scalebounds,
            N=500,
            y_max=self.Y.max())

        # Step 5 and 6 in the Algorithm
        if len(obs) == 0:  # monotonous acquisition function
            print "Monotonous acquisition function"
            new_X = np.random.uniform(self.bounds[:, 0],
                                      self.bounds[:, 1],
                                      size=self.bounds.shape[0])
            new_X = new_X.reshape((1, -1))
            new_X = new_X.tolist()

        else:
            new_X = self.fitIGMM(obs, IsPlot)

        # Test if x_max is repeated, if it is, draw another one at random
        temp_new_X = []
        for idx, val in enumerate(new_X):
            if np.all(
                    np.any(np.abs(self.X - val) > 0.02,
                           axis=1)):  # check if a data point is already taken
                temp_new_X = np.append(temp_new_X, val)

        if len(temp_new_X) == 0:
            temp_new_X = np.zeros((1, self.dim))
            for idx in range(0, self.dim):
                temp_new_X[0,
                           idx] = np.random.uniform(self.scalebounds[idx, 0],
                                                    self.scalebounds[idx,
                                                                     1], 1)
        else:
            temp_new_X = temp_new_X.reshape((-1, self.dim))

        self.NumPoints = np.append(self.NumPoints, temp_new_X.shape[0])

        finished_gmm_opt = time.time()
        elapse_gmm_opt = finished_gmm_opt - start_gmm_opt

        self.opt_time = np.hstack((self.opt_time, elapse_gmm_opt))

        self.X = np.vstack((self.X, temp_new_X))

        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(temp_new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)

        # Step 7 in the algorithm
        # Evaluate y=f(x)

        temp = self.f(temp_X_new_original)
        temp = np.reshape(temp, (-1, 1))

        # Step 8 in the algorithm

        self.Y_original = np.append(self.Y_original, temp)

        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        print "#Batch={:d} f_max={:.4f}".format(temp_new_X.shape[0],
                                                self.Y_original.max())

        #ur = unique_rows(self.X)
        #self.gp.fit(self.X[ur], self.Y[ur])
        #self.gp.fit_incremental(temp_new_X, temp_new_Y)

#======================================================================================================

    def maximize_batch_BUCB(self, gp_params, B=5):
        """
        Finding a batch of points using GP-BUCB approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        B:                  fixed batch size for all iteration
        
        kappa:              constant value in UCB
        
        IsPlot:             flag variable for visualization    
        
        
        Returns
        -------
        X: a batch of [x_1..x_B]
        """
        self.B = B

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        # Set parameters if any was passed
        self.gp = PradaGaussianProcess(gp_params)

        if len(self.gp.KK_x_x_inv) == 0:  # check if empty
            self.gp.fit(self.X, self.Y)
        #else:
        #self.gp.fit_incremental(self.X[ur], self.Y[ur])

        start_gmm_opt = time.time()

        y_max = self.gp.Y.max()
        # check the bound 0-1 or original bound
        temp_X = self.X
        temp_gp = self.gp
        temp_gp.X_bucb = temp_X
        temp_gp.KK_x_x_inv_bucb = self.gp.KK_x_x_inv

        # finding new X
        new_X = []
        for ii in range(B):
            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind,
                            gp=temp_gp,
                            y_max=y_max,
                            bounds=self.scalebounds)

            if np.any(
                (temp_X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()):
                x_max = np.random.uniform(self.scalebounds[:, 0],
                                          self.scalebounds[:, 1],
                                          size=self.scalebounds.shape[0])

            if ii == 0:
                new_X = x_max
            else:
                new_X = np.vstack((new_X, x_max.reshape((1, -1))))

            # update the Gaussian Process and thus the acquisition function
            temp_gp.compute_incremental_var(temp_X, x_max)

            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))
            temp_gp.X_bucb = temp_X

        # record the optimization time
        finished_gmm_opt = time.time()
        elapse_gmm_opt = finished_gmm_opt - start_gmm_opt

        self.opt_time = np.hstack((self.opt_time, elapse_gmm_opt))

        self.NumPoints = np.append(self.NumPoints, B)

        self.X = temp_X

        # convert back to original scale
        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        # evaluate y=f(x)
        temp = self.f(temp_X_new_original)
        temp = np.reshape(temp, (-1, 1))
        self.Y_original = np.append(self.Y_original, temp)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
        print "#Batch={:d} f_max={:.4f}".format(new_X.shape[0],
                                                self.Y_original.max())
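
    # GP-BUCB never refits on hallucinated targets: between real evaluations
    # it only propagates the variance shrinkage of each selected point
    # (compute_incremental_var), exploiting the fact that conditioning a GP
    # on a point's location shrinks the posterior variance without needing
    # its y value. Shape of the loop (pseudocode-level sketch):
    #
    #   for ii in range(B):
    #       x = acq_max(ac, gp=temp_gp, y_max=y_max, bounds=scalebounds)
    #       temp_gp.compute_incremental_var(temp_X, x)  # variance-only update
    #       temp_X = vstack((temp_X, x))
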
    def maximize(self,gp_params,kappa=2):
        """
        Main optimization method.

        Input parameters
        ----------

        kappa: parameter for UCB acquisition only.

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommented point for evaluation
        """

        # init a new Gaussian Process
        self.gp=PradaGaussianProcess(gp_params)
        
        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])
        

        # Set acquisition function
        start_opt=time.time()

        acq=self.acq
        
        # select the acquisition function
        if acq=='nei':
            self.L=self.estimate_L(self.bounds)
            self.acq_func = AcquisitionFunction(kind=self.acq, L=self.L)
        else:
            self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)

        y_max = self.Y.max()
        
        # select the optimization toolbox        
        if self.opt=='nlopt':
            x_max,f_max = acq_max_nlopt(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
        if self.opt=='scipy':
            x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
        if self.opt=='direct':
            x_max = acq_max_direct(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)

        # record the optimization time
        finished_opt=time.time()
        elapse_opt=finished_opt-start_opt
        self.time_opt=np.hstack((self.time_opt,elapse_opt))
        
        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max).sum(axis=1) == 0):

            x_max = np.random.uniform(self.scalebounds[:, 0],
                                      self.scalebounds[:, 1],
                                      size=self.scalebounds.shape[0])
                                     
        # store X                                     
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original=x_max*self.max_min_gap+self.bounds[:,0]
        self.X_original=np.vstack((self.X_original, temp_X_new_original))
        # evaluate Y using original X
        self.Y = np.append(self.Y, self.f(temp_X_new_original))
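    # --- Added sketch (not from the original example) ---------------------
    # A hypothetical driver loop showing how a caller might use the
    # sequential maximize() above; "bo" stands for an instance of one of the
    # PradaBayOptFn variants in this file and gp_params for its GP settings.
    @staticmethod
    def _usage_sketch(bo, gp_params, n_iter=20):
        bo.init(gp_params, n_init_points=3)      # random initial design
        for _ in range(n_iter):
            bo.maximize(gp_params, kappa=2)      # one pick-and-evaluate step
        best = np.argmax(bo.Y)                   # best (normalised) observation
        return bo.X[best], bo.Y[best]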
    def maximize_batch_B3O(self,gp_params, kappa=2,IsPlot=0):
        """
        Finding a batch of points using Budgeted Batch Bayesian Optimization approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
        
        IsPlot:             flag variable for visualization    
        
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

                
        # Set acquisition function
        self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)
        
        
        # Step 2 in the Algorithm
        
        # Set parameters for Gaussian Process
        self.gp=PradaGaussianProcess(gp_params)
        
        if len(self.gp.KK_x_x_inv)==0: # check if empty
            self.gp.fit(self.X, self.Y)
        #else:
            #self.gp.fit_incremental(self.X[ur], self.Y[ur])
        
        # record optimization time
        start_gmm_opt=time.time()
        
        
        if IsPlot==1 and self.dim<=2:#plot
            visualization.plot_bo(self)                
                
        # Step 4 in the Algorithm
        # generate samples from Acquisition function
        
        # check the bound 0-1 or original bound        
        obs=acq_batch_generalized_slice_sampling_generate(self.acq_func.acq_kind,self.gp,self.scalebounds,N=500,y_max=self.Y.max())
        
        
        # Step 5 and 6 in the Algorithm
        if len(obs)==0: # monotonous acquisition function
            print "Monotonous acquisition function"
            new_X=np.random.uniform(self.bounds[:, 0],self.bounds[:, 1],size=self.bounds.shape[0])
            new_X=new_X.reshape((1,-1))
            new_X=new_X.tolist()

        else:
            new_X=self.fitIGMM(obs,IsPlot)
            

        # Test if x_max is repeated, if it is, draw another one at random
        temp_new_X=[]
        for idx,val in enumerate(new_X):
            if np.all(np.any(np.abs(self.X-val)>0.02,axis=1)): # check if a data point is already taken
                temp_new_X=np.append(temp_new_X,val)
                
        
        if len(temp_new_X)==0:
            temp_new_X=np.zeros((1,self.dim))
            for idx in range(0,self.dim):
                temp_new_X[0,idx]=np.random.uniform(self.scalebounds[idx,0],self.scalebounds[idx,1],1)
        else:
            temp_new_X=temp_new_X.reshape((-1,self.dim))
            
        self.NumPoints=np.append(self.NumPoints,temp_new_X.shape[0])


        finished_gmm_opt=time.time()
        elapse_gmm_opt=finished_gmm_opt-start_gmm_opt
        
        self.opt_time=np.hstack((self.opt_time,elapse_gmm_opt))
        
       
        self.X=np.vstack((self.X, temp_new_X))
        
        temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(temp_new_X)]
        temp_X_new_original=np.asarray(temp_X_new_original)
        
        
        # Step 7 in the algorithm
        # Evaluate y=f(x)
        
        temp=self.f(temp_X_new_original)
        temp=np.reshape(temp,(-1,1))
        
        # Step 8 in the algorithm
        
        self.Y=np.append(self.Y,temp)
        self.X_original=np.vstack((self.X_original, temp_X_new_original))

        print "#Batch={:d} f_max={:.3f}".format(temp_new_X.shape[0],self.Y.max())
    def __init__(self,
                 gp_params,
                 f,
                 pbounds,
                 acq,
                 verbose=1,
                 opt_toolbox='scipy'):
        """      
        Input parameters
        ----------
        f:              function to optimize:        
        pbounds:        bounds on parameters        
        acq:            acquisition function, 'ei', 'ucb'        
        opt:            optimization toolbox, 'nlopt','direct','scipy'
        
        Returns
        -------
        dim:            dimension
        bounds:         bounds on original scale
        scalebounds:    bounds on normalized scale of 0-1
        time_opt:       will record the time spent on optimization
        gp:             Gaussian Process object
        """
        # Store the original dictionary
        self.pbounds = pbounds

        # Find number of parameters
        self.dim = len(pbounds)

        # Create an array with parameters bounds

        if isinstance(pbounds, dict):
            # Get the name of the parameters
            self.keys = list(pbounds.keys())

            self.bounds = []
            for key in self.pbounds.keys():
                self.bounds.append(self.pbounds[key])
            self.bounds = np.asarray(self.bounds)
        else:
            self.bounds = np.asarray(pbounds)

        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]

        # Some function to be optimized
        self.f = f

        # optimization tool: direct, scipy, nlopt
        self.opt_toolbox = opt_toolbox
        # acquisition function type
        self.acq = acq

        # store the batch size for each iteration
        self.NumPoints = []
        # Numpy array place holders
        self.X_original = None

        # scale the data to 0-1 to fit the GP better
        self.X = None  # X = (X_original - min(bounds)) / (max(bounds) - min(bounds))

        self.Y = None  # Y = (Y_original - mean(Y_original)) / (max(Y_original) - min(Y_original))
        self.Y_original = None
        self.opt_time = 0

        self.L = 0  # lipschitz

        self.gp = PradaGaussianProcess(gp_params)

        # Acquisition Function
        #self.acq_func = None
        self.acq_func = AcquisitionFunction(acq=self.acq)
Example #14
    def expandBoundsDDB_MAP(self):
        """
        Description: Expands the search space with the MAP implementation of
        our DDB method

        """
        
        print('Attempting to expand search space with DDB-MAP method')
        alpha=self.alpha
        beta=self.beta
        bound_samples=100    # Number of radius samples used to fit the log-logistic distribution
        # Find y^+ and x^+
        ymax=np.max(self.Y)
        # Generate test radii
        max_loc=np.argmax(self.Y)
        xmax=self.X[max_loc]
        test_bound=np.zeros(self.scalebounds.shape)
        bound_dist=np.zeros(bound_samples)
        bound_center=xmax
        test_bound[:,1]=bound_center+0.5
        test_bound[:,0]=bound_center-0.5
        max_radius=np.max(np.array([np.max(max_bound_size-test_bound[:,1]),np.max(test_bound[:,0])]))
        step=max_radius/bound_samples
        packing_number=np.zeros(bound_samples)
        # Generate a Thompson sample maxima to estimate internal maxima
        TS=AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x,tsb_y=acq_max_global(TS, self.gp, bounds=self.scalebounds)
        # Generate Gumbel samples to estimate the external maxima
        for i in range(0,bound_samples):
            bound_length=test_bound[:,1]-test_bound[:,0]
            volume=np.power(max_bound_size,self.dim)-np.prod(bound_length)
            packing_number[i]=round(volume/(5*self.gp.lengthscale))
            mu=stats.norm.ppf(1.0-1.0/packing_number[i])
            sigma=stats.norm.ppf(1.0-(1.0/packing_number[i])*np.exp(-1.0))-stats.norm.ppf(1.0-(1.0/(packing_number[i])))
            bound_dist[i]=np.exp(-np.exp(-(-tsb_y-mu)/sigma))
            test_bound[:,1]=test_bound[:,1]+step
            test_bound[:,0]=test_bound[:,0]-step
        bound_dist[np.isnan(bound_dist)]=1
        # Fit the log-logistic parameters to the Gumbel samples
        xfit=np.arange(0,max_radius,max_radius/100)
        popt,pcov=optimize.curve_fit(self.sufficientBoundPDF,xfit[0:100],bound_dist,bounds=np.array([[5,1.1],[20,5]]))
        print("popt={}".format(popt))
        b=ymax/popt[0]
        a=popt[1]
        print("b={}, ymax={}".format(b,ymax))
        # Find the gamma and log-logistic modes to determine the optimisation bound
        c=ymax/b
        loglog_mode=a*np.power((c-1.0)/(c+1.0),(1/c))
        gamma_mode=(alpha-1)/beta
        opt_bound=np.ones([2])
        opt_bound[0]=min(loglog_mode,gamma_mode)
        opt_bound[1]=max(loglog_mode,gamma_mode)
        bound_range=(opt_bound[1]-opt_bound[0])
        # Find MAP Estimate of radius r
        for d in range(0,self.dim):
            r_max=0
            p_max=0
            for x0 in np.arange(opt_bound[0],opt_bound[1],bound_range/10):
                res=optimize.minimize(lambda x: self.radiusPDF(x,alpha,beta,b,ymax,a),x0=x0, bounds=np.array([opt_bound]), method='L-BFGS-B')
                if -res.fun>p_max:
                    r_max=res.x
                    p_max=-res.fun
            if r_max>opt_bound[1]:
                r_max=opt_bound[1]
            xplot=np.arange(0,10,0.01)
            yplot=-self.radiusPDF(xplot,alpha,beta,b,ymax,a)
            max_loc=np.argmax(yplot)

            print("optimal radius of {} with unscaled probability of {}".format(r_max,p_max))
            self.scalebounds[d,1]=xmax[d]+r_max
            self.scalebounds[d,0]=xmax[d]-r_max
        print("seach space extended to {} with DDB".format(self.scalebounds))
class PradaBayOptFn(object):
    def __init__(self,
                 gp_params,
                 f,
                 init_bounds,
                 pbounds,
                 acq,
                 verbose=1,
                 opt_toolbox='nlopt'):
        """      
        Input parameters
        ----------
        f:              function to optimize:        
        pbounds:        bounds on parameters        
        acq:            acquisition function, acq['name']=['ei','ucb','poi','lei']
                            ,acq['kappa'] for ucb, acq['k'] for lei
        opt:            optimization toolbox, 'nlopt','direct','scipy'
        
        Returns
        -------
        dim:            dimension
        bounds:         bounds on original scale
        scalebounds:    bounds on normalized scale of 0-1
        time_opt:       will record the time spent on optimization
        gp:             Gaussian Process object
        """

        # Find number of parameters
        self.dim = len(pbounds)

        # Create an array with parameters bounds
        if isinstance(pbounds, dict):
            # Get the name of the parameters
            self.keys = list(pbounds.keys())

            self.bounds = []
            for key in pbounds.keys():
                self.bounds.append(pbounds[key])
            self.bounds = np.asarray(self.bounds)
        else:
            self.bounds = np.asarray(pbounds)

        if len(init_bounds) == 0:
            self.init_bounds = self.bounds.copy()
        else:
            self.init_bounds = init_bounds

        if isinstance(init_bounds, dict):
            # Get the name of the parameters
            self.keys = list(init_bounds.keys())

            self.init_bounds = []
            for key in init_bounds.keys():
                self.init_bounds.append(init_bounds[key])
            self.init_bounds = np.asarray(self.init_bounds)
        else:
            self.init_bounds = np.asarray(init_bounds)

        # create a scalebounds 0-1
        scalebounds = np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds = scalebounds.T

        self.max_min_gap = self.bounds[:, 1] - self.bounds[:, 0]

        # Some function to be optimized
        self.f = f
        # optimization toolbox
        self.opt_toolbox = opt_toolbox
        # acquisition function type

        self.acq = acq

        # store X in original scale
        self.X_original = None

        # store X in 0-1 scale
        self.X = None

        # store y=f(x)
        # (y - mean)/(max-min)
        self.Y = None

        # y original scale
        self.Y_original = None

        self.time_opt = 0

        self.k_Neighbor = 2

        # Lipschitz constant
        self.L = 0

        # Gaussian Process class
        self.gp = PradaGaussianProcess(gp_params)

        # acquisition function
        self.acq_func = None

        # stop condition
        self.stop_flag = 0

    # will be later used for visualization
    def posterior(self, Xnew):
        self.gp.fit(self.X, self.Y)
        mu, sigma2 = self.gp.predict(Xnew, eval_MSE=True)
        return mu, np.sqrt(sigma2)

    def init(self, gp_params, n_init_points=3):
        """      
        Input parameters
        ----------
        gp_params:            Gaussian Process structure      
        n_init_points:        # init points
        """

        # Generate random points
        l = [
            np.random.uniform(x[0], x[1], size=n_init_points)
            for x in self.init_bounds
        ]

        # Concatenate new random points to possible existing
        # points from self.explore method.
        temp = np.asarray(l)
        temp = temp.T
        init_X = list(temp.reshape((n_init_points, -1)))

        self.X_original = np.asarray(init_X)

        # Evaluate target function at all initialization
        y_init = self.f(init_X)
        y_init = np.reshape(y_init, (n_init_points, 1))

        self.Y_original = np.asarray(y_init)
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))

        # convert it to scaleX
        temp_init_point = np.divide((init_X - self.bounds[:, 0]),
                                    self.max_min_gap)

        self.X = np.asarray(temp_init_point)

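    # --- Added sketch (not from the original example) ---------------------
    # Tiny numeric check of the two transforms used throughout this class:
    # inputs are min-max scaled to [0, 1]; outputs are centred by their mean
    # and divided by their range.
    @staticmethod
    def _scaling_sketch():
        bounds = np.array([[-5.0, 5.0], [0.0, 10.0]])
        max_min_gap = bounds[:, 1] - bounds[:, 0]
        x_scaled = (np.array([0.0, 2.5]) - bounds[:, 0]) / max_min_gap
        y = np.array([1.0, 3.0, 5.0])
        y_norm = (y - np.mean(y)) / (np.max(y) - np.min(y))
        return x_scaled, y_norm   # -> [0.5, 0.25] and [-0.5, 0.0, 0.5]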
    def estimate_L(self, bounds):
        '''
        Estimate the Lipschitz constant of f by maximizing the norm of the expectation of the gradient of *f*.
        '''
        def df(x, model, x0):
            mean_derivative = gp_model.predictive_gradient(self.X, self.Y, x)

            temp = mean_derivative * mean_derivative
            if len(temp.shape) <= 1:
                res = np.sqrt(temp)
            else:
                res = np.sqrt(
                    np.sum(temp, axis=1)
                )  # simply take the norm of the expectation of the gradient

            return -res

        gp_model = self.gp

        dim = len(bounds)
        num_data = 1000 * dim
        samples = np.zeros(shape=(num_data, dim))
        for k in range(0, dim):
            samples[:, k] = np.random.uniform(low=bounds[k][0],
                                              high=bounds[k][1],
                                              size=num_data)

        #samples = np.vstack([samples,gp_model.X])
        pred_samples = df(samples, gp_model, 0)
        x0 = samples[np.argmin(pred_samples)]

        res = minimize(df,
                       x0,
                       method='L-BFGS-B',
                       bounds=bounds,
                       args=(gp_model, x0),
                       options={'maxiter': 100})

        try:
            minusL = res.fun[0][0]
        except:
            if len(res.fun.shape) == 1:
                minusL = res.fun[0]
            else:
                minusL = res.fun

        L = -minusL
        if L < 1e-6:
            L = 0.0001  ## to avoid problems in cases in which the model is flat.

        return L
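    # --- Added sketch (not from the original example) ---------------------
    # The same idea as estimate_L in a self-contained form: sample the box,
    # take the largest gradient norm, and floor the result so a flat model
    # never yields a zero Lipschitz constant. "grad" here is a known toy
    # gradient (of f(x) = ||x||^2), standing in for the GP's predictive
    # gradient.
    @staticmethod
    def _lipschitz_sketch(grad=lambda x: 2.0 * x,
                          bounds=np.array([[-1.0, 1.0]] * 3),
                          num_samples=1000):
        samples = np.random.uniform(bounds[:, 0], bounds[:, 1],
                                    size=(num_samples, len(bounds)))
        norms = np.linalg.norm(grad(samples), axis=1)
        return max(norms.max(), 0.0001)   # ~ 2*sqrt(3) for this toy gradient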

    def maximize(self, gp_params, kappa=2):
        """
        Main optimization method.

        Input parameters
        ----------

        kappa: parameter for UCB acquisition only.

        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=1) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X

            #self.Y = np.append(self.Y, self.f(temp_X_new_original))
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.time_opt = np.hstack((self.time_opt, 0))
            return

        # init a new Gaussian Process
        self.gp = PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Set acquisition function
        start_opt = time.time()

        acq = self.acq
        y_max = self.Y.max()

        #self.L=self.estimate_L(self.scalebounds)
        # select the acquisition function
        if acq['name'] == 'nei':
            self.L = self.estimate_L(self.scalebounds)
            self.acq_func = AcquisitionFunction(kind=self.acq, L=self.L)
        else:
            self.acq_func = AcquisitionFunction(self.acq)

            if acq['name'] == "ei_mu":
                #find the maximum in the predictive mean
                mu_acq = {}
                mu_acq['name'] = 'mu'
                mu_acq['dim'] = self.dim
                acq_mu = AcquisitionFunction(mu_acq)
                x_mu_max = acq_max(ac=acq_mu.acq_kind,
                                   gp=self.gp,
                                   y_max=y_max,
                                   bounds=self.scalebounds,
                                   opt_toolbox=self.opt_toolbox)
                # set y_max = mu_max
                y_max = acq_mu.acq_kind(x_mu_max, gp=self.gp, y_max=y_max)

        x_max = acq_max(ac=self.acq_func.acq_kind,
                        gp=self.gp,
                        y_max=y_max,
                        bounds=self.scalebounds,
                        opt_toolbox=self.opt_toolbox)

        val_acq = self.acq_func.acq_kind(x_max, self.gp, y_max)
        #print "alpha[x_max]={:.5f}".format(np.ravel(val_acq)[0])
        # check the value alpha(x_max)==0
        #if val_acq<0.0001:
        #self.stop_flag=1
        #return

        # select the optimization toolbox
        """      
        if self.opt=='nlopt':
            x_max,f_max = acq_max_nlopt(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
        if self.opt=='scipy':
            
        if self.opt=='direct':
            x_max = acq_max_direct(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
        """

        # record the optimization time
        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.time_opt = np.hstack((self.time_opt, elapse_opt))

        # Test if x_max is repeated, if it is, draw another one at random
        if np.any((self.X - x_max).sum(axis=1) == 0):

            x_max = np.random.uniform(self.scalebounds[:, 0],
                                      self.scalebounds[:, 1],
                                      size=self.scalebounds.shape[0])

        # store X
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # compute X in original scale
        temp_X_new_original = x_max * self.max_min_gap + self.bounds[:, 0]
        self.X_original = np.vstack((self.X_original, temp_X_new_original))
        # evaluate Y using original X

        #self.Y = np.append(self.Y, self.f(temp_X_new_original))
        self.Y_original = np.append(self.Y_original,
                                    self.f(temp_X_new_original))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
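# --- Added sketch (not from the original example) --------------------------
# The closed-form expected improvement that the 'ei' acquisition relies on,
# assuming a Gaussian posterior with mean mu and standard deviation sigma at
# x and incumbent y_max. The 'ei_mu' branch above simply swaps y_max for the
# maximum of the posterior mean before this formula is maximised.
def _expected_improvement(mu, sigma, y_max):
    if sigma == 0:
        return 0.0
    z = (mu - y_max) / sigma
    return (mu - y_max) * stats.norm.cdf(z) + sigma * stats.norm.pdf(z)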
def reduced_eval_consistent_bayesian_model(
    bayesian_model: mc_dropout.BayesianModule,
    acquisition_function: AcquisitionFunction,
    num_classes: int,
    k: int,
    initial_percentage: int,
    reduce_percentage: int,
    target_size: int,
    available_loader,
    device=None,
) -> SubsetEvalResults:
    """Performs a scoring step with k inference samples while reducing the dataset to at most min_remaining_percentage.

    Before computing anything at all the initial available dataset is randomly culled to initial_percentage.

    Every `chunk_size` inferences BALD is recomputed and the bottom `reduce_percentage` samples are dropped."""
    global reduced_eval_consistent_bayesian_model_cuda_chunk_size

    # TODO: ActiveLearningData should be renamed to be a more modular SplitDataset.
    # Here, we need to use available_dataset because it allows us to easily recover the original indices.

    # We start with all data in the acquired data.
    subset_split = active_learning_data.ActiveLearningData(
        available_loader.dataset)
    initial_length = len(available_loader.dataset)

    initial_split_length = initial_length * initial_percentage // 100

    # By acquiring [initial_split_length:], we make the tail unavailable.
    subset_split.acquire(torch.randperm(initial_length)[initial_split_length:])

    subset_dataloader = data.DataLoader(subset_split.available_dataset,
                                        shuffle=False,
                                        batch_size=available_loader.batch_size)

    print(f"Scoring subset of {len(subset_dataloader.dataset)} items:")

    # We're done with available_loader in this function.
    available_loader = None

    with torch.no_grad():
        B = len(subset_split.available_dataset)
        C = num_classes

        # We stay on the CPU.
        logits_B_K_C = None

        k_lower = 0
        torch_utils.gc_cuda()
        chunk_size = reduced_eval_consistent_bayesian_model_cuda_chunk_size if device.type == "cuda" else 32
        while k_lower < k:
            try:
                k_upper = min(k_lower + chunk_size, k)

                old_logit_B_K_C = logits_B_K_C
                # This also stays on the CPU.
                logits_B_K_C = torch.empty((B, k_upper, C),
                                           dtype=torch.float64)

                # Copy the old data over.
                if k_lower > 0:
                    logits_B_K_C[:, 0:k_lower, :].copy_(old_logit_B_K_C)
                    old_logit_B_K_C = None

                # This resets the dropout masks.
                bayesian_model.eval()

                for i, (batch, _) in enumerate(
                        with_progress_bar(
                            subset_dataloader,
                            unit_scale=subset_dataloader.batch_size)):
                    lower = i * subset_dataloader.batch_size
                    upper = min(lower + subset_dataloader.batch_size, B)

                    batch = batch.to(device)
                    # batch_size x ws x classes
                    mc_output_B_K_C = bayesian_model(batch, k_upper - k_lower)
                    logits_B_K_C[lower:upper, k_lower:k_upper].copy_(
                        mc_output_B_K_C.double(), non_blocking=True)

            except RuntimeError as exception:
                if torch_utils.should_reduce_batch_size(exception):
                    if chunk_size <= 1:
                        raise
                    chunk_size = chunk_size // 2
                    print(
                        f"New reduced_eval_consistent_bayesian_model_cuda_chunk_size={chunk_size} ({exception})"
                    )
                    reduced_eval_consistent_bayesian_model_cuda_chunk_size = chunk_size

                    torch_utils.gc_cuda()
                else:
                    raise
            else:
                if k_upper == k:
                    next_size = target_size
                elif k_upper < 50:
                    next_size = B
                else:
                    next_size = max(target_size,
                                    B * (100 - reduce_percentage) // 100)

                # Compute the score if it's needed: we are going to reduce the dataset or we're in the last iteration.
                if next_size < B or k_upper == k:
                    # Calculate the scores (mutual information) of logits_B_K_C
                    scores_B = acquisition_function.compute_scores(
                        logits_B_K_C,
                        available_loader=subset_dataloader,
                        device=device)
                else:
                    scores_B = None

                if next_size < B:
                    print("Reducing size", next_size)
                    # Get indices of samples sorted by decreasing score (mutual information)
                    sorted_indices = torch.argsort(scores_B, descending=True)
                    # Keep the next_size highest-scoring samples (indices re-sorted ascending)
                    new_indices = torch.sort(sorted_indices[:next_size],
                                             descending=False)[0]

                    B = next_size
                    logits_B_K_C = logits_B_K_C[new_indices]
                    if k_upper == k:
                        logits_B_K_C = logits_B_K_C.clone().detach()
                    scores_B = scores_B[new_indices].clone().detach()

                    # Acquire all the low scorers
                    subset_split.acquire(sorted_indices[next_size:])

                k_lower += chunk_size

    return SubsetEvalResults(subset_split=subset_split,
                             subset_dataloader=subset_dataloader,
                             scores_B=scores_B,
                             logits_B_K_C=logits_B_K_C)
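# --- Added sketch (not from the original example) --------------------------
# The try/except pattern above in isolation: run work in chunks, halve the
# chunk size whenever an out-of-memory error is raised, and only advance the
# cursor after a chunk succeeds. run_chunk and is_oom are placeholders for
# the inference step and torch_utils.should_reduce_batch_size respectively.
def _chunked_run(total, chunk_size, run_chunk, is_oom):
    done = 0
    while done < total:
        try:
            run_chunk(done, min(done + chunk_size, total))
        except RuntimeError as exception:
            if not is_oom(exception) or chunk_size <= 1:
                raise
            chunk_size //= 2
        else:
            done += chunk_size
    return chunk_size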
    def maximize_batch_CL_incremental(self, gp_params, B=5):
        """
        Finding a batch of points using Constant Liar approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        kappa:              constant value in UCB
              
        Returns
        -------
        X: a batch of [x_1..x_Nt]
        """

        self.NumPoints = np.append(self.NumPoints, B)

        if self.acq['name'] == 'random':
            x_max = [
                np.random.uniform(x[0], x[1], size=B) for x in self.bounds
            ]
            x_max = np.asarray(x_max)
            x_max = x_max.T
            self.X_original = np.vstack((self.X_original, x_max))
            # evaluate Y using original X

            #self.Y = np.append(self.Y, self.f(temp_X_new_original))
            self.Y_original = np.append(self.Y_original, self.f(x_max))

            # update Y after change Y_original
            self.Y = (self.Y_original - np.mean(self.Y_original)) / (
                np.max(self.Y_original) - np.min(self.Y_original))

            self.opt_time = np.hstack((self.opt_time, 0))
            return

        #const_liar=self.Y.mean()
        #const_liar=self.Y_original.min()
        #const_liar=self.Y.max()

        # Set acquisition function
        self.acq_func = AcquisitionFunction(self.acq)

        y_max = self.Y.max()

        # Set parameters if any was passed
        self.gp = PradaGaussianProcess(gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        start_opt = time.time()

        # copy GP, X and Y
        temp_gp = copy.deepcopy(self.gp)
        temp_X = self.X
        temp_Y = self.Y
        #temp_Y_original=self.Y_original

        #store new_x
        new_X = []
        for ii in range(B):

            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind,
                            gp=temp_gp,
                            y_max=y_max,
                            bounds=self.scalebounds)

            # Test if x_max is repeated, if it is, draw another one at random
            if np.any(
                    np.any(np.abs(self.X - x_max) < 0.02,
                           axis=1)):  # check if a data point is already taken
                x_max = np.random.uniform(self.scalebounds[:, 0],
                                          self.scalebounds[:, 1],
                                          size=self.scalebounds.shape[0])

            if ii == 0:
                new_X = x_max
            else:
                new_X = np.vstack((new_X, x_max.reshape((1, -1))))

            const_liar, const_liar_var = temp_gp.predict(x_max, eval_MSE=True)

            #temp_X= np.vstack((temp_X, x_max.reshape((1, -1))))
            #temp_Y = np.append(temp_Y, const_liar )

            #temp_gp.fit(temp_X,temp_Y)

            # update the Gaussian Process and thus the acquisition function
            #temp_gp.compute_incremental_var(temp_X,x_max)
            temp_gp.fit_incremental(x_max, np.asarray([const_liar]))

        # Updating the GP.
        new_X = new_X.reshape((B, -1))

        finished_opt = time.time()
        elapse_opt = finished_opt - start_opt
        self.opt_time = np.hstack((self.opt_time, elapse_opt))

        #print new_X

        self.X = np.vstack((self.X, new_X))

        # convert back to original scale
        temp_X_new_original = [
            val * self.max_min_gap + self.bounds[:, 0]
            for idx, val in enumerate(new_X)
        ]
        temp_X_new_original = np.asarray(temp_X_new_original)
        self.X_original = np.vstack((self.X_original, temp_X_new_original))

        for idx, val in enumerate(temp_X_new_original):
            self.Y_original = np.append(self.Y_original, self.f(val))

        # update Y after change Y_original
        self.Y = (self.Y_original - np.mean(self.Y_original)) / (
            np.max(self.Y_original) - np.min(self.Y_original))
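    # --- Added sketch (not from the original example) ---------------------
    # The constant-liar loop above in outline. "model" is any object with
    # fit(X, y) and a mean-returning predict(x); both stand in for the
    # PradaGaussianProcess used by the method, and argmax_acq stands in for
    # acq_max. No real function evaluations happen inside the loop.
    @staticmethod
    def _constant_liar_sketch(model, argmax_acq, X, y, B):
        X, y = X.copy(), np.asarray(y).copy()
        batch = []
        for _ in range(B):
            x_new = argmax_acq(model)        # maximise the acquisition function
            lie = model.predict(x_new)       # hallucinate y = posterior mean
            X = np.vstack((X, x_new.reshape(1, -1)))
            y = np.append(y, lie)
            model.fit(X, y)                  # refit on the lie, not on f(x_new)
            batch.append(x_new)
        return np.asarray(batch)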
    def maximize_batch_BUCB(self,gp_params, B=5,kappa=2):
        """
        Finding a batch of points using GP-BUCB approach
        
        Input Parameters
        ----------

        gp_params:          Parameters to be passed to the Gaussian Process class
        
        B:                  fixed batch size for all iteration
        
        kappa:              constant value in UCB
        
        IsPlot:             flag variable for visualization    
        
        
        Returns
        -------
        X: a batch of [x_1..x_B]
        """
        
        self.B=B
                
        # Set acquisition function
        self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)
        

               
        # Set parameters if any was passed
        self.gp=PradaGaussianProcess(gp_params)
        
        if len(self.gp.KK_x_x_inv)==0: # check if empty
            self.gp.fit(self.X, self.Y)
        #else:
            #self.gp.fit_incremental(self.X[ur], self.Y[ur])
        
        start_gmm_opt=time.time()
        # generate samples from Acquisition function
        
        y_max=self.gp.Y.max()
        # check the bound 0-1 or original bound        
        temp_X=self.X
        temp_gp=self.gp  
        temp_gp.X_bucb=temp_X
        temp_gp.KK_x_x_inv_bucb=self.gp.KK_x_x_inv
        
        # finding new X
        new_X=[]
        for ii in range(B):
            # Finding argmax of the acquisition function.
            x_max = acq_max(ac=self.acq_func.acq_kind, gp=temp_gp, y_max=y_max, bounds=self.scalebounds)
                     

            if np.any((temp_X - x_max).sum(axis=1) == 0) | np.isnan(x_max.sum()):
                x_max = np.random.uniform(self.scalebounds[:, 0],
                                          self.scalebounds[:, 1],
                                          size=self.scalebounds.shape[0])
                                          
            if ii==0:
                new_X=x_max
            else:
                new_X= np.vstack((new_X, x_max.reshape((1, -1))))
                            
            # update the Gaussian Process and thus the acquisition function                         
            temp_gp.compute_incremental_var(temp_X,x_max)

            temp_X = np.vstack((temp_X, x_max.reshape((1, -1))))
            temp_gp.X_bucb=temp_X
        
        
        # record the optimization time
        finished_gmm_opt=time.time()
        elapse_gmm_opt=finished_gmm_opt-start_gmm_opt
        
        self.opt_time=np.hstack((self.opt_time,elapse_gmm_opt))

        self.NumPoints=np.append(self.NumPoints,B)


        self.X=temp_X
                    
        # convert back to original scale
        temp_X_new_original=[val*self.max_min_gap+self.bounds[:,0] for idx, val in enumerate(new_X)]
        temp_X_new_original=np.asarray(temp_X_new_original)
        self.X_original=np.vstack((self.X_original, temp_X_new_original))
        
        # evaluate y=f(x)
        temp=self.f(temp_X_new_original)
        temp=np.reshape(temp,(-1,1))
        self.Y=np.append(self.Y,temp)
            
        print "#Batch={:d} f_max={:.4f}".format(new_X.shape[0],self.Y.max())               
Example #20
    def maximize(self,gp_params):
        """
        Main optimization method.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.stop_flag==1:
            return
            
        if self.acq['name']=='random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.scalebounds]
            x_max=np.asarray(x_max)
            x_max=x_max.T
            self.X_original=np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            
            self.Y_original = np.append(self.Y_original, self.f(x_max))
            
            # update Y after change Y_original
            self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original)
            
            self.time_opt=np.hstack((self.time_opt,0))
            return         

        # init a new Gaussian Process
        self.gp=PradaGaussianProcess(gp_params)
        if len(self.gp.KK_x_x_inv) == 0:  # check if empty
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

 
        acq=self.acq

        if acq['debug']==1:
            logmarginal=self.gp.log_marginal_lengthscale(gp_params['theta'],gp_params['noise_delta'])
            print(gp_params['theta'])
            print("log marginal before optimizing ={:.4f}".format(logmarginal))
            self.logmarginal=logmarginal
                
            if logmarginal<-999999:
                logmarginal=self.gp.log_marginal_lengthscale(gp_params['theta'],gp_params['noise_delta'])

        if self.optimize_gp==1 and len(self.Y)%(2*self.dim)==0 and len(self.Y)>5*self.dim:

            print("Initial length scale={}".format(gp_params['theta']))
            newtheta = self.gp.optimize_lengthscale(gp_params['theta'],gp_params['noise_delta'],self.scalebounds)
            gp_params['theta']=newtheta
            print("New length scale={}".format(gp_params['theta']))

            # init a new Gaussian Process after optimizing hyper-parameter
            self.gp=PradaGaussianProcess(gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

            
        # Modify search space based on selected method
        if self.expandSS=='expandBoundsDDB_MAP':
            self.expandBoundsDDB_MAP()
        if self.expandSS=='expandBoundsDDB_FB':
            self.expandBoundsDDB_FB()
        if self.expandSS=='expandBoundsFiltering':
            self.expandBoundsFiltering()
        if self.expandSS=='volumeDoubling' and len(self.Y)%(3*self.dim)==0:
            self.volumeDoubling()
        # Prevent bounds from breaching maximum limit
        for d in range(0,self.dim):
            if self.scalebounds[d,0]<0:
                print('Lower bound of {} in dimension {} exceeded minimum bound of {}. Scaling up.'.format(self.scalebounds[d,0],d,0))
                self.scalebounds[d,0]=0
                print('bound set to {}'.format(self.scalebounds))
            if self.scalebounds[d,1]>max_bound_size:
                print('Upper bound of {} in dimension {} exceeded maximum bound of {}. Scaling down.'.format(self.scalebounds[d,1],d,max_bound_size))
                self.scalebounds[d,1]=max_bound_size
                self.scalebounds[d,0]=min(self.scalebounds[d,0],self.scalebounds[d,1]-np.sqrt(3*self.gp.lengthscale))
                print('bound set to {}'.format(self.scalebounds))
        
        # Set acquisition function
        start_opt=time.time()

        y_max = self.Y.max()
        
        if acq['name'] in ['consensus','mes']: 
            ucb_acq_func={}
            ucb_acq_func['name']='ucb'
            ucb_acq_func['kappa']=np.log(len(self.Y))
            ucb_acq_func['dim']=self.dim
            ucb_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(ucb_acq_func)
            xt_ucb = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            
            xstars=[]
            xstars.append(xt_ucb)
            
            ei_acq_func={}
            ei_acq_func['name']='ei'
            ei_acq_func['dim']=self.dim
            ei_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(ei_acq_func)
            xt_ei = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            xstars.append(xt_ei)
                 
            
            pes_acq_func={}
            pes_acq_func['name']='pes'
            pes_acq_func['dim']=self.dim
            pes_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(pes_acq_func)
            xt_pes = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            xstars.append(xt_pes)
            
            
            self.xstars=xstars            
            
        if acq['name']=='vrs':
            print("please call the maximize_vrs function")
            return
                      
        if 'xstars' not in locals():  # keep any seeds set by the consensus/mes branch above
            xstars=[]
            
        self.xstars=xstars

        self.acq['xstars']=xstars
        self.acq['WW']=False
        self.acq['WW_dim']=False
        self.acq_func = AcquisitionFunction(self.acq,self.bb_function)

        if acq['name']=="ei_mu":
            #find the maximum in the predictive mean
            mu_acq={}
            mu_acq['name']='mu'
            mu_acq['dim']=self.dim
            acq_mu=AcquisitionFunction(mu_acq)
            x_mu_max = acq_max(ac=acq_mu.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox)
            # set y_max = mu_max
            y_max=acq_mu.acq_kind(x_mu_max,gp=self.gp, y_max=y_max)

        
        x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox,seeds=self.xstars)

        if acq['name']=='consensus' and acq['debug']==1: # plot the x_max and xstars
            fig=plt.figure(figsize=(5, 5))

            plt.scatter(xt_ucb[0],xt_ucb[1],marker='s',color='g',s=200,label='Peak')
            plt.scatter(xt_ei[0],xt_ei[1],marker='s',color='k',s=200,label='Peak')
            plt.scatter(x_max[0],x_max[1],marker='*',color='r',s=300,label='Peak')
            plt.xlim(0,1)
            plt.ylim(0,1)
            strFileName="acquisition_functions_debug.eps"
            fig.savefig(strFileName, bbox_inches='tight')

        if acq['name']=='vrs' and acq['debug']==1: # plot the x_max and xstars
            fig=plt.figure(figsize=(5, 5))

            plt.scatter(xt_ucb[0],xt_ucb[1],marker='s',color='g',s=200,label='Peak')
            plt.scatter(xt_ei[0],xt_ei[1],marker='s',color='k',s=200,label='Peak')
            plt.scatter(x_max[0],x_max[1],marker='*',color='r',s=300,label='Peak')
            plt.xlim(0,1)
            plt.ylim(0,1)
            strFileName="vrs_acquisition_functions_debug.eps"
            #fig.savefig(strFileName, bbox_inches='tight')
            
            
        val_acq=self.acq_func.acq_kind(x_max,self.gp,y_max)
        #print x_max
        #print val_acq
        if self.stopping_criteria!=0 and val_acq<self.stopping_criteria:
            val_acq=self.acq_func.acq_kind(x_max,self.gp,y_max)

            self.stop_flag=1
            print("Stopping Criteria is violated. Stopping Criteria is {:.15f}".format(self.stopping_criteria))
        
        
        self.alpha_Xt= np.append(self.alpha_Xt,val_acq)
        
        mean,var=self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE']=True
        var[var<1e-20]=0
        #self.Tau_Xt= np.append(self.Tau_Xt,val_acq/var)
       
        # record the optimization time
        finished_opt=time.time()
        elapse_opt=finished_opt-start_opt
        self.time_opt=np.hstack((self.time_opt,elapse_opt))
        
        # store X                                     
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(x_max))
        
        # update Y after change Y_original
        self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original)
        
        if self.gp.flagIncremental==1:
            self.gp.fit_incremental(x_max,self.Y[-1])
#        if (self.acq['name']=='ei_regularizerH') or (self.acq['name']=='ei_regularizerQ'):
#            self.scalebounds[:,0]=self.scalebounds[:,0]+1
#            self.scalebounds[:,1]=self.scalebounds[:,1]-1
#        self.acq['scalebounds']=self.scalebounds
        self.experiment_num=self.experiment_num+1
Example #21
class PradaBayOptFn(object):

    def __init__(self, gp_params, func_params, acq_params, experiment_num, seed):
        """      
        Input parameters
        ----------
        
        gp_params:                  GP parameters
        gp_params.theta:            to compute the kernel
        gp_params.delta:            to compute the kernel
        
        func_params:                function to optimize
        func_params.init bound:     initial bounds for parameters
        func_params.bounds:        bounds on parameters        
        func_params.func:           a function to be optimized
        
        
        acq_params:            acquisition function, 
        acq_params.acq_func['name']=['ei','ucb','poi','lei']
                            ,acq['kappa'] for ucb, acq['k'] for lei
        acq_params.opt_toolbox:     optimization toolbox 'nlopt','direct','scipy'
        
        experiment_num: the iteration of the GP method. Used to make sure each
                        independent stage of the experiment uses different
                        initial conditions
        seed: Variable used as part of a seed to generate random initial points
                            
        Returns
        -------
        dim:            dimension
        scalebounds:    bound used throughout the BO algorithm
        time_opt:       will record the time spent on optimization
        gp:             Gaussian Process object
        """

        self.experiment_num=experiment_num
        self.seed=seed
        np.random.seed(self.experiment_num*self.seed)
        
        # Prior distribution parameters for the DDB method
        self.alpha=2
        self.beta=4
        
        # Find number of parameters
        bounds=func_params['bounds']
        if 'init_bounds' not in func_params:
            init_bounds=bounds
        else:
            init_bounds=func_params['init_bounds']
        # Find input dimension
        self.dim = len(bounds)
        self.radius=np.ones([self.dim,1])

        # Generate bound array
        scalebounds=np.array([np.zeros(self.dim), np.ones(self.dim)])
        self.scalebounds=scalebounds.T
        
        # find function to be optimized
        self.f = func_params['f']

        # acquisition function type
        
        self.acq=acq_params['acq_func']
        
        # Check if the search space is to be modified
        self.bb_function=acq_params["bb_function"]
        if 'expandSS' not in acq_params:
            self.expandSS=0
        else:                
            self.expandSS=acq_params['expandSS']
        # Check if the bound is to be set randomly. If so, shift the bound by a random amount
        if (acq_params['random_initial_bound']==1):
            randomizer=np.random.rand(self.dim)*max_bound_size
            for d in range(0,self.dim):
                self.scalebounds[d]=self.scalebounds[d]+randomizer[d]
        # Other checks
        if 'debug' not in self.acq:
            self.acq['debug']=0           
        if 'stopping' not in acq_params:
            self.stopping_criteria=0
        else:
            self.stopping_criteria=acq_params['stopping']
        if 'optimize_gp' not in acq_params:
            self.optimize_gp=0
        else:                
            self.optimize_gp=acq_params['optimize_gp']
        if 'marginalize_gp' not in acq_params:
            self.marginalize_gp=0
        else:                
            self.marginalize_gp=acq_params['marginalize_gp']
        
        # optimization toolbox
        if 'opt_toolbox' not in acq_params:
            if self.acq['name']=='ei_reg':
                self.opt_toolbox='unbounded'
            else:
                self.opt_toolbox='scipy'
        else:
            self.opt_toolbox=acq_params['opt_toolbox']
        self.iteration_factor=acq_params['iteration_factor']
        # store X in original scale
        self.X_original= None

        # store X in 0-1 scale
        self.X = None
        
        # store y=f(x)
        # (y - mean)/(max-min)
        self.Y = None
               
        # y original scale
        self.Y_original = None
        
        # value of the acquisition function at the selected point
        self.alpha_Xt=None
        self.Tau_Xt=None
        
        self.time_opt=0

        self.k_Neighbor=2
        
        # Gaussian Process class
        self.gp=PradaGaussianProcess(gp_params)
        self.gp_params=gp_params

        # acquisition function
        self.acq_func = None
    
        # stop condition
        self.stop_flag=0
        self.logmarginal=0
        
        # xt_suggestion, caching for Consensus
        self.xstars=[]
        self.ystars=np.zeros((2,1))
        
        # theta vector for marginalization GP
        self.theta_vector =[]
    
    def init(self,gp_params, n_init_points=3):
        """      
        Input parameters
        ----------
        gp_params:            Gaussian Process structure      
        n_init_points:        # init points
        """
        # set seed to allow for reproducible results
        np.random.seed(self.experiment_num*self.seed)
        print(self.experiment_num)
        #Generate initial points on grid
        l=np.zeros([n_init_points,self.dim])
        bound_length=self.scalebounds[0,1]-self.scalebounds[0,0]
        for d in range(0,self.dim):
            l[:,d]=lhs(n_init_points)[:,0]
        self.X=np.asarray(l)+self.scalebounds[:,0]         
        self.X=self.X*bound_length # initial inputs
        print("starting points={}".format(self.X))
        print("starting bounds={}".format(self.scalebounds))
        y_init=self.f(self.X)
        y_init=np.reshape(y_init,(n_init_points,1))
        self.Y_original = np.asarray(y_init)     #initial outputs   
        self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original) #outputs normalised
        print("starting Y values={}".format(self.Y))

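    # --- Added sketch (not from the original example) ---------------------
    # The Latin hypercube idea behind the lhs(n) call above (the real call
    # presumably comes from a DOE library such as pyDOE): one uniform draw
    # inside each of n equal bins of [0, 1], returned in shuffled order.
    @staticmethod
    def _lhs_1d_sketch(n):
        points = np.arange(n) / float(n) + np.random.rand(n) / float(n)
        np.random.shuffle(points)
        return points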
            
        #############Rename#############
    def radiusPDF(self,r,alpha,beta,b,ymax,a):
        """
        Description: Evaluates the posterior distribution for our DDB method
        Input parameters
        ----------
        r:            radius to be evaluated   
        alpha:        # gamma distribution shape parameter
        beta:         # gamma distribution rate parameter
        a:            # log-logistic distribution scale parameter
        b:            # log-logistic distribution rate parameter with y_max
        y_max:            # log-logistic distribution rate parameter with b

        Output: posterior distribution evaluated at r
        """
        gamma=stats.gamma.pdf(r,alpha,scale=1/beta)
        loglog=stats.fisk.pdf(r,ymax/b,scale=a)
        P=gamma*loglog
        return -P
    def sufficientBoundPDF(self,r,bDivYmax,a):
        """
        Description: Evaluates the likelihood distribution for our DDB method
        Input parameters
        ----------
        r:            radius to be evaluated   
        a:            # log-logistic distribution scale parameter
        bDivYmax:            # log-logistic distribution rate parameter
       
        Output: likelihood distribution evaluated at r
        """
        P=stats.fisk.cdf(r,bDivYmax,scale=a)
        return P
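    # --- Added sketch (not from the original example) ---------------------
    # A minimal preview of the curve_fit step used in the two methods below:
    # fit the log-logistic CDF stats.fisk.cdf(r, c, scale=a) to synthetic
    # "sufficient bound" probabilities, with the same box constraints on
    # (c, a) as the real call. The true values here are arbitrary.
    @staticmethod
    def _loglogistic_fit_sketch():
        r = np.linspace(0.01, 5.0, 100)
        p = stats.fisk.cdf(r, 8.0, scale=2.0)   # synthetic data, c=8, a=2
        popt, _ = optimize.curve_fit(
            lambda r, c, a: stats.fisk.cdf(r, c, scale=a),
            r, p, bounds=np.array([[5.0, 1.1], [20.0, 5.0]]))
        return popt   # should recover roughly (8.0, 2.0)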
    
    def expandBoundsDDB_MAP(self):
        """
        Description: Expands the search space with the MAP implementation of
        our DDB method

        """
        
        print('Attempting to expand search space with DDB-MAP method')
        alpha=self.alpha
        beta=self.beta
        bound_samples=100    # Number of radius samples used to fit the log-logistic distribution
        # Find y^+ and x^+
        ymax=np.max(self.Y)
        # Generate test radii
        max_loc=np.argmax(self.Y)
        xmax=self.X[max_loc]
        test_bound=np.zeros(self.scalebounds.shape)
        bound_dist=np.zeros(bound_samples)
        bound_center=xmax
        test_bound[:,1]=bound_center+0.5
        test_bound[:,0]=bound_center-0.5
        max_radius=np.max(np.array([np.max(max_bound_size-test_bound[:,1]),np.max(test_bound[:,0])]))
        step=max_radius/bound_samples
        packing_number=np.zeros(bound_samples)
        # Generate a Thompson sample maxima to estimate internal maxima
        TS=AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x,tsb_y=acq_max_global(TS, self.gp, bounds=self.scalebounds)
        # Generate Gumbel samples to estimate the external maxima
        for i in range(0,bound_samples):
            bound_length=test_bound[:,1]-test_bound[:,0]
            volume=np.power(max_bound_size,self.dim)-np.prod(bound_length)
            packing_number[i]=round(volume/(5*self.gp.lengthscale))
            mu=stats.norm.ppf(1.0-1.0/packing_number[i])
            sigma=stats.norm.ppf(1.0-(1.0/packing_number[i])*np.exp(-1.0))-stats.norm.ppf(1.0-(1.0/(packing_number[i])))
            bound_dist[i]=np.exp(-np.exp(-(-tsb_y-mu)/sigma))
            test_bound[:,1]=test_bound[:,1]+step
            test_bound[:,0]=test_bound[:,0]-step
        bound_dist[np.isnan(bound_dist)]=1
        # Fit the log-logistic parameters to the Gumbel samples
        xfit=np.arange(0,max_radius,max_radius/100)
        popt,pcov=optimize.curve_fit(self.sufficientBoundPDF,xfit[0:100],bound_dist,bounds=np.array([[5,1.1],[20,5]]))
        print("popt={}".format(popt))
        b=ymax/popt[0]
        a=popt[1]
        print("b={}, ymax={}".format(b,ymax))
        # Find the gamma and log-logistic modes to determine the optimisation bound
        c=ymax/b
        loglog_mode=a*np.power((c-1.0)/(c+1.0),(1/c))
        gamma_mode=(alpha-1)/beta
        opt_bound=np.ones([2])
        opt_bound[0]=min(loglog_mode,gamma_mode)
        opt_bound[1]=max(loglog_mode,gamma_mode)
        bound_range=(opt_bound[1]-opt_bound[0])
        # Find MAP Estimate of radius r
        for d in range(0,self.dim):
            r_max=0
            p_max=0
            for x0 in np.arange(opt_bound[0],opt_bound[1],bound_range/10):
                res=optimize.minimize(lambda x: self.radiusPDF(x,alpha,beta,b,ymax,a),x0=x0, bounds=np.array([opt_bound]), method='L-BFGS-B')
                if -res.fun>p_max:
                    r_max=res.x
                    p_max=-res.fun
            if r_max>opt_bound[1]:
                r_max=opt_bound[1]

            print("optimal radius of {} with unscaled probability of {}".format(r_max,p_max))
            self.scalebounds[d,1]=xmax[d]+r_max
            self.scalebounds[d,0]=xmax[d]-r_max
        print("seach space extended to {} with DDB".format(self.scalebounds))
        
    def expandBoundsDDB_FB(self):
        """
        Description: Expands the search space with the full Bayesian 
        implementation of our DDB method

        """
        print('Attempting to expand search space with DDB-FB method')
        alpha=self.alpha
        beta=self.beta
        bound_samples=100    # Number of radius samples used to fit the log-logistic distribution
        # Find y^+ and x^+
        ymax=np.max(self.Y)
        # Generate test radii
        max_loc=np.argmax(self.Y)
        xmax=self.X[max_loc]
        test_bound=np.zeros(self.scalebounds.shape)
        bound_dist=np.zeros(bound_samples)
        bound_center=xmax
        test_bound[:,1]=bound_center+0.5
        test_bound[:,0]=bound_center-0.5
        max_radius=np.max(np.array([np.max(max_bound_size-test_bound[:,1]),np.max(test_bound[:,0])]))
        step=max_radius/bound_samples
        packing_number=np.zeros(bound_samples)
        # Generate a Thompson sample maxima to estimate internal maxima
        TS=AcquisitionFunction.ThompsonSampling(self.gp)
        tsb_x,tsb_y=acq_max_global(TS, self.gp, bounds=self.scalebounds)
        # Generate Gumbel samples to estimate the external maxima
        for i in range(0,bound_samples):
            bound_length=test_bound[:,1]-test_bound[:,0]
            volume=np.power(max_bound_size,self.dim)-np.prod(bound_length)
            packing_number[i]=round(volume/(5*self.gp.lengthscale))
            mu=stats.norm.ppf(1.0-1.0/packing_number[i])
            sigma=stats.norm.ppf(1.0-(1.0/packing_number[i])*np.exp(-1.0))-stats.norm.ppf(1.0-(1.0/(packing_number[i])))
            bound_dist[i]=np.exp(-np.exp(-(-tsb_y-mu)/sigma))
            test_bound[:,1]=test_bound[:,1]+step
            test_bound[:,0]=test_bound[:,0]-step
        bound_dist[np.isnan(bound_dist)]=1
        # Fit the log-logistic parameters to the Gumbel samples
        xfit=np.arange(0,max_radius,max_radius/100)
        popt,pcov=optimize.curve_fit(self.sufficientBoundPDF,xfit[0:100],bound_dist,bounds=np.array([[5,1.1],[20,5]]))
        print("popt={}".format(popt))
        b=ymax/popt[0]
        a=popt[1]
        print("b={}, ymax={}".format(b,ymax))
        # Sample for the optimal radius
        for d in range(0,self.dim):
            gamma=np.random.gamma(shape=alpha,scale=1/beta,size=100)
            loglog=stats.fisk.pdf(gamma,ymax/b,scale=a)
            scaled_weights=loglog/np.sum(loglog)
            multi=np.random.multinomial(1,scaled_weights)
            r_index=np.argmax(multi)
            print("Radius of {} selected".format(gamma[r_index]))
            self.scalebounds[d,1]=xmax[d]+gamma[r_index]
            self.scalebounds[d,0]=xmax[d]-gamma[r_index]

        print("seach space extended to {} with DDB".format(self.scalebounds))
        
                            
    def lcb(self,x, gp):
        """
        Calculates the GP-LCB acquisition function value
        Inputs: gp: The Gaussian process, which also contains all data
                x: The point at which to evaluate the acquisition function
        Output: acq_value: The value of the acquisition function at point x
        """
        mean, var = gp.predict(x, eval_MSE=True)
        var.flags['WRITEABLE']=True
        var[var<1e-10]=0 # prevents negative variances caused by numerical errors
        mean=np.atleast_2d(mean).T
        var=np.atleast_2d(var).T
        beta=2*np.log(len(gp.Y)*np.square((self.experiment_num+1)*math.pi)/(6*0.9))
        return mean - np.sqrt(beta) * np.sqrt(var)
    
    def ucb(self,x, gp):
        """
        Calculates the GP-UCB acquisition function value
        Inputs: gp: The Gaussian process, which also contains all data
                x: The point at which to evaluate the acquisition function
        Output: acq_value: The value of the acquisition function at point x
        """
        mean, var = gp.predict(x, eval_MSE=True)
        var.flags['WRITEABLE']=True
        var[var<1e-10]=0 # prevents negative variances caused by numerical errors
        mean=np.atleast_2d(mean).T
        var=np.atleast_2d(var).T
        # use experiment_num+1, as in lcb, to avoid log(0) on the first experiment
        beta=2*np.log(len(gp.Y)*np.square((self.experiment_num+1)*math.pi)/(6*0.9))
        return mean + np.sqrt(beta) * np.sqrt(var)
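    # Quick numeric check of the beta schedule above (assumed toy values):
    # with n = len(gp.Y) observations at experiment t, beta follows the
    # GP-UCB-style rate 2*log(n*((t+1)*pi)^2/(6*delta)) with delta = 0.9 here:
    #
    #   n, t, delta = 20, 4, 0.9
    #   beta = 2*math.log(n*((t+1)*math.pi)**2/(6*delta))
    #   # beta ~ 13.6, so the confidence width is sqrt(beta) ~ 3.7 posterior
    #   # standard deviations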
                      
    def expandBoundsFiltering(self):
        """
        Description: Expands the search space with filtering Bayesian
        optimisation (FBO) by Nguyen et al.
        """
        step=0.1*self.gp.lengthscale
        print('Attempting to expand search space with FBO method')
        # Determine the unfiltered extension based on the iteration number
        extended_bound=np.copy(self.scalebounds)
        extension=math.pow(self.iteration_factor/(max([self.experiment_num,1])),(1/self.dim))
        old_radius=(extended_bound[:,1]-extended_bound[:,0])/2
        mid_point=extended_bound[:,0]+old_radius
        new_radius=old_radius*extension
        extended_bound[:,1]=mid_point+new_radius
        extended_bound[:,0]=mid_point-new_radius
        # Calculate the global maximum lower confidence bound
        lcb_x,lcb_y=acq_max_global(self.lcb, self.gp, extended_bound)
        # Filter the lower and upper boundary up to the unfiltered extension
        for d in range(0,self.dim):
            # Upper bound
            x_boundary=np.max(self.X[:,d])
            x_boundary_index=np.argmax(self.X[:,d])
            xb=np.copy(self.X[x_boundary_index]) # copy so stepping xb does not mutate self.X
            ucb_y=self.ucb(self.X[x_boundary_index],self.gp)
            while(((ucb_y>lcb_y)&(x_boundary<extended_bound[d,1]))|(x_boundary<self.scalebounds[d,1])):
                x_boundary=x_boundary+step
                xb[d]=xb[d]+step
                ucb_y=self.ucb(xb,self.gp)
            extended_bound[d,1]=x_boundary
            # Lower bound
            x_boundary=np.min(self.X[:,d])
            x_boundary_index=np.argmin(self.X[:,d])
            xb=np.copy(self.X[x_boundary_index]) # re-seed from the boundary point on this side
            ucb_y=self.ucb(self.X[x_boundary_index],self.gp)
            while(((ucb_y>lcb_y)&(x_boundary>extended_bound[d,0]))|(x_boundary>self.scalebounds[d,0])):
                x_boundary=x_boundary-step
                xb[d]=xb[d]-step
                ucb_y=self.ucb(xb,self.gp)
            extended_bound[d,0]=x_boundary

        self.scalebounds=extended_bound

        print("search space extended to {}".format(self.scalebounds))
        
    def volumeDoubling(self):
        """
        Description: Expands the search space with the volume doubling method
        by Shahriari et al
        """
        print('Attempting to expand search space with volume doubling method')
        extended_bound=np.copy(self.scalebounds)
        old_radius=(extended_bound[:,1]-extended_bound[:,0])/2
        volume=np.power(2*old_radius,self.dim)
        mid_point=extended_bound[:,0]+old_radius
        new_radius=np.power(2*volume,1/self.dim)/2      
        extended_bound[:,0]=mid_point-new_radius
        extended_bound[:,1]=mid_point+new_radius
        self.scalebounds=extended_bound

        print("seach space extended to {}".format(self.scalebounds))
        
    def maximize(self,gp_params):
        """
        Main optimization method.

        Input parameters
        ----------
        gp_params: parameter for Gaussian Process

        Returns
        -------
        x: recommended point for evaluation
        """

        if self.stop_flag==1:
            return
            
        if self.acq['name']=='random':
            x_max = [np.random.uniform(x[0], x[1], size=1) for x in self.scalebounds]
            x_max=np.asarray(x_max)
            x_max=x_max.T
            self.X_original=np.vstack((self.X_original, x_max))
            # evaluate Y using original X
            
            self.Y_original = np.append(self.Y_original, self.f(x_max))
            
            # update Y after change Y_original
            self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original)
            
            self.time_opt=np.hstack((self.time_opt,0))
            return         

        # init a new Gaussian Process
        self.gp=PradaGaussianProcess(gp_params)
        if self.gp.KK_x_x_inv ==[]:
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

 
        acq=self.acq

        if acq['debug']==1:
            logmarginal=self.gp.log_marginal_lengthscale(gp_params['theta'],gp_params['noise_delta'])
            print(gp_params['theta'])
            print("log marginal before optimizing ={:.4f}".format(logmarginal))
            self.logmarginal=logmarginal
                

        if self.optimize_gp==1 and len(self.Y)%(2*self.dim)==0 and len(self.Y)>5*self.dim:

            print("Initial length scale={}".format(gp_params['theta']))
            newtheta = self.gp.optimize_lengthscale(gp_params['theta'],gp_params['noise_delta'],self.scalebounds)
            gp_params['theta']=newtheta
            print("New length scale={}".format(gp_params['theta']))

            # init a new Gaussian Process after optimizing hyper-parameter
            self.gp=PradaGaussianProcess(gp_params)
            # Find unique rows of X to avoid GP from breaking
            ur = unique_rows(self.X)
            self.gp.fit(self.X[ur], self.Y[ur])

            
        # Modify search space based on selected method
        if self.expandSS=='expandBoundsDDB_MAP':
            self.expandBoundsDDB_MAP()
        if self.expandSS=='expandBoundsDDB_FB':
            self.expandBoundsDDB_FB()
        if self.expandSS=='expandBoundsFiltering':
            self.expandBoundsFiltering()
        if self.expandSS=='volumeDoubling' and len(self.Y)%(3*self.dim)==0:
            self.volumeDoubling()
        # Prevent bounds from breaching maximum limit
        for d in range(0,self.dim):
            if self.scalebounds[d,0]<0:
                print('Lower bound of {} in dimension {} exceeded minimum bound of {}. Scaling up.'.format(self.scalebounds[d,0],d,0))
                self.scalebounds[d,0]=0
                print('bound set to {}'.format(self.scalebounds))
            if self.scalebounds[d,1]>max_bound_size:
                print('Upper bound of {} in dimension {} exceeded maximum bound of {}. Scaling down.'.format(self.scalebounds[d,1],d,max_bound_size))
                self.scalebounds[d,1]=max_bound_size
                self.scalebounds[d,0]=min(self.scalebounds[d,0],self.scalebounds[d,1]-np.sqrt(3*self.gp.lengthscale))
                print('bound set to {}'.format(self.scalebounds))
        
        # Set acquisition function
        start_opt=time.time()

        y_max = self.Y.max()
        
        if acq['name'] in ['consensus','mes']: 
            ucb_acq_func={}
            ucb_acq_func['name']='ucb'
            ucb_acq_func['kappa']=np.log(len(self.Y))
            ucb_acq_func['dim']=self.dim
            ucb_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(ucb_acq_func)
            xt_ucb = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            
            xstars=[]
            xstars.append(xt_ucb)
            
            ei_acq_func={}
            ei_acq_func['name']='ei'
            ei_acq_func['dim']=self.dim
            ei_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(ei_acq_func)
            xt_ei = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            xstars.append(xt_ei)
                 
            
            pes_acq_func={}
            pes_acq_func['name']='pes'
            pes_acq_func['dim']=self.dim
            pes_acq_func['scalebounds']=self.scalebounds
        
            myacq=AcquisitionFunction(pes_acq_func)
            xt_pes = acq_max(ac=myacq.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds)
            xstars.append(xt_pes)
            
            
            self.xstars=xstars            
            
        if acq['name']=='vrs':
            print("please call the maximize_vrs function")
            return
                      
        if acq['name'] not in ['consensus','mes']:
            xstars=[]
            
        self.xstars=xstars

        self.acq['xstars']=xstars
        self.acq['WW']=False
        self.acq['WW_dim']=False
        self.acq_func = AcquisitionFunction(self.acq,self.bb_function)

        if acq['name']=="ei_mu":
            #find the maximum in the predictive mean
            mu_acq={}
            mu_acq['name']='mu'
            mu_acq['dim']=self.dim
            acq_mu=AcquisitionFunction(mu_acq)
            x_mu_max = acq_max(ac=acq_mu.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox)
            # set y_max = mu_max
            y_max=acq_mu.acq_kind(x_mu_max,gp=self.gp, y_max=y_max)

        
        x_max = acq_max(ac=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,bounds=self.scalebounds,opt_toolbox=self.opt_toolbox,seeds=self.xstars)

        if acq['name']=='consensus' and acq['debug']==1: # plot the x_max and xstars
            fig=plt.figure(figsize=(5, 5))

            plt.scatter(xt_ucb[0],xt_ucb[1],marker='s',color='g',s=200,label='UCB peak')
            plt.scatter(xt_ei[0],xt_ei[1],marker='s',color='k',s=200,label='EI peak')
            plt.scatter(x_max[0],x_max[1],marker='*',color='r',s=300,label='Selected point')
            plt.xlim(0,1)
            plt.ylim(0,1)
            strFileName="acquisition_functions_debug.eps"
            fig.savefig(strFileName, bbox_inches='tight')

        val_acq=self.acq_func.acq_kind(x_max,self.gp,y_max)
        if self.stopping_criteria!=0 and val_acq<self.stopping_criteria:
            self.stop_flag=1
            print("Stopping criteria violated: acquisition value {:.15f} is below the threshold {:.15f}".format(val_acq,self.stopping_criteria))
        
        
        self.alpha_Xt= np.append(self.alpha_Xt,val_acq)
        
        mean,var=self.gp.predict(x_max, eval_MSE=True)
        var.flags['WRITEABLE']=True
        var[var<1e-20]=0
        #self.Tau_Xt= np.append(self.Tau_Xt,val_acq/var)
       
        # record the optimization time
        finished_opt=time.time()
        elapse_opt=finished_opt-start_opt
        self.time_opt=np.hstack((self.time_opt,elapse_opt))
        
        # store X                                     
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))

        # evaluate Y using original X
        self.Y_original = np.append(self.Y_original, self.f(x_max))
        
        # update Y after change Y_original
        self.Y=(self.Y_original-np.mean(self.Y_original))/np.std(self.Y_original)
        
        if self.gp.flagIncremental==1:
            self.gp.fit_incremental(x_max,self.Y[-1])
#        if (self.acq['name']=='ei_regularizerH') or (self.acq['name']=='ei_regularizerQ'):
#            self.scalebounds[:,0]=self.scalebounds[:,0]+1
#            self.scalebounds[:,1]=self.scalebounds[:,1]-1
#        self.acq['scalebounds']=self.scalebounds
        self.experiment_num=self.experiment_num+1
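    # Hedged usage sketch (driver names assumed from context, not a verified
    # API): maximize performs one BO iteration per call, so an experiment
    # loop might look like
    #
    #   gp_params = {'theta': 0.1, 'noise_delta': 1e-6}   # assumed keys
    #   for _ in range(30):
    #       bo.maximize(gp_params)
    #       if bo.stop_flag:
    #           break
    #   best_y = bo.Y_original.max()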
    def maximize(self,
                 init_points=5,
                 n_iter=25,
                 acq='ucb',
                 kappa=2.576,
                 **gp_params):
        """
		Main optimization method.

		Parameters
		----------
		:param init_points:
			Number of randomly chosen points to sample the
			target function before fitting the gp.

		:param n_iter:
			Total number of times the process is to repeated. Note that
			currently this methods does not have stopping criteria (due to a
			number of reasons), therefore the total number of points to be
			sampled must be specified.

		:param acq:
			Acquisition function to be used, defaults to Expected Improvement.

		:param gp_params:
			Parameters to be passed to the Scikit-learn Gaussian Process object

		Returns
		-------
		:return: Nothing
		"""

        # Set acquisition function
        self.acq_func = AcquisitionFunction(kind=self.acq, kappa=kappa)

        # Initialize x, y and find current y_max
        if not self.initialized:
            self.init(init_points)

        y_max = self.Y.max()

        self.theta = gp_params['theta']

        # Set GP parameters if any were passed
        self.gp = PradaMultipleGaussianProcess(**gp_params)

        # Find unique rows of X to avoid GP from breaking
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        # Finding argmax of the acquisition function.
        x_max = acq_max(ac=self.acq_func.acq_kind,
                        gp=self.gp,
                        y_max=y_max,
                        bounds=self.bounds)

        #print "start acq max nlopt"
        #x_max,f_max = acq_max_nlopt(f=self.acq_func.acq_kind,gp=self.gp,y_max=y_max,
        #bounds=self.bounds)
        #print "end acq max nlopt"

        # If x_max repeats an existing point, draw a replacement uniformly at random
        if np.any(np.all(self.X == x_max, axis=1)):

            x_max = np.random.uniform(self.bounds[:, 0],
                                      self.bounds[:, 1],
                                      size=self.bounds.shape[0])

        #print "start append X,Y"
        self.X = np.vstack((self.X, x_max.reshape((1, -1))))
        self.Y = np.append(self.Y, self.f(x_max))

        #print "end append X,Y"
        #print 'x_max={:f}'.format(x_max[0])

        #print "start fitting GP"

        # Updating the GP.
        ur = unique_rows(self.X)
        self.gp.fit(self.X[ur], self.Y[ur])

        #print "end fitting GP"
        # Update maximum value to search for next probe point.
        if self.Y[-1] > y_max:
            y_max = self.Y[-1]
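    # Hedged usage sketch (constructor name and kwargs assumed, not a verified
    # API): this variant also performs a single step per call, e.g.
    #
    #   bo = PradaBayOptFn(f=my_objective, pbounds=my_bounds)   # assumed name
    #   for _ in range(25):
    #       bo.maximize(init_points=5, n_iter=1, acq='ucb', theta=0.1)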