Example #1
# Presumed imports for this example (not shown in the original snippet);
# RewardModel is a base class defined elsewhere in the source project.
import numpy as np
import rospy
from GPy.models import GPRegression
from GPy.kern import RBF, Matern32


class GaussianProcessRewardModel(RewardModel):
    """
    Models rewards with a Gaussian process regressor.

    Implemented with a modified version of scikit-learn's Gaussian Process
    Regressor class.

    The GP is updated online as samples are added. As such, hyperparameters
    for the GP are fit in batch after a threshold number of samples are collected.
    The hyperparameters are then refined afterwards as more samples are added
    until the number of samples passes an upper threshold, after which the
    hyperparameters are no longer updated. This helps avoid highly expensive
    refinement which has computational complexity of O(N^3) in number of samples.

    Parameters:
    -----------
    min_samples: integer (default 100)
        The number of samples after which initial batch hyperparameter
        fitting is performed.
    batch_retries: integer (default 20)
        The number of random restarts for the initial hyperparameter fit.
    refine_ll_delta: numeric (default 1.0)
        The hyperparameters are refined after the average GP marginal
        log-likelihood decreases by this much since the last refinement.
    max_samples: integer (default 1000)
        The number of samples after which hyperparameters are no longer
        refined.

    Other Keyword Parameters:
    -------------------
    Refer to sklearn.gaussian_process.GaussianProcessRegressor's __init__
    """

    def __init__(self, min_samples=10, batch_retries=19, enable_refine=True,
                 refine_period=0, refine_ll_delta=1.0, refine_retries=0, 
                 kernel_type='rbf', verbose=False, **kwargs):

        self.min_samples = min_samples
        self.hp_batch_retries = batch_retries
        
        self.enable_refine = enable_refine
        self.hp_refine_ll_delta = float(refine_ll_delta)
        self.hp_refine_retries = refine_retries
        self.hp_refine_period = refine_period
        self.last_refine_iter = 0

        self.hp_init = False
        self.last_ll = None
        self.kwargs = kwargs
        self.verbose = bool(verbose)

        if kernel_type.lower() == 'rbf':
            self.kernel_class = RBF
        elif kernel_type.lower() == 'matern':
            self.kernel_class = Matern32
        else:
            raise ValueError('Unknown kernel_type: ' + kernel_type)

        self.kernel = None
        self.gp = None  # Init later
        self.inputs = []
        self.outputs = []

    def _initialize(self):
        x = np.asarray(self.inputs)
        y = np.asarray(self.outputs).reshape(-1, 1)
        self.kernel = self.kernel_class(input_dim=x.shape[1], ARD=True)
        self.gp = GPRegression(x, y, kernel=self.kernel, **self.kwargs)

    @property
    def num_samples(self):
        return len(self.inputs)

    def average_log_likelihood(self):
        # NOTE For some reason this returns the negative log-likelihood
        if self.gp is None or self.num_samples < self.min_samples:
            return None
        return -self.gp.log_likelihood() / self.num_samples

    def report_sample(self, x, reward):
        self.inputs.append(x)
        self.outputs.append(reward)

        if self.gp is None:
            self.batch_optimize()
        else:
            x = np.asarray(self.inputs)
            y = np.asarray(self.outputs).reshape(-1, 1)
            self.gp.set_XY(x, y)

        # Wait until we've initialized
        if not self.hp_init:
            return

        current_ll = self.average_log_likelihood()
        if self.verbose:
            rospy.loginfo('Prev LL: %f Curr LL: %f', self.last_ll, current_ll)

        self.check_refine(current_ll)

    def check_refine(self, current_ll):
        if not self.enable_refine:
            return

        if current_ll > self.last_ll:
            self.last_ll = current_ll

        # If the LL has decreased by refine_ll_delta
        delta_achieved = current_ll < self.last_ll - self.hp_refine_ll_delta

        # If it has been refine_period samples since last refinement
        period_achieved = self.num_samples > self.last_refine_iter + self.hp_refine_period
        if delta_achieved or period_achieved:
            self.batch_optimize(self.hp_refine_retries + 1)
            self.last_refine_iter = self.num_samples

    def batch_optimize(self, n_restarts=None):
        if self.num_samples < self.min_samples:
            return

        if n_restarts is None:
            n_restarts = self.hp_batch_retries + 1

        # NOTE Warm-restarting seems to get stuck in local optima, possibly from mean?
        # if self.gp is None: 
        self._initialize()

        if self.verbose:
            rospy.loginfo('Batch optimizing with %d restarts...', n_restarts)

        self.gp.optimize_restarts(optimizer='bfgs',
                                  messages=False,
                                  num_restarts=n_restarts)

        if self.verbose:
            rospy.loginfo('Optimization complete. Model:\n%s\n Kernel:\n%s', str(self.gp), str(self.kernel.lengthscale))

        self.hp_init = True
        self.last_ll = self.average_log_likelihood()

    def predict(self, x, return_std=False):
        if self.gp is None:
            # Not fitted yet; return an uninformative prediction
            pred_mean = 0
            pred_std = float('inf')
        else:
            x = np.asarray(x)
            if len(x.shape) == 1:
                x = x.reshape(1, -1)
            pred_mean, pred_var = self.gp.predict_noiseless(x)
            # Guard against small negative variances from numerical error
            if np.any(pred_var < 0):
                rospy.logwarn('Negative variance %f rounding to 0', float(pred_var.min()))
                pred_var = np.maximum(pred_var, 0)
            pred_std = np.sqrt(pred_var)

        if return_std:
            return np.squeeze(pred_mean), np.squeeze(pred_std)
        else:
            return np.squeeze(pred_mean)

    def clear(self):
        self.inputs = []
        self.outputs = []
        self.kernel = None
        self.gp = None

    def fit(self, X, y):
        """Initialize the model from lists of inputs and corresponding rewards.

        Parameters
        ----------
        X : Iterable of inputs
        y : Iterable of corresponding rewards
        """
        if len(X) != len(y):
            raise RuntimeError('X and y lengths must be the same!')

        self.inputs = list(X)
        self.outputs = list(y)
        self._initialize()
        self.batch_optimize(self.hp_batch_retries)

    @property
    def model(self):
        return self.gp
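
A minimal usage sketch for the model above (assuming the class and its presumed imports are available, and that the RewardModel base class can be instantiated this way); the toy objective and sample loop are illustrative assumptions, not part of the original source:

# Hypothetical online use: report noisy rewards, then query the posterior.
model = GaussianProcessRewardModel(min_samples=10, refine_period=20)

rng = np.random.default_rng(0)
for _ in range(50):
    x = rng.uniform(-1.0, 1.0, size=2)
    reward = -np.sum(x ** 2) + 0.01 * rng.normal()   # illustrative objective
    model.report_sample(x, reward)                    # batch fit triggers at min_samples

mean, std = model.predict(np.zeros(2), return_std=True)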
Example #2
# Presumed imports for this example (not shown in the original snippet);
# Covariance, HyperpriorMap, and set_priors come from the source project.
from typing import Optional

import numpy as np
from GPy.models import GPRegression


class KernelKernelGPModel:

    def __init__(self,
                 kernel_kernel: Optional[Covariance] = None,
                 noise_var: Optional[float] = None,
                 exact_f_eval: bool = False,
                 optimizer: Optional[str] = 'lbfgsb',
                 max_iters: int = 1000,
                 optimize_restarts: int = 5,
                 verbose: bool = True,
                 kernel_kernel_hyperpriors: Optional[HyperpriorMap] = None):
        """

        :param kernel_kernel:
        :param noise_var:
        :param exact_f_eval:
        :param optimizer:
        :param max_iters:
        :param optimize_restarts:
        :param verbose:
        :param kernel_kernel_hyperpriors:
        """
        self.noise_var = noise_var
        self.exact_f_eval = exact_f_eval
        self.optimize_restarts = optimize_restarts
        self.optimizer = optimizer
        self.max_iters = max_iters
        self.verbose = verbose
        self.covariance = kernel_kernel
        self.kernel_hyperpriors = kernel_kernel_hyperpriors
        self.model = None

    def train(self):
        """Train (optimize) the model."""
        if self.max_iters > 0:
            # Update the model maximizing the marginal likelihood.
            if self.optimize_restarts == 1:
                self.model.optimize(optimizer=self.optimizer, max_iters=self.max_iters, messages=False,
                                    ipython_notebook=False)
            else:
                self.model.optimize_restarts(num_restarts=self.optimize_restarts, optimizer=self.optimizer,
                                             max_iters=self.max_iters, ipython_notebook=False, verbose=self.verbose,
                                             robust=True, messages=False)

    def _create_model(self,
                      x: np.ndarray,
                      y: np.ndarray):
        """Create model given input data X and output data Y.

        :param x: 2d array of indices of distance builder
        :param y: model fitness scores
        :return:
        """
        # Make sure input data consists only of positive integers.
        assert np.issubdtype(x.dtype, np.integer) and x.min() >= 0

        # Define kernel
        self.input_dim = x.shape[1]
        # TODO: figure out default kernel kernel initialization
        if self.covariance is None:
            # No default kernel is implemented yet; a covariance must be supplied.
            assert self.covariance is not None
            # kern = GPy.kern.RBF(self.input_dim, variance=1.)
        else:
            kern = self.covariance.raw_kernel
            self.covariance = None

        # Define model
        noise_var = y.var() * 0.01 if self.noise_var is None else self.noise_var
        normalize = x.size > 1  # only normalize if more than 1 observation.
        self.model = GPRegression(x, y, kern, noise_var=noise_var, normalizer=normalize)

        # Set hyperpriors
        if self.kernel_hyperpriors is not None:
            if 'GP' in self.kernel_hyperpriors:
                # Set likelihood hyperpriors.
                likelihood_hyperprior = self.kernel_hyperpriors['GP']
                set_priors(self.model.likelihood, likelihood_hyperprior, in_place=True)
            if 'SE' in self.kernel_hyperpriors:
                # Set kernel hyperpriors.
                se_hyperprior = self.kernel_hyperpriors['SE']
                set_priors(self.model.kern, se_hyperprior, in_place=True)

        # Restrict variance if exact evaluations of the objective.
        if self.exact_f_eval:
            self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
        else:
            # --- We make sure we do not get ridiculously small residual noise variance
            if self.model.priors.size > 0:
                # FIXME: shouldn't need this case, but GPy doesn't have log Jacobian implemented for Logistic
                self.model.Gaussian_noise.constrain_positive(warning=False)
            else:
                self.model.Gaussian_noise.constrain_bounded(1e-9, 1e6, warning=False)

    def update(self, x_all, y_all, x_new, y_new):
        """Update model with new observations (the full x_all/y_all are set; x_new/y_new are unused)."""
        if self.model is None:
            self._create_model(x_all, y_all)
        else:
            self.model.set_XY(x_all, y_all)

        self.train()

    def _predict(self,
                 x: np.ndarray,
                 full_cov: bool,
                 include_likelihood: bool):
        if x.ndim == 1:
            x = x[None, :]
        m, v = self.model.predict(x, full_cov=full_cov, include_likelihood=include_likelihood)
        v = np.clip(v, 1e-10, np.inf)
        return m, v

    def predict(self,
                x: np.ndarray,
                with_noise: bool = True):
        m, v = self._predict(x, False, with_noise)
        # v holds per-point variances (full_cov=False), so the elementwise
        # square root gives standard deviations.
        return m, np.sqrt(v)

    def get_f_max(self):
        """
        Returns the location where the posterior mean is takes its maximal value.
        """
        return self.model.predict(self.model.X)[0].max()

    def plot(self, **plot_kwargs):
        import matplotlib.pyplot as plt
        self.model.plot(plot_limits=(0, self.model.kern.n_models - 1), resolution=self.model.kern.n_models,
                        **plot_kwargs)
        plt.show()
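
For reference, a minimal stand-alone sketch of the GPy calls this wrapper makes in _create_model, train, and _predict, using a plain RBF kernel as a stand-in for the project-specific Covariance object (the toy data and kernel choice here are illustrative assumptions, not part of the original source):

import numpy as np
import GPy

# Toy inputs and fitness scores standing in for the wrapper's (x, y).
x = np.linspace(0, 9, 10).reshape(-1, 1)
y = np.sin(x / 3.0) + 0.05 * np.random.randn(10, 1)

kern = GPy.kern.RBF(input_dim=1, variance=1.0)            # stand-in for covariance.raw_kernel
gp = GPy.models.GPRegression(x, y, kern,
                             noise_var=float(y.var()) * 0.01,
                             normalizer=True)
gp.optimize_restarts(num_restarts=5, optimizer='lbfgsb',
                     max_iters=1000, robust=True, verbose=False, messages=False)

m, v = gp.predict(x, full_cov=False, include_likelihood=True)
v = np.clip(v, 1e-10, np.inf)                              # mirrors _predict's variance floor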
Example #3
# Presumed imports for this example (not shown in the original snippet);
# `opt` (configuration) and `clean_x` are helpers from the source project.
import os
from math import sqrt

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import norm
from GPy.models import GPRegression


class Kernel(object):
    def __init__(self,
                 x0,
                 y0,
                 cons=None,
                 alpha=opt.ke_alpha,
                 beta=opt.ke_beta,
                 input_size=opt.ke_input_size,
                 hidden_size=opt.ke_hidden_size,
                 num_layers=opt.ke_num_layers,
                 bidirectional=opt.ke_bidirectional,
                 lr=opt.ke_lr,
                 weight_decay=opt.ke_weight_decay):

        super(Kernel, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            bidirectional=bidirectional)
        self.lstm = self.lstm.to(opt.device)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.bi = 2 if bidirectional else 1

        self.x = [x0]
        self.y = torch.tensor([y0],
                              dtype=torch.float,
                              device=opt.device,
                              requires_grad=False)
        self.cons = [cons]
        inp, out = clean_x(self.x, self.cons)
        self.model = GPRegression(inp, out)
        self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
        self.model.optimize()
        self.x_best = x0
        self.y_best = y0
        self.i_best = 0

        self.n = 1
        self.E = self.embedding(x0).view(1, -1)
        self.K = self.kernel(self.E[0], self.E[0]).view(1, 1)
        self.K_inv = torch.inverse(self.K + self.beta *
                                   torch.eye(self.n, device=opt.device))
        self.optimizer = optim.Adam(self.lstm.parameters(),
                                    lr=lr,
                                    weight_decay=weight_decay)

    def embedding(self, xi):
        inputs = xi.view(-1, 1, self.input_size)
        outputs, (hn, cn) = self.lstm(inputs)
        outputs = torch.mean(outputs.squeeze(1), dim=0)
        outputs = outputs / torch.norm(outputs)
        return outputs

    def kernel(self, ei, ej):
        d = ei - ej
        d = torch.sum(d * d)
        k = torch.exp(-d / (2 * self.alpha))
        return k

    def kernel_batch(self, en):
        n = self.n
        k = torch.zeros(n, device=opt.device)
        for i in range(n):
            k[i] = self.kernel(self.E[i], en)
        return k

    def predict(self, xn):
        n = self.n
        en = self.embedding(xn)
        k = self.kernel_batch(en)
        kn = self.kernel(en, en)
        t = torch.mm(k.view(1, n), self.K_inv)
        mu = torch.mm(t, self.y.view(n, 1))
        sigma = kn - torch.mm(t, k.view(n, 1))
        sigma = torch.sqrt(sigma + self.beta)
        return mu, sigma

    def acquisition_cons(self, xn):
        # Constrained expected improvement: EI from the embedding GP weighted by
        # the probability of feasibility (PoF) from the constraint GP.
        with torch.no_grad():
            xn_ = np.array([xn.cpu().numpy().flatten()])
            mu_cons, sigma_cons = self.model.predict(xn_)
            sigma_cons = sqrt(sigma_cons)
            PoF = norm.cdf(0, mu_cons, sigma_cons)
            mu, sigma = self.predict(xn)
            mu = mu.item()
            sigma = sigma.item()
            y_best = self.y_best
            z = (mu - y_best) / sigma
            ei = (mu - y_best) * norm.cdf(z) + sigma * norm.pdf(z)
            return ei * PoF

    def acquisition(self, xn):
        with torch.no_grad():
            mu, sigma = self.predict(xn)
            mu = mu.item()
            sigma = sigma.item()
            y_best = self.y_best
            z = (mu - y_best) / sigma
            ei = (mu - y_best) * norm.cdf(z) + sigma * norm.pdf(z)
            return ei

    def kernel_batch_ex(self, t):
        n = self.n
        k = torch.zeros(n - 1, device=opt.device)
        for i in range(t):
            k[i] = self.kernel(self.E[i], self.E[t])
        for i in range(t + 1, n):
            k[i - 1] = self.kernel(self.E[t], self.E[i])
        return k

    def predict_ex(self, t):
        n = self.n
        k = self.kernel_batch_ex(t)
        kt = self.kernel(self.E[t], self.E[t])
        indices = list(range(t)) + list(range(t + 1, n))
        indices = torch.tensor(indices, dtype=torch.long, device=opt.device)
        K = self.K
        K = torch.index_select(K, 0, indices)
        K = torch.index_select(K, 1, indices)
        K_inv = torch.inverse(K +
                              self.beta * torch.eye(n - 1, device=opt.device))
        y = torch.index_select(self.y, 0, indices)

        t = torch.mm(k.view(1, n - 1), K_inv)
        mu = torch.mm(t, y.view(n - 1, 1))
        sigma = kt - torch.mm(t, k.view(n - 1, 1))
        sigma = torch.sqrt(sigma + self.beta)
        return mu, sigma

    def add_sample(self, xn, yn, consn):
        self.x.append(xn)
        self.y = torch.cat((self.y,
                            torch.tensor([yn],
                                         dtype=torch.float,
                                         device=opt.device,
                                         requires_grad=False)))
        self.cons.append(consn)
        inp, out = clean_x(self.x, self.cons)
        self.model.set_XY(inp, out)
        self.model.optimize()
        n = self.n
        if consn > 0:
            if yn > self.y_best:
                self.x_best = xn
                self.y_best = yn
                self.i_best = n
        en = self.embedding(xn)
        k = self.kernel_batch(en)
        kn = self.kernel(en, en)
        self.E = torch.cat((self.E, en.view(1, -1)), 0)
        self.K = torch.cat(
            (torch.cat((self.K, k.view(n, 1)),
                       1), torch.cat((k.view(1, n), kn.view(1, 1)), 1)), 0)
        self.n += 1
        self.K_inv = torch.inverse(self.K + self.beta *
                                   torch.eye(self.n, device=opt.device))

    def add_batch(self, x, y, cons):
        self.x.extend(x)
        self.y = torch.cat((self.y, y))
        self.cons.extend(cons)
        inp, out = clean_x(self.x, self.cons)
        self.model.set_XY(inp, out)
        self.model.optimize()
        m = len(x)
        for i in range(m):
            n = self.n
            if cons[i] > 0:
                if y[i].item() > self.y_best:
                    self.x_best = x[i]
                    self.y_best = y[i].item()
                    self.i_best = n
            en = self.embedding(x[i])
            k = self.kernel_batch(en)
            kn = self.kernel(en, en)
            self.E = torch.cat((self.E, en.view(1, -1)), 0)
            self.K = torch.cat(
                (torch.cat((self.K, k.view(n, 1)),
                           1), torch.cat((k.view(1, n), kn.view(1, 1)), 1)), 0)
            self.n += 1
        self.K_inv = torch.inverse(self.K + self.beta *
                                   torch.eye(self.n, device=opt.device))

    def update_EK(self):
        n = self.n
        E_ = torch.zeros((n, self.E.size(1)), device=opt.device)
        for i in range(n):
            E_[i] = self.embedding(self.x[i])
        self.E = E_
        K_ = torch.zeros((n, n), device=opt.device)
        for i in range(n):
            for j in range(i, n):
                k = self.kernel(self.E[i], self.E[j])
                K_[i, j] = k
                K_[j, i] = k
        self.K = K_
        self.K_inv = torch.inverse(self.K + self.beta *
                                   torch.eye(self.n, device=opt.device))

    def loss(self):
        n = self.n
        l = torch.zeros(n, device=opt.device)
        for i in range(n):
            mu, sigma = self.predict_ex(i)
            d = self.y[i] - mu
            # Leave-one-out Gaussian log-likelihood; 0.918939 ~= 0.5 * log(2 * pi)
            l[i] = -(0.918939 + torch.log(sigma) + d * d / (2 * sigma * sigma))
        l = -torch.mean(l)
        return l

    def opt_step(self):
        if self.n < 2:
            return 0.0
        self.optimizer.zero_grad()
        l = self.loss()
        ll = -l.item()
        l.backward()
        self.optimizer.step()
        self.update_EK()
        return ll

    def save(self, save_path):
        path = os.path.dirname(save_path)
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self, save_path)
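
The predict and predict_ex methods above apply the standard GP posterior on top of the learned LSTM embeddings: mu = k^T (K + beta*I)^{-1} y and sigma^2 = k(x, x) - k^T (K + beta*I)^{-1} k + beta. A self-contained numerical sketch of that formula, using a fixed RBF kernel in place of the LSTM embedding (toy data; illustrative only):

import torch

def rbf(a, b, alpha=1.0):
    # Squared-exponential kernel, matching Kernel.kernel() above.
    d = a - b
    return torch.exp(-torch.sum(d * d) / (2 * alpha))

E = torch.randn(5, 3)            # toy "embeddings" of 5 observed points
y = torch.randn(5)               # their observed values
beta = 1e-2                      # plays the role of self.beta (noise / jitter)

K = torch.stack([torch.stack([rbf(E[i], E[j]) for j in range(5)]) for i in range(5)])
K_inv = torch.inverse(K + beta * torch.eye(5))

e_new = torch.randn(3)           # embedding of a query point
k = torch.stack([rbf(E[i], e_new) for i in range(5)])
t = torch.mm(k.view(1, 5), K_inv)
mu = torch.mm(t, y.view(5, 1))                                            # posterior mean
sigma = torch.sqrt(rbf(e_new, e_new) - torch.mm(t, k.view(5, 1)) + beta)  # posterior std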