Example #1
File: bnn.py Project: zwt233/autotf
    def sample_functions(self, X_test, n_funcs=1):
        """
        Samples F function values from the current posterior at the N
        specified test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points
        n_funcs: int
            Number of function values that are drawn at each test point.

        Returns
        ----------
        np.array(F, N)
            The F function values drawn at the N test points.
        """
        if self.normalize_input:
            X_test_norm, _, _ = zero_mean_unit_var_normalization(X_test, self.x_mean, self.x_std)
        else:
            X_test_norm = X_test
        f = np.zeros([n_funcs, X_test_norm.shape[0]])
        for i in range(n_funcs):
            lasagne.layers.set_all_param_values(self.net, self.samples[i])
            out = self.single_predict(X_test_norm)[:, 0]
            if self.normalize_output:
                f[i, :] = zero_mean_unit_var_unnormalization(out, self.y_mean, self.y_std)
            else:
                f[i, :] = out

        return f
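
A minimal usage sketch for sample_functions(). The surrounding class and a trained instance of it are not part of this snippet, so the helper below and the `model` argument are hypothetical.

import numpy as np

def demo_sample_functions(model, n_points=10, n_dims=3):
    # `model` is assumed to be a trained instance of the class that defines
    # sample_functions() above.
    X_test = np.random.rand(n_points, n_dims)
    funcs = model.sample_functions(X_test, n_funcs=5)
    print(funcs.shape)  # expected: (5, n_points)
    return funcs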
Example #2
File: bnn.py Project: zwt233/autotf
    def predict(self, X_test, return_individual_predictions=False, *args, **kwargs):
        """
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            Input test points

        return_individual_predictions: bool
            If set to true, the individual predictions of all samples are returned.

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """

        if not self.is_trained:
            logging.error("Model is not trained!")
            return

        # Normalize input
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(X_test, self.x_mean, self.x_std)
        else:
            X_ = X_test

        f_out = []
        theta_noise = []
        for sample in self.samples:
            lasagne.layers.set_all_param_values(self.net, sample)
            out = self.single_predict(X_)
            f_out.append(out[:, 0])
            theta_noise.append(np.exp(out[:, 1]))

        f_out = np.asarray(f_out)
        theta_noise = np.asarray(theta_noise)

        if return_individual_predictions:
            if self.normalize_output:
                f_out = zero_mean_unit_var_unnormalization(f_out, self.y_mean, self.y_std)
                theta_noise *= self.y_std**2
            return f_out, theta_noise

        m = np.mean(f_out, axis=0)
        # Epistemic variance across the sampled networks; the commented-out
        # line below would additionally include the learned aleatoric noise term
        # v = np.mean(f_out ** 2 + theta_noise, axis=0) - m ** 2
        v = np.mean((f_out - m) ** 2, axis=0)

        if self.normalize_output:
            m = zero_mean_unit_var_unnormalization(m, self.y_mean, self.y_std)
            v *= self.y_std ** 2

        return m, v
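
A hedged usage sketch for predict(); `model` is an assumption standing in for a trained instance of the surrounding class, which is not shown here.

import numpy as np

def demo_predict(model, n_points=10, n_dims=3):
    X_test = np.random.rand(n_points, n_dims)
    # Predictive mean and variance, averaged over the posterior weight samples
    mean, var = model.predict(X_test)
    # Per-sample predictions and their noise estimates
    f_all, noise_all = model.predict(X_test, return_individual_predictions=True)
    return mean, var, f_all, noise_all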
Example #3
    def train(self, X, y, do_optimize=True):
        """
        Computes the Cholesky decomposition of the covariance of X and
        estimates the GP hyperparameters by optimizing the marginal
        log-likelihood. The prior mean of the GP is set to the empirical
        mean of y.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, the hyperparameters are optimized; otherwise
            the default hyperparameters of the kernel are used.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(
                X, self.lower, self.upper)
        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(
                y)
            if self.y_std == 0:
                raise ValueError(
                    "Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the empirical mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)

        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            self.hypers = self.optimize()
            self.gp.kernel[:] = self.hypers[:-1]
            self.noise = np.exp(self.hypers[-1])  # sigma^2
        else:
            self.hypers = self.gp.kernel[:]
            self.hypers = np.append(self.hypers, np.log(self.noise))

        logger.debug("GP Hyperparameters: " + str(self.hypers))

        try:
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))
        except np.linalg.LinAlgError:
            # If the Cholesky factorization fails, increase the noise (jitter)
            # and retry once
            self.noise *= 10
            self.gp.compute(self.X, yerr=np.sqrt(self.noise))

        self.is_trained = True
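
A sketch of how this train() method might be called. The GP wrapper instance and its george kernel are assumptions constructed outside this snippet.

import numpy as np

def demo_gp_train(model):
    # `model` is assumed to be an instance of the GP wrapper defining train()
    # above, already constructed with a george kernel.
    X = np.random.rand(20, 2)
    y = np.sin(3 * X[:, 0]) + 0.1 * np.random.randn(20)
    model.train(X, y, do_optimize=True)  # optimizes kernel hyperparameters and noise
    assert model.is_trained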
Example #4
    def predict(self, X_test):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            N input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """
        # Normalize inputs
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(
                X_test, self.X_mean, self.X_std)
        else:
            X_ = X_test

        # Get features from the net

        layers = lasagne.layers.get_all_layers(self.network)
        theta = lasagne.layers.get_output(layers[:-1], X_)[-1].eval()

        # Marginalise predictions over hyperparameters of the BLR
        mu = np.zeros([len(self.models), X_test.shape[0]])
        var = np.zeros([len(self.models), X_test.shape[0]])

        for i, m in enumerate(self.models):
            mu[i], var[i] = m.predict(theta)

        # See the algorithm runtime prediction paper by Hutter et al.
        # for the derivation of the total variance
        m = np.mean(mu, axis=0)
        v = np.mean(mu**2 + var, axis=0) - m**2

        # Clip negative variances and set them to the smallest
        # positive float value
        v = np.clip(v, np.finfo(v.dtype).eps, np.inf)

        if self.normalize_output:
            m = zero_mean_unit_var_unnormalization(m, self.y_mean, self.y_std)
            v *= self.y_std**2

        return m, v
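
The predictive variance above follows the law of total variance: Var[f] = E[var] + E[mu^2] - (E[mu])^2, where the expectations run over the hyperparameter samples. A hypothetical usage sketch; `model` stands in for a trained instance of the class that defines this predict().

import numpy as np

def demo_marginalized_predict(model, n_points=5, n_dims=3):
    # `model` is assumed trained, so that model.models holds one Bayesian
    # linear regression per hyperparameter sample.
    X_test = np.random.rand(n_points, n_dims)
    m, v = model.predict(X_test)
    # m and v have shape (n_points,); v is clipped to stay strictly positive
    return m, v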
Example #5
    def train(self, X, y, do_optimize=True):
        """
        Trains the model on the provided data.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, the hyperparameters are optimized; otherwise
            the default hyperparameters are used.

        """
        start_time = time.time()

        # Normalize inputs
        if self.normalize_input:
            self.X, self.X_mean, self.X_std = zero_mean_unit_var_normalization(
                X)
        else:
            self.X = X

        # Normalize outputs
        if self.normalize_output:
            self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(
                y)
        else:
            self.y = y

        self.y = self.y[:, None]

        # Check if we have enough points to create a minibatch; otherwise use all data points
        if self.X.shape[0] <= self.batch_size:
            batch_size = self.X.shape[0]
        else:
            batch_size = self.batch_size

        # Create the neural network
        features = X.shape[1]

        self.network = self._build_net(self.input_var, features)

        prediction = lasagne.layers.get_output(self.network)

        # Define the loss function for training: mean squared error scaled
        # by a fixed factor of 1 / 0.001
        loss = T.mean(T.square(prediction - self.target_var)) / 0.001

        params = lasagne.layers.get_all_params(self.network, trainable=True)

        self.learning_rate = theano.shared(
            np.array(self.init_learning_rate, dtype=theano.config.floatX))

        updates = lasagne.updates.adam(loss,
                                       params,
                                       learning_rate=self.learning_rate)

        logging.debug("... compiling theano functions")
        self.train_fn = theano.function([self.input_var, self.target_var],
                                        loss,
                                        updates=updates,
                                        allow_input_downcast=True)

        # Start training
        lc = np.zeros([self.num_epochs])
        for epoch in range(self.num_epochs):

            epoch_start_time = time.time()

            train_err = 0
            train_batches = 0

            for batch in self.iterate_minibatches(self.X,
                                                  self.y,
                                                  batch_size,
                                                  shuffle=True):
                inputs, targets = batch
                train_err += self.train_fn(inputs, targets)
                train_batches += 1

            lc[epoch] = train_err / train_batches
            logging.debug("Epoch {} of {}".format(epoch + 1, self.num_epochs))
            curtime = time.time()
            epoch_time = curtime - epoch_start_time
            total_time = curtime - start_time
            logging.debug("Epoch time {:.3f}s, total time {:.3f}s".format(
                epoch_time, total_time))
            logging.debug("Training loss:\t\t{:.5g}".format(train_err /
                                                            train_batches))

            # Adapt the learning rate
            if epoch % self.adapt_epoch == 0:
                self.learning_rate.set_value(
                    np.float32(self.init_learning_rate * 0.1))

        # Design matrix
        layers = lasagne.layers.get_all_layers(self.network)
        self.Theta = lasagne.layers.get_output(layers[:-1], self.X)[-1].eval()

        if do_optimize:
            if self.do_mcmc:
                self.sampler = emcee.EnsembleSampler(
                    self.n_hypers, 2, self.marginal_log_likelihood)

                # Do a burn-in in the first iteration
                if not self.burned:
                    # Initialize the walkers by sampling from the prior
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                    # Run MCMC sampling
                    self.p0, _, _ = self.sampler.run_mcmc(self.p0,
                                                          self.burnin_steps,
                                                          rstate0=self.rng)

                    self.burned = True

                # Start sampling
                pos, _, _ = self.sampler.run_mcmc(self.p0,
                                                  self.chain_length,
                                                  rstate0=self.rng)

                # Save the current position, it will be the startpoint in
                # the next iteration
                self.p0 = pos

                # Take the last samples from each walker
                self.hypers = np.exp(self.sampler.chain[:, -1])
            else:
                # Optimize hyperparameters of the Bayesian linear regression
                res = optimize.fmin(self.nll, np.random.rand(2))
                self.hypers = [[np.exp(res[0]), np.exp(res[1])]]
        else:

            self.hypers = [[self.alpha, self.beta]]

        logging.info("Hypers: %s" % self.hypers)
        self.models = []
        for sample in self.hypers:

            # Instantiate a model for each hyperparameter configuration
            model = BayesianLinearRegression(alpha=sample[0],
                                             beta=sample[1],
                                             basis_func=None)
            model.train(self.Theta, self.y[:, 0], do_optimize=False)

            self.models.append(model)
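
A hypothetical end-to-end call for this train() method (a neural network whose last layer is replaced by Bayesian linear regression). The surrounding class is not shown, so the helper and `model` below are assumptions.

import numpy as np

def demo_net_blr_train(model):
    # `model` is assumed to be an instance of the class defining train() above.
    X = np.random.rand(50, 4)
    y = np.sinc(10 * X[:, 0] - 5)
    model.train(X, y, do_optimize=True)
    # Predictions marginalize over the sampled/optimized BLR hyperparameters
    return model.predict(np.random.rand(5, 4))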
Example #6
    def train(self, X, y, do_optimize=True, **kwargs):
        self.X = normalize(X, self.lower, self.upper)

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(
                y)
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)

            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = np.random.rand(self.n_hypers,
                                             len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position, it will be the start point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            if self.hypers is None:
                self.hypers = self.gp.kernel[:].tolist()
                self.hypers.append(self.noise)
                self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            #kernel.pars = np.exp(sample[:-1])
            kernel.vector = sample[:-1]
            noise = np.exp(sample[-1])
            model = MTBOGP(kernel,
                           normalize_output=self.normalize_output,
                           noise=noise,
                           lower=self.lower,
                           upper=self.upper,
                           rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
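
A minimal sketch of the call sequence; the kernel, the lower/upper bounds, and the wrapper instance are assumptions not contained in this snippet.

import numpy as np

def demo_mcmc_gp_train(model):
    # `model` is assumed to be an instance of the MCMC GP wrapper above,
    # constructed elsewhere with a george kernel, bounds, and a prior.
    X = np.random.rand(30, 3)
    y = np.random.rand(30)
    model.train(X, y, do_optimize=True)  # one GP is fit per hyperparameter sample
    assert model.is_trained and len(model.models) > 0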
Example #7
    def train(self, X, y, do_optimize=True, **kwargs):
        """
        Performs MCMC sampling to draw hyperparameter configurations from the
        likelihood and trains a GP on X and y for each sample.
        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.
        do_optimize: boolean
            If set to true, we perform MCMC sampling; otherwise we just use the
            hyperparameters specified in the kernel.
        """

        if self.normalize_input:
            # Normalize input to be in [0, 1]
            self.X, self.lower, self.upper = normalization.zero_one_normalization(X, self.lower, self.upper)

        else:
            self.X = X

        if self.normalize_output:
            # Normalize output to have zero mean and unit standard deviation
            self.y, self.y_mean, self.y_std = normalization.zero_mean_unit_var_normalization(y)
            if self.y_std == 0:
                raise ValueError("Cannot normalize output. All targets have the same value")
        else:
            self.y = y

        # Use the mean of the data as mean for the GP
        self.mean = np.mean(self.y, axis=0)
        self.gp = george.GP(self.kernel, mean=self.mean)

        if do_optimize:
            # We have one walker for each hyperparameter configuration
            sampler = emcee.EnsembleSampler(self.n_hypers,
                                            len(self.kernel.pars) + 1,
                                            self.loglikelihood)
            sampler.random_state = self.rng.get_state()
            # Do a burn-in in the first iteration
            if not self.burned:
                # Initialize the walkers by sampling from the prior
                if self.prior is None:
                    self.p0 = self.rng.rand(self.n_hypers, len(self.kernel.pars) + 1)
                else:
                    self.p0 = self.prior.sample_from_prior(self.n_hypers)
                # Run MCMC sampling
                self.p0, _, _ = sampler.run_mcmc(self.p0,
                                                 self.burnin_steps,
                                                 rstate0=self.rng)

                self.burned = True

            # Start sampling
            pos, _, _ = sampler.run_mcmc(self.p0,
                                         self.chain_length,
                                         rstate0=self.rng)

            # Save the current position, it will be the start point in
            # the next iteration
            self.p0 = pos

            # Take the last samples from each walker
            self.hypers = sampler.chain[:, -1]

        else:
            self.hypers = self.gp.kernel[:].tolist()
            self.hypers.append(self.noise)
            self.hypers = [self.hypers]

        self.models = []
        for sample in self.hypers:

            # Instantiate a GP for each hyperparameter configuration
            kernel = deepcopy(self.kernel)
            kernel.pars = np.exp(sample[:-1])
            noise = np.exp(sample[-1])
            model = GaussianProcess(kernel,
                                    normalize_output=self.normalize_output,
                                    normalize_input=self.normalize_input,
                                    noise=noise,
                                    lower=self.lower,
                                    upper=self.upper,
                                    rng=self.rng)
            model.train(X, y, do_optimize=False)
            self.models.append(model)

        self.is_trained = True
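
A hedged sketch showing both do_optimize paths; `model` stands in for an instance of the MCMC GP class above, which is constructed outside this snippet.

import numpy as np

def demo_gp_mcmc_train(model, optimize=True):
    X = np.random.rand(25, 2)
    y = np.cos(X[:, 0]) * X[:, 1]
    # With optimize=True the hyperparameters are sampled via emcee;
    # with optimize=False the current kernel parameters and noise are reused.
    model.train(X, y, do_optimize=optimize)
    return model.models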
Example #8
File: bnn.py Project: zwt233/autotf
    def train(self, X, y, *args, **kwargs):
        """
        Trains the model on the provided data.

        Parameters
        ----------
        X: np.ndarray (N, D)
            Input data points. The dimensionality of X is (N, D),
            with N as the number of points and D is the number of features.
        y: np.ndarray (N,)
            The corresponding target values.

        """

        start_time = time.time()

        self.net = self.get_net(n_inputs=X.shape[1])

        nll, mse = self.negativ_log_likelihood(self.net, self.Xt, self.Yt, X.shape[0], self.weight_prior, self.variance_prior)
        params = lasagne.layers.get_all_params(self.net, trainable=True)

        seed = self.rng.randint(1, 100000)
        srng = theano.sandbox.rng_mrg.MRG_RandomStreams(seed)

        if self.sampling_method == "sghmc":
            self.sampler = SGHMCSampler(rng=srng, precondition=self.precondition, ignore_burn_in=False)
        elif self.sampling_method == "sgld":
            self.sampler = SGLDSampler(rng=srng, precondition=self.precondition)
        else:
            logging.error("Sampling Strategy % does not exist!" % self.sampling_method)

        self.compute_err = theano.function([self.Xt, self.Yt], [mse, nll])
        self.single_predict = theano.function([self.Xt], lasagne.layers.get_output(self.net, self.Xt))

        # Clear samples from previous training runs
        self.samples.clear()

        if self.normalize_input:
            self.X, self.x_mean, self.x_std = zero_mean_unit_var_normalization(X)
        else:
            self.X = X

        if self.normalize_output:
            self.y, self.y_mean, self.y_std = zero_mean_unit_var_normalization(y)
        else:
            self.y = y

        self.sampler.prepare_updates(nll, params, self.l_rate, mdecay=self.mdecay,
                                     inputs=[self.Xt, self.Yt], scale_grad=X.shape[0])

        logging.info("Starting sampling")

        # Check if we have enough data points to form a minibatch
        # otherwise set the batchsize equal to the number of input points
        if self.X.shape[0] < self.bsize:
            self.bsize = self.X.shape[0]
            logging.error("Not enough datapoint to form a minibatch. "
                          "Set the batchsize to {}".format(self.bsize))

        i = 0
        while i < self.n_iters and len(self.samples) < self.n_nets:
            if self.X.shape[0] == self.bsize:
                start = 0
            else:
                start = np.random.randint(0, self.X.shape[0] - self.bsize)

            xmb = floatX(self.X[start:start + self.bsize])
            ymb = floatX(self.y[start:start + self.bsize, None])

            if i < self.burn_in:
                _, nll_value = self.sampler.step_burn_in(xmb, ymb)
            else:
                _, nll_value = self.sampler.step(xmb, ymb)

            if i % 512 == 0 and i <= self.burn_in:
                total_err, total_nll = self.compute_err(floatX(self.X), floatX(self.y).reshape(-1, 1))
                t = time.time() - start_time
                logging.info("Iter {:8d} : NLL = {:11.4e} MSE = {:.4e} "
                             "Time = {:5.2f}".format(i, float(total_nll),
                             float(total_err), t))

            if i % self.sample_steps == 0 and i >= self.burn_in:
                total_err, total_nll = self.compute_err(floatX(self.X), floatX(self.y).reshape(-1, 1))
                t = time.time() - start_time
                self.samples.append(lasagne.layers.get_all_param_values(self.net))
                logging.info("Iter {:8d} : NLL = {:11.4e} MSE = {:.4e} "
                             "Samples= {} Time = {:5.2f}".format(i,
                                                                      float(total_nll),
                                                                      float(total_err),
                                                                      len(self.samples), t))
            i += 1
        self.is_trained = True
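
A hypothetical end-to-end sketch for the SGHMC/SGLD-based train() method above. Constructing the Bayesian neural network class itself is outside this snippet, so `model` and this helper are assumptions.

import numpy as np

def demo_bnn_train(model):
    X = np.random.rand(100, 4)
    y = np.sinc(10 * X[:, 0] - 5)
    model.train(X, y)                      # draws weight samples with SGHMC or SGLD
    m, v = model.predict(np.random.rand(10, 4))
    funcs = model.sample_functions(np.random.rand(10, 4), n_funcs=3)
    return m, v, funcs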