def create_mean(self):
     return MultitaskMean([
         ConstantMean(batch_shape=torch.Size([2, 3])),
         ZeroMean(),
         ZeroMean()
     ],
                          num_tasks=3)
 def setUp(self):
     self.mean = MultitaskMean(
         [ConstantMean(),
          ZeroMean(),
          ZeroMean(),
          ConstantMean()],
          num_tasks=4)
     self.mean.base_means[0].constant.data.fill_(5)
     self.mean.base_means[3].constant.data.fill_(7)
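
For reference, a minimal evaluation sketch (not part of the original test, assuming the standard torch and gpytorch.means imports): calling a MultitaskMean on an n x d input returns an n x num_tasks matrix, one column per base mean.

import torch
from gpytorch.means import ConstantMean, MultitaskMean, ZeroMean

mean = MultitaskMean([ConstantMean(), ZeroMean(), ZeroMean()], num_tasks=3)
x = torch.randn(10, 2)   # 10 points, 2 input dimensions
out = mean(x)            # shape: 10 x 3; column 0 holds the learnable constant, the others are zero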
Example #3
 def __init__(self,
              stem,
              init_x,
              num_inducing,
              lr,
              streaming=False,
              beta=1.0,
              learn_inducing_locations=True,
              num_update_steps=1,
              **kwargs):
     super().__init__()
     likelihood = BernoulliLikelihood()
     inducing_points = torch.empty(num_inducing, stem.output_dim)
     inducing_points.uniform_(-1, 1)
     mean_module = ZeroMean()
     covar_module = ScaleKernel(RBFKernel(ard_num_dims=stem.output_dim))
     self.gp = VariationalGPModel(
         inducing_points,
         mean_module,
         covar_module,
         streaming,
         likelihood,
         beta=beta,
         learn_inducing_locations=learn_inducing_locations)
     self.mll = None
     self.stem = stem
     self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)
     self.num_update_steps = num_update_steps
     self._raw_inputs = [init_x]
Example #4
def _parse_mean(input_size: int,
                dim_outputs: int = 1,
                kind: str = 'zero') -> Mean:
    """Parse Mean string.

    Parameters
    ----------
    input_size: int.
        Size of input to GP (needed for linear mean functions).
    dim_outputs: int.
        Number of outputs of the GP (used as the batch shape of linear means).
    kind: str.
        String that identifies mean function.

    Returns
    -------
    mean: Mean.
        Mean function.
    """
    if kind.lower() == 'constant':
        mean = ConstantMean()
    elif kind.lower() == 'zero':
        mean = ZeroMean()
    elif kind.lower() == 'linear':
        mean = LinearMean(input_size=input_size,
                          batch_shape=torch.Size([dim_outputs]))
    else:
        raise NotImplementedError(
            'Mean function {} not implemented.'.format(kind))
    return mean
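
A brief usage sketch (illustrative, not part of the original snippet, assuming the gpytorch.means imports used by the parser above):

from gpytorch.means import LinearMean

# Hypothetical call: a batched linear mean for a 2-output GP on 3-dimensional inputs.
mean = _parse_mean(input_size=3, dim_outputs=2, kind='linear')
assert isinstance(mean, LinearMean)   # its batch_shape is torch.Size([2])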
Example #5
    def setUp(self, batched=False, learnable=False):
        torch.set_default_tensor_type(torch.DoubleTensor)
        torch.random.manual_seed(10)

        train_x = torch.rand(10, 2)
        train_y = torch.sin(2 * train_x[:, 0] + 3 * train_x[:, 1]).unsqueeze(-1)
        train_y_var = 0.1 * torch.ones_like(train_y)
        if batched:
            train_y = torch.cat(
                (
                    train_y, 
                    train_y + 0.3 * torch.randn_like(train_y),
                    train_y + 0.3 * torch.randn_like(train_y),
                ),
                dim=1
            )
            train_y_var = train_y_var.repeat(1, 3)

        model = FixedNoiseOnlineSKIGP(
            train_inputs=train_x,
            train_targets=train_y,
            train_noise_term=train_y_var,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
            grid_size=5,
            learn_additional_noise=learnable
        )
        equivalent_model = SingleTaskGP(
            train_X=train_x, 
            train_Y=train_y, 
            likelihood=FixedNoiseGaussianLikelihood(train_y_var.t(), learn_additional_noise=learnable),
            covar_module=deepcopy(model.covar_module)
        )
        equivalent_model.mean_module = ZeroMean()

        return model, equivalent_model, train_x, train_y
Example #6
    def __init__(self, input, inducing_size, device='cpu'):
        if device == 'gpu' and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        if input.ndim == 1:
            self.input_size = 1
        else:
            self.input_size = input.shape[-1]

        self.inducing_size = inducing_size

        _likelihood = GaussianLikelihood()
        super(SparseGPRegressor, self).__init__(train_inputs=None,
                                                train_targets=None,
                                                likelihood=_likelihood)

        self.mean_module = ZeroMean()
        self.base_covar_module = ScaleKernel(RBFKernel())

        inducing_idx = np.random.choice(len(input), inducing_size, replace=False)
        self.covar_module = InducingPointKernel(self.base_covar_module,
                                                inducing_points=input[inducing_idx, ...],
                                                likelihood=_likelihood)

        self.input_trans = None
        self.target_trans = None
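
The scraped snippet stops at the constructor; a typical forward method for such an exact sparse GP regressor (a sketch under that assumption, not taken from the original source, assuming gpytorch is imported) would be:

    def forward(self, x):
        mean_x = self.mean_module(x)      # ZeroMean -> zeros of shape (n,)
        covar_x = self.covar_module(x)    # InducingPointKernel covariance
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)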
    def __init__(self, train_x, train_y, likelihood, Z_init):

        # Locations Z corresponding to u, they can be randomly initialized or
        # regularly placed.
        self.inducing_inputs = Z_init
        self.num_inducing = len(Z_init)
        self.n = len(train_y)
        self.data_dim = train_x.shape[1]
        # Sparse Variational Formulation
        q_u = CholeskyVariationalDistribution(self.num_inducing)
        q_f = VariationalStrategy(self,
                                  self.inducing_inputs,
                                  q_u,
                                  learn_inducing_locations=True)
        super(BayesianStochasticVariationalGP, self).__init__(q_f)
        self.likelihood = likelihood
        self.train_x = train_x
        self.train_y = train_y

        self.mean_module = ZeroMean()
        self.base_covar_module = ScaleKernel(RBFKernel())
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel())

        # Hyperparameter Variational distribution
        hyper_prior_mean = torch.Tensor([0])
        hyper_dim = len(hyper_prior_mean)

        log_hyper_prior = NormalPrior(hyper_prior_mean,
                                      torch.ones_like(hyper_prior_mean))
        self.log_theta = LogHyperVariationalDist(hyper_dim, log_hyper_prior,
                                                 self.n, self.data_dim)
Example #8
 def __init__(self, train_x, train_y, likelihood):
     super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = ZeroMean()
     self.base_covar_module = ScaleKernel(RBFKernel(ard_num_dims=2))
     self.covar_module = AdditiveStructureKernel(GridInterpolationKernel(
         self.base_covar_module, grid_size=100, num_dims=1),
                                                 num_dims=2)
Example #9
        def init():
            r_lik = GaussianLikelihood()
            r_kernel = GridInterpolationKernelWithFantasy(
                RBFKernel(),
                grid_size=self.grid_size,
                grid_bounds=[(-4.0, 14.0)]).double()
            r_model = RegularExactGP(self.xs, self.labels, r_lik, r_kernel,
                                     ZeroMean())

            lik = GaussianLikelihood()
            kernel = GridInterpolationKernelWithFantasy(
                RBFKernel(),
                grid_size=self.grid_size,
                grid_bounds=[(-4.0, 14.0)]).double()
            model = OnlineWoodburyGP(self.xs, self.labels, lik, kernel,
                                     ZeroMean())
            return r_model, model
 def __init__(self, train_x, train_y, likelihood):
     super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = ZeroMean()
     self.base_covar_module = ScaleKernel(
         RBFKernel(log_lengthscale_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1, log_transform=True))
     )
     self.covar_module = AdditiveGridInterpolationKernel(
         self.base_covar_module, grid_size=100, grid_bounds=[(-0.5, 1.5)], n_components=2
     )
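
This snippet relies on the pre-1.0 gpytorch prior API (log_lengthscale_prior, log_transform) and the since-removed AdditiveGridInterpolationKernel. A rough post-1.0 equivalent, following the AdditiveStructureKernel pattern of Example #8 above (a sketch, not a drop-in replacement):

     self.base_covar_module = ScaleKernel(
         RBFKernel(lengthscale_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1))
     )
     self.covar_module = AdditiveStructureKernel(
         GridInterpolationKernel(self.base_covar_module, grid_size=100,
                                 num_dims=1, grid_bounds=[(-0.5, 1.5)]),
         num_dims=2,
     )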
Example #11
 def __init__(self, kernel, depth, dim=1, collect=True):
     super().__init__()
     self.depth = depth
     self.dim = dim
     self.covar_module = kernel
     self.mean_module = ZeroMean()
     self.collect = collect
     if self.collect:
         self.collector = Collector(depth)
Example #12
    def __init__(self, train_x, train_y, likelihood, outputscale=1.0):
        super().__init__(train_x, train_y, likelihood)

        self.mean_module = ZeroMean()
        self.kernel = ScaleKernel(
            MaternKernel(nu=2.5,
                         # ard_num_dims=train_x.shape[-1]
                         ))

        self.kernel.outputscale = outputscale
Example #13
 def __init__(self, train_x, train_y, likelihood):
     super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
     self.mean_module = ZeroMean()
     self.base_covar_module = ScaleKernel(
         RBFKernel(ard_num_dims=2,
                   log_lengthscale_prior=SmoothedBoxPrior(
                       exp(-3), exp(3), sigma=0.1, log_transform=True)))
     self.covar_module = AdditiveStructureKernel(GridInterpolationKernel(
         self.base_covar_module, grid_size=100, num_dims=2),
                                                 num_dims=2)
Example #14
    def __init__(self,
                 train_x,
                 train_y,
                 likelihood,
                 num_tasks,
                 rank=1,
                 covar_module=None):
        super(HadamardMTGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = ZeroMean()
        self.num_dims = train_x[0].size(-1)

        self._init_covar_module(covar_module)
        self.task_covar_module = IndexKernel(num_tasks=num_tasks, rank=rank)
    def __init__(self, train_x, train_y, likelihood, Z_init):

        super(BayesianSparseGPR_HMC, self).__init__(train_x, train_y,
                                                    likelihood)
        self.train_x = train_x
        self.train_y = train_y
        self.inducing_points = Z_init
        self.num_inducing = len(Z_init)
        self.mean_module = ZeroMean()
        self.base_covar_module = ScaleKernel(RBFKernel())
        self.covar_module = InducingPointKernel(self.base_covar_module,
                                                inducing_points=Z_init,
                                                likelihood=likelihood)
Example #16
 def __init__(self, train_x, train_y, likelihood, Z_init):
     """The sparse GP class for regression with the collapsed bound.
        q*(u) is implicit.
     """
     super(SparseGPR, self).__init__(train_x, train_y, likelihood)
     self.train_x = train_x
     self.train_y = train_y
     self.inducing_points = Z_init
     self.num_inducing = len(Z_init)
     self.likelihood = likelihood
     self.mean_module = ZeroMean()
     self.base_covar_module = ScaleKernel(RBFKernel())
     self.covar_module = InducingPointKernel(self.base_covar_module,
                                             inducing_points=Z_init,
                                             likelihood=self.likelihood)
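
Because InducingPointKernel implements the collapsed (Titsias/SGPR) bound inside an exact GP, training such a model uses the ordinary exact marginal likelihood. A rough training sketch, assuming SparseGPR subclasses gpytorch.models.ExactGP with the usual forward method and that GaussianLikelihood is imported from gpytorch.likelihoods:

model = SparseGPR(train_x, train_y, GaussianLikelihood(), Z_init)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(model.likelihood, model)
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

model.train()
for _ in range(100):
    optimizer.zero_grad()
    loss = -mll(model(train_x), train_y)
    loss.backward()
    optimizer.step()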
Example #17
    def __init__(self,
                 train_x,
                 train_y,
                 likelihood,
                 outputscale=10,
                 transform_input_fn=None):
        super().__init__(train_x, train_y, likelihood)

        self.mean_module = ZeroMean()
        self.kernel = ScaleKernel(MaternKernel(nu=2.5))

        self.likelihood.noise_covar.noise = 1e-8
        self.kernel.outputscale = outputscale

        self.transform_input_fn = transform_input_fn
Example #18
    def __init__(self, input_size, device='cpu'):
        if device == 'gpu' and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        self.input_size = input_size

        _likelihood = GaussianLikelihood()
        super(GPRegressor, self).__init__(train_inputs=None,
                                          train_targets=None,
                                          likelihood=_likelihood)

        self.mean_module = ZeroMean()
        self.covar_module = ScaleKernel(RBFKernel())

        self.input_trans = None
        self.target_trans = None
Example #19
    def __init__(self, input_size, target_size, device='cpu'):
        if device == 'gpu' and torch.cuda.is_available():
            self.device = torch.device('cuda:0')
        else:
            self.device = torch.device('cpu')

        self.input_size = input_size
        self.target_size = target_size

        _likelihood = MultitaskGaussianLikelihood(num_tasks=self.target_size)
        super(MultiTaskGPRegressor, self).__init__(train_inputs=None,
                                                   train_targets=None,
                                                   likelihood=_likelihood)

        self.mean_module = MultitaskMean(ZeroMean(), num_tasks=self.target_size)
        self.covar_module = MultitaskKernel(RBFKernel(), num_tasks=self.target_size, rank=1)

        self.input_trans = None
        self.target_trans = None
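
Again the scraped snippet ends with the constructor; a typical forward for such a multitask exact GP (a sketch, not from the original source, assuming gpytorch is imported) returns a MultitaskMultivariateNormal:

    def forward(self, x):
        mean_x = self.mean_module(x)     # n x num_tasks
        covar_x = self.covar_module(x)   # (n * num_tasks) x (n * num_tasks) lazy covariance
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)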
    def __init__(self, train_x, train_y, likelihood, Z_init):

        # Locations Z corresponding to u, they can be randomly initialized or
        # regularly placed.
        self.inducing_inputs = Z_init
        self.num_inducing = len(Z_init)
        # Sparse Variational Formulation
        q_u = CholeskyVariationalDistribution(self.num_inducing)
        q_f = VariationalStrategy(self,
                                  self.inducing_inputs,
                                  q_u,
                                  learn_inducing_locations=True)
        super(StochasticVariationalGP, self).__init__(q_f)
        self.likelihood = likelihood
        self.train_x = train_x
        self.train_y = train_y

        self.mean_module = ZeroMean()
        self.base_covar_module = ScaleKernel(RBFKernel())
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel())
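
A minibatch training sketch for such an SVGP model (illustrative, assuming the class subclasses ApproximateGP with the usual forward method and that GaussianLikelihood is imported from gpytorch.likelihoods):

from torch.utils.data import DataLoader, TensorDataset

model = StochasticVariationalGP(train_x, train_y, GaussianLikelihood(), Z_init)
mll = gpytorch.mlls.VariationalELBO(model.likelihood, model, num_data=train_y.size(0))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for x_batch, y_batch in DataLoader(TensorDataset(train_x, train_y), batch_size=256, shuffle=True):
    optimizer.zero_grad()
    loss = -mll(model(x_batch), y_batch)
    loss.backward()
    optimizer.step()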
Example #21
    def __init__(self,
                 input_dims,
                 output_dims,
                 n_inducing=50,
                 mean=None,
                 kernel=None,
                 collapsed=False):
        # Cast in case numpy-type
        self.input_dims = int(input_dims)
        self.output_dims = int(output_dims)
        n_inducing = int(n_inducing)

        if output_dims is None or output_dims == 1:
            batch_shape = torch.Size([])
        else:
            batch_shape = torch.Size([self.output_dims])
        x_u = torch.randn(n_inducing, self.input_dims)

        if collapsed:
            assert batch_shape == torch.Size([])
            strategy = CollapsedStrategy(self, n_inducing)
        else:
            variational_dist = CholeskyVariationalDistribution(
                n_inducing, batch_shape=batch_shape)
            strategy = UnwhitenedVariationalStrategy(
                self, x_u, variational_dist, learn_inducing_locations=True)

        super().__init__(strategy)

        if mean is None:
            mean = ZeroMean()
        self.mean = mean

        if kernel is None:
            rbf = RBFKernel(ard_num_dims=input_dims)
            kernel = ScaleKernel(rbf)
        self.kernel = kernel

        self.prior_point_process = SquaredReducingPointProcess(n_inducing)
        self.variational_point_process = PoissonPointProcess(n_inducing)
Example #22
    def __init__(self,
                 train_x,
                 train_y,
                 likelihood,
                 var=None,
                 latent=None,
                 kernel_params=None,
                 latent_params=None):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        if latent_params is None:
            latent_params = {'input_dim': train_x.size(-1)}
        self._set_latent_function(latent, latent_params)

        self.mean_module = ZeroMean()
        ard_num_dims = (self.latent_func.embed_dim
                        if self.latent_func.embed_dim is not None
                        else train_x.size(-1))

        kernel = kernel_params['type'] if kernel_params is not None else 'rbf'
        if kernel is None or kernel == 'rbf':
            self.kernel_covar_module = ScaleKernel(
                RBFKernel(ard_num_dims=ard_num_dims))
        elif kernel == 'matern':
            self.kernel_covar_module = ScaleKernel(
                MaternKernel(nu=1.5, ard_num_dims=ard_num_dims))
            # without scale kernel: very poor performance
            # matern 0.5, 1.5 and 2.5 all have similar performance
        elif kernel == 'spectral_mixture':
            self.kernel_covar_module = SpectralMixtureKernel(
                num_mixtures=kernel_params['n_mixtures'],
                ard_num_dims=train_x.size(-1))
            self.kernel_covar_module.initialize_from_data(train_x, train_y)
        else:
            raise NotImplementedError

        # set covariance module
        if var is not None:
            self.noise_covar_module = WhiteNoiseKernel(var)
            self.covar_module = self.kernel_covar_module + self.noise_covar_module
        else:
            self.covar_module = self.kernel_covar_module
Example #23
 def __init__(
         self,
         stem,
         init_x,
         init_y,
         num_inducing,
         lr,
         streaming=False,
         prior_beta=1.,
         online_beta=1.,
         learn_inducing_locations=True,
         num_update_steps=1,
         covar_module=None,
         inducing_points=None,
         **kwargs
     ):
     super().__init__()
     assert init_y.ndimension() == 2
     target_dim = init_y.size(-1)
      batch_shape = torch.Size([target_dim]) if target_dim > 1 else torch.Size([])
     likelihood = GaussianLikelihood(batch_shape=batch_shape)
     if inducing_points is None:
         inducing_points = torch.empty(num_inducing, stem.output_dim)
         inducing_points.uniform_(-1, 1)
     mean_module = ZeroMean()
     if covar_module is None:
         covar_module = ScaleKernel(
             RBFKernel(ard_num_dims=stem.output_dim, batch_shape=batch_shape),
             batch_shape=batch_shape
         )
     self.gp = VariationalGPModel(inducing_points, mean_module, covar_module, streaming, likelihood,
                                  beta=online_beta, learn_inducing_locations=learn_inducing_locations)
     self.mll = None
     self.stem = stem
     self.optimizer = torch.optim.Adam(self.param_groups(lr))
     self.num_update_steps = num_update_steps
     self._raw_inputs = [init_x]
     self.target_dim = target_dim
     self._prior_beta = prior_beta
Example #24
    def _setUp(self):
        self.xs = torch.tensor([2.0, 3.0, 4.0, 1.0, 7.0], dtype=torch.double)
        # self.xs = torch.rand(100).double() * 14 - 2
        self.grid_size = 20
        self.kernel = GridInterpolationKernelWithFantasy(
            RBFKernel(), grid_size=self.grid_size,
            grid_bounds=[(-4.0, 14.0)]).double()
        # self.lengthscale = np.random.rand()*10 + 0.1
        # self.noise_var = np.random.rand()*10 + 0.1
        self.lengthscale = 10.0
        self.noise_var = 0.01
        self.lr = 0.1

        self.mean_module = ZeroMean()
        self.labels = torch.sin(self.xs) + torch.tensor(
            [0.1, 0.2, -0.1, -0.2, -0.2], dtype=torch.double)
        # self.labels = torch.sin(self.xs) + torch.randn_like(self.xs)*0.1
        self.test_points = torch.tensor([5.0, 8.0], dtype=torch.double)

        self.new_points = torch.tensor([2.4, 4.7], dtype=torch.double)
        self.new_targets = torch.sin(self.new_points) + torch.tensor(
            [0.1, -0.15], dtype=torch.double)

        self.points_sequence = [
            self.xs,
            self.new_points,
            torch.tensor([2.3]),
            torch.tensor([4.1]),
            torch.tensor([4.3]),
        ]

        self.targets_sequence = [
            self.labels,
            self.new_targets,
            torch.sin(torch.tensor([2.3])),
            torch.sin(torch.tensor([4.1])) + 1,
            torch.sin(torch.tensor([4.3])),
        ]
Example #25
	def __init__(self, train_X: Tensor, train_Y: Tensor, options: dict, which_type: Optional[str] = "obj") -> None:

		# Error checking:
		assert train_Y.dim() == 1, "train_Y is required to be 1D"
		self._validate_tensor_args(X=train_X, Y=train_Y[:,None]) # Only for this function, train_Y must be 2D (this must be a bug in botorch)

		# Dimensionality of the input space:
		self.dim = train_X.shape[-1]

		# Model identity:
		self.iden = "GP_model_{0:s}".format(which_type)

		# Likelihood:
		noise_std = options["noise_std_obj"]
		lik = FixedNoiseGaussianLikelihood(noise=torch.full_like(train_Y, noise_std**2))

		# Initialize parent class:
		super().__init__(train_X, train_Y, lik)

		# Obtain hyperprior for lengthscale and outputscale:
		# NOTE: The mean (zero) and the model noise are fixed
		lengthscale_prior, outputscale_prior = extract_prior(options,which_type)

		# Initialize prior mean:
		# self.mean_module = ConstantMean()
		self.mean_module = ZeroMean()

		# Initialize covariance function:
		# base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1],lengthscale_prior=GammaPrior(3.0, 6.0)) # original
		# self.covar_module = ScaleKernel(base_kernel=base_kernel,outputscale_prior=GammaPrior(2.0, 0.15)) # original
		base_kernel = RBFKernel(ard_num_dims=self.dim,lengthscale_prior=lengthscale_prior,lengthscale_constraint=GreaterThan(1e-2))
		self.covar_module = ScaleKernel(base_kernel=base_kernel,outputscale_prior=outputscale_prior)

		# Make sure we're on the right device/dtype
		self.to(train_X)

		# Instantiate the gradient model:
		self.model_grad = GPmodelWithGrad(dim=self.dim)
    def __init__(self,
                 dim: int,
                 train_X: Tensor,
                 train_Y: Tensor,
                 options: dict,
                 which_type: Optional[str] = "obj") -> None:

        self.dim = dim

        if len(train_Y) == 0:  # No data case
            train_X = None
            train_Y = None
        else:
            # Error checking:
            assert train_Y.dim() == 1, "train_Y is required to be 1D"
            self._validate_tensor_args(
                X=train_X, Y=train_Y[:, None]
            )  # Only for this function, train_Y must be 2D (this must be a bug in botorch)

        print("\n")
        logger.info("### Initializing GP model for objective f(x) ###")

        # Likelihood:
        noise_std = options.hyperpars.noise_std.value
        if train_Y is not None:
            lik = FixedNoiseGaussianLikelihood(
                noise=torch.full_like(train_Y, noise_std**2))
        else:
            lik = FixedNoiseGaussianLikelihood(
                noise=torch.tensor([noise_std**2], device=device, dtype=dtype))

        # Initialize parent class:
        super().__init__(train_X, train_Y, lik)

        # # Obtain hyperprior for lengthscale and outputscale:
        # # NOTE: The mean (zero) and the model noise are fixed
        # lengthscale_prior, outputscale_prior = extract_prior(options.hyperpriors)

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                                  outputscale=[self.dim])
        self.dim_hyperpars = sum(
            [len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Initialize prior mean:
        # self.mean_module = ConstantMean()
        self.mean_module = ZeroMean()

        # Initialize covariance function:
        # base_kernel = RBFKernel(ard_num_dims=train_X.shape[-1],lengthscale_prior=GammaPrior(3.0, 6.0)) # original
        # self.covar_module = ScaleKernel(base_kernel=base_kernel,outputscale_prior=GammaPrior(2.0, 0.15)) # original
        # base_kernel = RBFKernel(ard_num_dims=self.dim,lengthscale_prior=lengthscale_prior,lengthscale_constraint=GreaterThan(1e-2))
        base_kernel = MaternKernel(nu=2.5,
                                   ard_num_dims=self.dim,
                                   lengthscale=0.1 * torch.ones(self.dim))
        self.covar_module = ScaleKernel(base_kernel=base_kernel)

        self.disp_info_scipy_opti = True
        # self.method = "L-BFGS-B"
        self.method = "LN_BOBYQA"
        # self.method = 'trust-constr'

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(
            Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[
            self.idx_hyperpars["outputscale"]]
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[
            self.idx_hyperpars["lengthscales"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        # Initialize marginal log likelihood for the GPCR model.
        # mll_objective is callable
        # MLLGPCR can internally modify the model hyperparameters, and will do so throughout the optimization routine
        self.mll_objective = MLLGP(model_gp=self,
                                   likelihood_gp=self.likelihood,
                                   hyperpriors=hyperpriors)

        # Define nlopt optimizer:
        self.opti_hyperpars = OptimizationNonLinear(
            dim=self.dim_hyperpars,
            fun_obj=self.mll_objective,
            algo_str=self.method,
            tol_x=1e-4,
            Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
            bounds=self.hyperpars_bounds,
            what2optimize_str="GP hyperparameters")

        # Make sure we're on the right device/dtype
        if train_Y is not None:
            self.to(train_X)

        self.Nrestarts = options.hyperpars.optimization.Nrestarts

        self._update_hyperparameters()

        self.eval()
    def __init__(self, dim: int, train_x: Tensor, train_yl: Tensor, options):
        """
			train_X: A `batch_shape x n x d` tensor of training features.
			train_Y: A `batch_shape x n x m` tensor of training observations.
			train_Yvar: A `batch_shape x n x m` tensor of observed measurement noise.
		"""

        # Initialize parent class:
        super().__init__(
        )  # This is needed because torch.nn.Module, which is parent of GPyTorchModel, needs it

        print("\n")
        logger.info("### Initializing GPCR model for constraint g(x) ###")

        self.discard_too_close_points = options.discard_too_close_points

        self.dim = dim
        assert self.dim == train_x.shape[
            1], "The input dimension must agree with train_x"
        self.train_x = torch.tensor([],
                                    device=device,
                                    dtype=dtype,
                                    requires_grad=False)
        self.train_yl = torch.tensor([],
                                     device=device,
                                     dtype=dtype,
                                     requires_grad=False)
        self.update_XY(train_x, train_yl)

        # One output
        # ==========
        # pdb.set_trace()
        self._validate_tensor_args(X=self.train_xs,
                                   Y=self.train_ys.view(-1, 1))
        # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
        self._set_dimensions(train_X=self.train_xs,
                             train_Y=self.train_ys.view(-1, 1))
        # self.train_xs,_,_ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

        # # Two outputs
        # # ===========
        # # pdb.set_trace()
        # self._validate_tensor_args(X=self.train_xs, Y=self.train_yl)
        # # validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
        # self._set_dimensions(train_X=self.train_xs, train_Y=self.train_yl)
        # # self.train_xs,_,_ = self._transform_tensor_args(X=self.train_xs, Y=self.train_ys)

        # Initialize hyperpriors using scipy because gpytorch's gamma and beta distributions do not have the inverse CDF
        hyperpriors = dict(
            lengthscales=eval(options.hyperpars.lenthscales.prior),
            outputscale=eval(options.hyperpars.outputscale.prior),
            threshold=eval(options.hyperpars.threshold.prior))

        # Index hyperparameters:
        self.idx_hyperpars = dict(lengthscales=list(range(0, self.dim)),
                                  outputscale=[self.dim],
                                  threshold=[self.dim + 1])
        self.dim_hyperpars = sum(
            [len(val) for val in self.idx_hyperpars.values()])

        # Get bounds:
        self.hyperpars_bounds = self._get_hyperparameters_bounds(hyperpriors)
        logger.info("hyperpars_bounds:" + str(self.hyperpars_bounds))

        # Define mean and covariance modules with dummy hyperparameters
        self.mean_module = ZeroMean()
        self.covar_module = ScaleKernel(base_kernel=MaternKernel(
            nu=2.5,
            ard_num_dims=self.dim,
            lengthscale=0.1 * torch.ones(self.dim)),
                                        outputscale=10.0)

        # # If non-zero mean, constant mean is assumed:
        # if "constant" in dir(self.mean_module):
        # 	self.__threshold = self.mean_module.constant
        # else:
        # 	self.__threshold = 0.0

        # If non-zero mean, constant mean is assumed:
        if "constant" in dir(self.mean_module):
            self.__threshold = self.mean_module.constant
            self.thres_init = self.mean_module.constant
        else:
            self.__threshold = options.hyperpars.threshold.init
            self.thres_init = options.hyperpars.threshold.init

        # Get a hyperparameter sample within bounds (not the same as sampling from the corresponding priors):
        hyperpars_sample = self._sample_hyperparameters_within_bounds(
            Nsamples=1).squeeze(0)
        self.covar_module.outputscale = hyperpars_sample[
            self.idx_hyperpars["outputscale"]]
        print("self.covar_module.outputscale:",
              str(self.covar_module.outputscale))
        self.covar_module.base_kernel.lengthscale = hyperpars_sample[
            self.idx_hyperpars["lengthscales"]]
        self.threshold = hyperpars_sample[self.idx_hyperpars["threshold"]]
        self.noise_std = options.hyperpars.noise_std.value  # The evaluation noise is fixed, and given by the user

        self.gauss_tools = GaussianTools()

        # Initialize EP
        self.ep = ExpectationPropagation(
            prior_mean=self.mean_module(train_x).cpu().detach().numpy(),
            prior_cov=self.covar_module(train_x).cpu().detach().numpy(),
            Maxiter=options.ep.maxiter,
            required_precission=options.ep.prec,
            verbosity=options.ep.verbo)

        # Initialize marginal log likelihood for the GPCR model.
        # mll_objective is callable
        # MLLGPCR can internally modify the model hyperparameters, and will do so throughout the optimization routine
        self.mll_objective = MLLGPCR(model_gpcr=self, hyperpriors=hyperpriors)

        # Define nlopt optimizer:
        self.opti = OptimizationNonLinear(
            dim=self.dim_hyperpars,
            fun_obj=self.mll_objective,
            algo_str=options.hyperpars.optimization.algo_name,
            tol_x=1e-3,
            Neval_max_local_optis=options.hyperpars.optimization.Nmax_evals,
            bounds=self.hyperpars_bounds,
            what2optimize_str="GPCR hyperparameters")

        # Extra parameters:
        self.top_dist_ambiguous_points = 0.5 * torch.min(
            self.covar_module.base_kernel.lengthscale).item()
        self.factor_heteroscedastic_noise = 10**4

        # Update hyperparameters:
        self.Nrestarts_hyperpars = options.hyperpars.optimization.Nrestarts
        self._update_hyperparameters(Nrestarts=self.Nrestarts_hyperpars)

        # self.likelihood = FixedNoiseGaussianLikelihood(noise=torch.eye())
        self.likelihood = None
Example #28
    def __init__(
        self,
        train_inputs=None,
        train_targets=None,
        train_noise_term=None,
        covar_module=None,
        kernel_cache=None,
        grid_bounds=None,
        grid_size=30,
        likelihood=None,
        learn_additional_noise=False,
        num_data=None,
    ):
        super().__init__()

        assert train_inputs is not None or kernel_cache is not None

        if train_targets is not None:
            num_outputs = train_targets.shape[-1]
            input_batch_shape = train_inputs.shape[:-2]
            self.num_data = train_inputs.shape[-2]

        else:
            # pull from kernel_cache
            num_outputs = kernel_cache["response_cache"].shape[-1]
            input_batch_shape = kernel_cache["WtW"].shape[0]
            self.num_data = num_data

        self.num_outputs = num_outputs

        _batch_shape = input_batch_shape
        if num_outputs > 1:
            _batch_shape += torch.Size([num_outputs])

        if covar_module is None:
            if grid_bounds is None:
                grid_bounds = torch.stack((
                    train_inputs.min(dim=-2)[0] - 0.1,
                    train_inputs.max(dim=-2)[0] + 0.1,
                )).transpose(-1, -2)

            covar_module = ScaleKernel(
                RBFKernel(batch_shape=_batch_shape,
                          ard_num_dims=train_inputs.size(-1)),
                batch_shape=_batch_shape,
            )

        if type(covar_module) is not GridInterpolationKernel:
            covar_module = GridInterpolationKernel(
                base_kernel=covar_module,
                grid_size=grid_size,
                num_dims=train_inputs.shape[-1],
                grid_bounds=grid_bounds,
            )

        self._batch_shape = _batch_shape
        self.train_inputs = [None]
        self.train_targets = None

        self.covar_module = covar_module
        self.mean_module = ZeroMean()
        if likelihood is None:
            if train_noise_term is None:
                train_noise_term = torch.ones_like(train_targets)

            self.likelihood = FNMGLikelihood(
                noise=train_noise_term.transpose(-1, -2),
                learn_additional_noise=learn_additional_noise,
            )
        else:
            self.likelihood = likelihood
        self.has_learnable_noise = learn_additional_noise

        # initialize the kernel caches immediately so we can throw away the data
        if kernel_cache is None:
            self.covar_module = self.covar_module.to(train_inputs.device)
            initial_kxx = self.covar_module(train_inputs).evaluate_kernel()
            initial_wmat = _get_wmat_from_kernel(initial_kxx)
            self._kernel_cache = _initialize_caches(train_targets,
                                                    train_noise_term.transpose(
                                                        -1, -2),
                                                    initial_wmat,
                                                    create_w_cache=True)
        else:
            self._kernel_cache = kernel_cache
Example #29
 def setUp(self):
     self.mean = ZeroMean()
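
For reference, ZeroMean simply returns a zero vector whose shape matches the input's batch shape; a minimal check (assuming only torch and gpytorch.means):

import torch
from gpytorch.means import ZeroMean

mean = ZeroMean()
x = torch.randn(5, 3)                        # 5 points, 3 input dimensions
assert torch.equal(mean(x), torch.zeros(5))  # zero prior mean at every point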
Example #30
def main(args):
    if args.cuda and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    init_dict, train_dict, test_dict = prepare_data(args.data_loc,
                                                    args.num_init,
                                                    args.num_total)
    init_x, init_y, init_y_var = (
        init_dict["x"].to(device),
        init_dict["y"].to(device),
        init_dict["y_var"].to(device),
    )
    train_x, train_y, train_y_var = (
        train_dict["x"].to(device),
        train_dict["y"].to(device),
        train_dict["y_var"].to(device),
    )
    test_x, test_y, test_y_var = (
        test_dict["x"].to(device),
        test_dict["y"].to(device),
        test_dict["y_var"].to(device),
    )

    covar_module = ScaleKernel(
        MaternKernel(
            ard_num_dims=2,
            nu=0.5,
            lengthscale_prior=GammaPrior(3.0, 6.0),
        ),
        outputscale_prior=GammaPrior(2.0, 0.15),
    )
    if not args.exact:
        covar_module = GridInterpolationKernel(
            base_kernel=covar_module,
            grid_size=30,
            num_dims=2,
            grid_bounds=torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
        )
    model = FixedNoiseGP(
        init_x,
        init_y.view(-1, 1),
        init_y_var.view(-1, 1),
        covar_module=covar_module,
    ).to(device)
    model.mean_module = ZeroMean()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)

    print("---- Fitting initial model ----")
    start = time.time()
    with skip_logdet_forward(True), use_toeplitz(args.toeplitz):
        fit_gpytorch_torch(mll, options={"lr": 0.1, "maxiter": 1000})
    end = time.time()
    print("Elapsed fitting time: ", end - start)

    model.zero_grad()
    model.eval()

    print("--- Generating initial predictions on test set ----")
    start = time.time()
    with detach_test_caches(True), max_cholesky_size(
            args.cholesky_size), use_toeplitz(args.toeplitz):
        pred_dist = model(train_x)  # note: this evaluates on the training inputs, not test_x

        pred_mean = pred_dist.mean.detach()
        # pred_var = pred_dist.variance.detach()
    end = time.time()
    print("Elapsed initial prediction time: ", end - start)

    rmse_initial = ((pred_mean.view(-1) - train_y.view(-1))**2).mean().sqrt()
    print("Initial RMSE: ", rmse_initial.item())

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

    mll_time_list = []
    rmse_list = []
    for i in range(500, train_x.shape[0]):
        model.zero_grad()
        model.train()

        start = time.time()
        with skip_logdet_forward(True), max_cholesky_size(
                args.cholesky_size), use_toeplitz(args.toeplitz):
            loss = -mll(model(*model.train_inputs), model.train_targets).sum()

        loss.backward()
        mll_time = time.time() - start

        optimizer.step()
        model.zero_grad()
        optimizer.zero_grad()
        start = time.time()
        if not args.reset_training_data:
            with torch.no_grad():
                model.eval()
                model.posterior(train_x[i].unsqueeze(0))
                model = model.condition_on_observations(
                    X=train_x[i].unsqueeze(0),
                    Y=train_y[i].view(1, 1),
                    noise=train_y_var[i].view(-1, 1),
                )
        else:
            model.set_train_data(train_x[:i], train_y[:i], strict=False)
            model.likelihood.noise = train_y_var[:i].t()

        fantasy_time = time.time() - start
        mll_time_list.append([mll_time, fantasy_time])

        if i % 25 == 0:
            start = time.time()
            model.eval()
            model.zero_grad()

            with detach_test_caches(), max_cholesky_size(10000):
                pred_dist = model(train_x)
            end = time.time()

            rmse = (((pred_dist.mean -
                      train_y.view(-1))**2).mean().sqrt().item())
            rmse_list.append([rmse, end - start])
            print("Current RMSE: ", rmse)
            #print(
            #    "Outputscale: ", model.covar_module.base_kernel.raw_outputscale
            #)
            #print(
            #    "Lengthscale: ",
            #    model.covar_module.base_kernel.base_kernel.raw_lengthscale,
            #)

            print("Step: ", i, "Train Loss: ", loss)
            optimizer.param_groups[0]["lr"] *= 0.9

    torch.save({
        "training": mll_time_list,
        "predictions": rmse_list
    }, args.output)