def _choose_var_strat(model, var_strat, var_dist, ind_pt, learn_ind=True, num_classes=None):
    if var_strat == "multi_task":
        try:
            num_classes = int(num_classes)
        except TypeError:
            raise RuntimeError(
                "Multi-task variational strategy must specify integer num_classes"
            )
        return gpytorch.variational.MultitaskVariationalStrategy(
            VariationalStrategy(model, ind_pt, var_dist, learn_inducing_locations=learn_ind),
            num_tasks=num_classes,
            task_dim=0,
        )
    else:
        return VariationalStrategy(model, ind_pt, var_dist, learn_inducing_locations=learn_ind)
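# A hypothetical usage sketch for _choose_var_strat; the shapes and the
# num_classes value are illustrative, and the call would normally happen
# inside an ApproximateGP subclass's __init__ with `self` as `model`.
import torch
from gpytorch.variational import CholeskyVariationalDistribution, VariationalStrategy

num_inducing, num_classes, input_dim = 32, 4, 8
# One batch of inducing points per task, matching task_dim=0 above
ind_pt = torch.randn(num_classes, num_inducing, input_dim)
var_dist = CholeskyVariationalDistribution(num_inducing, batch_shape=torch.Size([num_classes]))
# strategy = _choose_var_strat(model, "multi_task", var_dist, ind_pt, num_classes=num_classes)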
def __init__(self, input_dim, num_inducing, hidden_sizes=[32, 32], out_dim=None, mean=None, covar=None):
    if out_dim is None:
        batch_shape = torch.Size([])
    else:
        batch_shape = torch.Size([out_dim])
    if out_dim is None:
        inducing_points = torch.rand(num_inducing, hidden_sizes[-1])
    else:
        inducing_points = torch.rand(out_dim, num_inducing, hidden_sizes[-1])
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=batch_shape
    )
    # IndependentMultitaskVariationalStrategy treats tasks as independent;
    # LMCVariationalStrategy could be used instead to introduce correlation among tasks
    if out_dim is None:
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
    else:
        variational_strategy = IndependentMultitaskVariationalStrategy(
            VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=out_dim,
        )
    super(DeepGraphKernel, self).__init__(variational_strategy)
    gcn_layers = nn.ModuleList()
    layer_input_out_dims = list(zip([input_dim] + hidden_sizes[:-1], hidden_sizes))
    for in_features, out_features in layer_input_out_dims:
        gcn_layers.append(GraphConv(in_features, out_features, activation=nn.ReLU()))
    # Use batch_shape here so the single-output case (out_dim=None) does not crash
    self.mean_module = gpytorch.means.LinearMean(hidden_sizes[-1], batch_shape=batch_shape) if mean is None else mean
    self.covar_module = gpytorch.kernels.PolynomialKernel(power=4, batch_shape=batch_shape) if covar is None else covar
    # self.covar_module.offset = 5
    self.num_inducing = inducing_points.size(-2)
    self.gcn = gcn_layers
    self.dropout = torch.nn.Dropout(0.5)
def __init__(self, train_x, train_y, likelihood, Z_init):
    # Inducing locations Z corresponding to u; they can be randomly
    # initialized or regularly placed.
    self.inducing_inputs = Z_init
    self.num_inducing = len(Z_init)
    self.n = len(train_y)
    self.data_dim = train_x.shape[1]
    # Sparse variational formulation: q(u) and the marginal q(f)
    q_u = CholeskyVariationalDistribution(self.num_inducing)
    q_f = VariationalStrategy(self, self.inducing_inputs, q_u, learn_inducing_locations=True)
    super(BayesianStochasticVariationalGP, self).__init__(q_f)
    self.likelihood = likelihood
    self.train_x = train_x
    self.train_y = train_y
    self.mean_module = ZeroMean()
    self.base_covar_module = ScaleKernel(RBFKernel())
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    # Variational distribution over the log kernel hyperparameters
    hyper_prior_mean = torch.Tensor([0])
    hyper_dim = len(hyper_prior_mean)
    log_hyper_prior = NormalPrior(hyper_prior_mean, torch.ones_like(hyper_prior_mean))
    self.log_theta = LogHyperVariationalDist(hyper_dim, log_hyper_prior, self.n, self.data_dim)
def __init__(self, train_x, train_y, likelihood, learned_kernel=None, learned_mean=None,
             mean_module=None, covar_module=None, beta=1.0):
    self.beta = beta
    self.n_train_samples = train_x.shape[0]
    variational_distribution = CholeskyVariationalDistribution(self.n_train_samples)
    variational_strategy = VariationalStrategy(
        self, train_x, variational_distribution, learn_inducing_locations=False
    )
    super().__init__(variational_strategy)
    if mean_module is None:
        self.mean_module = gpytorch.means.ZeroMean()
    else:
        self.mean_module = mean_module
    self.covar_module = covar_module
    self.learned_kernel = learned_kernel
    self.learned_mean = learned_mean
    self.likelihood = likelihood
def __init__(self, input_dims, output_dims, num_inducing=128, mean_type="constant"): if output_dims is None: inducing_points = torch.randn(num_inducing, input_dims) batch_shape = torch.Size([]) else: inducing_points = torch.randn(output_dims, num_inducing, input_dims) batch_shape = torch.Size([output_dims]) variational_distribution = CholeskyVariationalDistribution( num_inducing_points=num_inducing, batch_shape=batch_shape) variational_strategy = VariationalStrategy( self, inducing_points, variational_distribution, learn_inducing_locations=True) super().__init__(variational_strategy, input_dims=input_dims, output_dims=output_dims) if mean_type == "constant": self.mean = ConstantMean(batch_shape=batch_shape) else: self.mean = LinearMean(input_dims) self.covar = ScaleKernel(RBFKernel(ard_num_dims=input_dims, batch_shape=batch_shape), batch_shape=batch_shape, ard_num_dims=None)
def __init__(self, n_inducing):
    # Number of inducing points and optimisation samples
    assert isinstance(n_inducing, int)
    self.m = n_inducing

    # Variational distribution and strategy.
    # NOTE: we use random normal dummy inducing points here,
    # which we'll replace in self.fit
    vardist = CholeskyVariationalDistribution(self.m)
    varstra = VariationalStrategy(
        self, torch.randn((self.m, 2)), vardist, learn_inducing_locations=True
    )
    VariationalGP.__init__(self, varstra)

    # Kernel (applied in self.forward)
    self.mean = ConstantMean()
    self.cov = MaternKernel(ard_num_dims=2)
    # self.cov = GaussianSymmetrizedKLKernel()
    self.cov = ScaleKernel(self.cov, ard_num_dims=2)

    # Likelihood
    self.likelihood = GaussianLikelihood()

    # Hardware allocation
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.likelihood.to(self.device).float()
    self.to(self.device).float()
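# A minimal sketch, assuming the class above exposes `variational_strategy`
# and `device` as defined in its __init__, of how the dummy inducing points
# might be swapped for data-dependent ones inside `fit`. The helper name and
# the random-subset initialisation are illustrative, not from the source.
import torch

def _init_inducing_from_data(model, train_x):
    m = model.variational_strategy.inducing_points.size(0)
    # Copy a random subset of training inputs over the dummy locations in-place
    idx = torch.randperm(train_x.size(0))[:m]
    model.variational_strategy.inducing_points.data.copy_(train_x[idx].to(model.device))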
def __init__(self, input_dims, output_dims, num_inducing=128, mean_type='constant'):
    if output_dims is None:
        inducing_points = torch.randn(num_inducing, input_dims)
        batch_shape = torch.Size([])
    else:
        inducing_points = torch.randn(output_dims, num_inducing, input_dims)
        batch_shape = torch.Size([output_dims])
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape
    )
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(ApproximateDeepGPHiddenLayer, self).__init__(variational_strategy, input_dims, output_dims)
    if mean_type == 'constant':
        self.mean_module = ConstantMean(batch_shape=batch_shape)
    else:
        self.mean_module = LinearMean(input_dims)
    self.covar_module = ScaleKernel(
        RBFKernel(batch_shape=batch_shape, ard_num_dims=input_dims),
        batch_shape=batch_shape,
    )
    self.linear_layer = Linear(input_dims, 1)
def __init__(self, input_dims, output_dims, num_inducing=128):
    if output_dims is None:
        inducing_points = torch.randn(num_inducing, input_dims)
        batch_shape = torch.Size([])
    else:
        inducing_points = torch.randn(output_dims, num_inducing, input_dims)
        batch_shape = torch.Size([output_dims])
    if torch.cuda.is_available():
        inducing_points = inducing_points.cuda()
    variational_distribution = CholeskyVariationalDistribution(
        num_inducing_points=num_inducing, batch_shape=batch_shape
    )
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super().__init__(variational_strategy, input_dims, output_dims)
    self.mean_module = ConstantMean(batch_shape=batch_shape)
    self.covar_module = ScaleKernel(RBFKernel())
def __init__(self, train_x, lengthscale=None):
    variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
    variational_strategy = VariationalStrategy(
        self, train_x, variational_distribution, learn_inducing_locations=True
    )
    super(VSGPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    if lengthscale is not None:
        # RBFKernel's constructor does not accept a `lengthscale` kwarg;
        # set the initial value through the property instead
        self.covar_module.base_kernel.lengthscale = lengthscale
def __init__(self, train_x):
    if train_x.dim() == 3:
        # Batched inputs: give the variational distribution a matching batch shape
        variational_distribution = CholeskyVariationalDistribution(
            train_x.size(-2), batch_shape=torch.Size([train_x.size(0)])
        )
    else:
        variational_distribution = CholeskyVariationalDistribution(train_x.size(-2))
    variational_strategy = VariationalStrategy(self, train_x, variational_distribution)
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, train_x, likelihood, feature_extractor):
    variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
    variational_strategy = VariationalStrategy(self, train_x, variational_distribution)
    super(DKL, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel())
    self.feature_extractor = feature_extractor
def __init__(self):
    init_inducing = torch.randn(100, 10)
    variational_distribution = CholeskyVariationalDistribution(init_inducing.size(0))
    variational_strategy = VariationalStrategy(
        self, init_inducing, variational_distribution, learn_inducing_locations=True
    )
    super().__init__(variational_strategy)
def __init__(self, inducing_points):
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=torch.Size([2])
    )
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(SVGPRegressionModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel(
            lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1.0, sigma=0.1)
        )
    )
def __init__(self, train_x, likelihood):
    variational_distribution = CholeskyVariationalDistribution(train_x.size(0))
    variational_strategy = VariationalStrategy(self, train_x, variational_distribution)
    super(Spatiotemporal_GP, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ZeroMean()
    self.covar_season = gpytorch.kernels.PeriodicKernel()
    self.covar_week = gpytorch.kernels.RBFKernel()
    self.covar_spatial = gpytorch.kernels.MaternKernel()
    self.covar_remote = gpytorch.kernels.MaternKernel()
def __init__(self, inducing_points):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(GPClassificationModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ZeroMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    self.quadrature = GaussHermiteQuadrature1D()
def __init__(self, inducing_points, in_dim, out_dim=None, Q=8, mean=None, covar=None):
    """
    :param inducing_points: [description]
    :type inducing_points: [type]
    :param in_dim: [description]
    :type in_dim: [type]
    :param out_dim: [description], defaults to None
    :type out_dim: [type], optional
    :param Q: [description], defaults to 8
    :type Q: [type], optional
    :param mean: [description], defaults to None
    :type mean: [type], optional
    :param covar: [description], defaults to None
    :type covar: [type], optional
    """
    if out_dim is None:
        batch_shape = torch.Size([])
    else:
        batch_shape = torch.Size([out_dim])
    # variational_distribution = CholeskyVariationalDistribution(
    #     inducing_points.size(-2), batch_shape=batch_shape
    # )
    variational_distribution = MeanFieldVariationalDistribution(
        num_inducing_points=inducing_points.size(-2), batch_shape=batch_shape
    )
    # A plain VariationalStrategy seems to work better here: independent
    # multitask strategies suit single-layer GPs, but they don't seem to
    # play well with deep GP layers for some reason
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(GraphDSPPLayer, self).__init__(variational_strategy, in_dim, out_dim, Q)
    # TODO: make this modifiable
    self.mean_module = gpytorch.means.LinearMean(in_dim, batch_shape=batch_shape) if mean is None else mean
    self.covar_module = gpytorch.kernels.PolynomialKernel(power=4, batch_shape=batch_shape) if covar is None else covar
    self.num_inducing = inducing_points.size(-2)
def __init__(self, input_dim, feature_dim, label_dim, hidden_width, hidden_depth,
             n_inducing, batch_size, max_epochs_since_update, **kwargs):
    """
    Args:
        input_dim (int)
        feature_dim (int): dimension of deep kernel features
        label_dim (int)
        hidden_depth (int)
        hidden_width (int or list)
        n_inducing (int): number of inducing points for variational approximation
        batch_size (int)
        max_epochs_since_update (int)
    """
    params = locals()
    del params['self']
    self.__dict__ = params
    super().__init__()
    noise_constraint = GreaterThan(1e-4)
    self.likelihood = GaussianLikelihood(
        batch_shape=torch.Size([label_dim]), noise_constraint=noise_constraint
    )
    self.nn = FCNet(
        input_dim,
        output_dim=label_dim,
        hidden_width=hidden_width,
        hidden_depth=hidden_depth,
        batch_norm=True,
    )
    self.batch_norm = torch.nn.BatchNorm1d(feature_dim)
    self.mean_module = ConstantMean(batch_shape=torch.Size([label_dim]))
    base_kernel = RBFKernel(batch_shape=torch.Size([label_dim]), ard_num_dims=feature_dim)
    self.covar_module = ScaleKernel(base_kernel, batch_shape=torch.Size([label_dim]))
    variational_dist = MeanFieldVariationalDistribution(
        num_inducing_points=n_inducing, batch_shape=torch.Size([label_dim])
    )
    inducing_points = torch.randn(n_inducing, feature_dim)
    self.variational_strategy = VariationalStrategy(
        self, inducing_points, variational_dist, learn_inducing_locations=True
    )
    # initialize preprocessors
    self.register_buffer("input_mean", torch.zeros(input_dim))
    self.register_buffer("input_std", torch.ones(input_dim))
    self.register_buffer("label_mean", torch.zeros(label_dim))
    self.register_buffer("label_std", torch.ones(label_dim))
    self._train_ckpt = deepcopy(self.state_dict())
    self._eval_ckpt = deepcopy(self.state_dict())
def __init__(self, inducing_points: torch.Tensor):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(GeneralApproximateGP, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def __init__(self, inducing_points, kernel, likelihood):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(SVGPRegressionModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = kernel
    self.likelihood = likelihood
def __init__(self, inducing_points):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=False
    )
    super(GPModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def _reset_variational_strategy(self):
    inducing_points = self._select_inducing_points(method=self.inducing_point_method)
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(0), batch_shape=torch.Size([self._batch_size])
    )
    self.variational_strategy = VariationalStrategy(
        self,
        inducing_points,
        variational_distribution,
        learn_inducing_locations=False,
    )
def __init__(self, num_inducing_points=64, name_prefix="mixture_gp"):
    self.name_prefix = name_prefix
    inducing_points = torch.linspace(0, 1, num_inducing_points)
    variational_distribution = CholeskyVariationalDistribution(num_inducing_points)
    variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution)
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
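# A minimal training-loop sketch for the approximate GP models in this
# section, assuming an ApproximateGP subclass (e.g. the GPModel above), a
# GaussianLikelihood, and full-batch tensors train_x / train_y. The
# optimiser, learning rate, and epoch count are illustrative.
import torch
import gpytorch

def train_svgp(model, likelihood, train_x, train_y, num_epochs=100, lr=0.01):
    model.train()
    likelihood.train()
    optimizer = torch.optim.Adam(
        list(model.parameters()) + list(likelihood.parameters()), lr=lr
    )
    # VariationalELBO needs num_data to weight the likelihood term
    # correctly when minibatching
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))
    for _ in range(num_epochs):
        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)
        loss.backward()
        optimizer.step()
    return model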
def __init__(self, inducing_points, in_dim, out_dim=None, mean=None, covar=None, is_output_layer=True):
    """
    :param inducing_points: [description]
    :type inducing_points: [type]
    :param in_dim: [description]
    :type in_dim: [type]
    :param out_dim: [description], defaults to None
    :type out_dim: [type], optional
    :param mean: [description], defaults to None
    :type mean: [type], optional
    :param covar: [description], defaults to None
    :type covar: [type], optional
    :param is_output_layer: [description], defaults to True
    :type is_output_layer: [type], optional
    """
    if out_dim is None:
        batch_shape = torch.Size([])
    else:
        batch_shape = torch.Size([out_dim])
    variational_distribution = CholeskyVariationalDistribution(
        inducing_points.size(-2), batch_shape=batch_shape
    )
    # IndependentMultitaskVariationalStrategy treats tasks as independent;
    # LMCVariationalStrategy could be used instead to introduce correlation
    # among tasks. Fall back to a plain VariationalStrategy when out_dim is
    # None, since num_tasks must be an integer.
    if out_dim is None:
        variational_strategy = VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
    else:
        variational_strategy = IndependentMultitaskVariationalStrategy(
            VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=out_dim,
        )
    super(VariationalGraphGP, self).__init__(variational_strategy)
    # self.mean_module = gpytorch.means.ConstantMean(batch_shape=batch_shape) if mean is None else mean
    self.mean_module = gpytorch.means.LinearMean(in_dim, batch_shape=batch_shape) if mean is None else mean
    self.covar_module = gpytorch.kernels.PolynomialKernel(power=4, batch_shape=batch_shape) if covar is None else covar
    self.num_inducing = inducing_points.size(-2)
    self.is_output_layer = is_output_layer
def __init__(self, inducing_points, ex_var_dim, kernel, **ker_conf):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(ApproximateGPModel, self).__init__(variational_strategy)
    self.mean_module = ConstantMean()
    _ker_conf = {'ard_num_dims': ex_var_dim}
    _ker_conf.update(ker_conf)
    self.covar_module = set_kernel(kernel, **_ker_conf)
def __init__(
    self,
    train_x: torch.Tensor,
    train_y: torch.Tensor,
    inducing_points: torch.Tensor,
    scales: Union[torch.Tensor, float] = 1.0,
    mean_module: Optional[Mean] = None,
    covar_module: Optional[Kernel] = None,
    fixed_prior_mean: Optional[float] = None,
) -> None:
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_distribution.to(train_x)
    variational_strategy = VariationalStrategy(
        model=self,
        inducing_points=inducing_points,
        variational_distribution=variational_distribution,
        learn_inducing_locations=False,
    )
    super(MixedDerivativeVariationalGP, self).__init__(variational_strategy)
    # Set the mean module, optionally pinning the prior mean to a fixed value
    if mean_module is None:
        self.mean_module = ConstantMeanPartialObsGrad()
    else:
        self.mean_module = mean_module
    if fixed_prior_mean is not None:
        self.mean_module.constant.requires_grad_(False)
        self.mean_module.constant.copy_(torch.tensor([fixed_prior_mean], dtype=train_x.dtype))
    if covar_module is None:
        self.base_kernel = RBFKernelPartialObsGrad(
            ard_num_dims=train_x.shape[-1] - 1,
            lengthscale_prior=GammaPrior(3.0, 6.0 / scales),
        )
        self.covar_module = ScaleKernel(self.base_kernel, outputscale_prior=GammaPrior(2.0, 0.15))
    else:
        self.covar_module = covar_module
    self._num_outputs = 1
    self.train_inputs = (train_x,)
    self.train_targets = train_y
    self(train_x)  # Necessary to initialize the CholeskyVariationalDistribution
def __init__(self, inducing_points):
    '''
    As a default, we'll use the default VariationalStrategy class with a
    CholeskyVariationalDistribution. The CholeskyVariationalDistribution
    class allows S to be any positive semidefinite matrix. This is the most
    general/expressive option for approximate GPs.
    '''
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
def __init__(self, inducing_points):
    '''
    A more extreme way to reduce parameters is to get rid of S entirely.
    This corresponds to learning a delta distribution u = m rather than a
    multivariate Normal distribution for u. In other words, it corresponds
    to performing MAP estimation rather than variational inference.
    '''
    variational_distribution = DeltaVariationalDistribution(inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
def __init__(self, inducing_points):
    if inducing_points.ndim == 2:
        dims = inducing_points.shape[1]
    else:
        dims = 1
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(VarsparseGPModel, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ZeroMean()
    self.covar_module = gpytorch.kernels.ScaleKernel(
        gpytorch.kernels.RBFKernel(ard_num_dims=dims)
    )
def __init__(self, inducing_points):
    '''
    One way to reduce the number of parameters is to restrict $\mathbf S$ to
    be diagonal. This is less expressive, but the number of parameters is now
    linear in $m$ instead of quadratic. All we have to do is take the previous
    example and swap CholeskyVariationalDistribution (full-rank $\mathbf S$)
    for MeanFieldVariationalDistribution (diagonal $\mathbf S$).
    '''
    variational_distribution = MeanFieldVariationalDistribution(inducing_points.size(-2))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super().__init__(variational_strategy)
    self.mean = ConstantMean()
    self.covar = ScaleKernel(RBFKernel())
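# A quick sketch verifying the parameter-count claims in the docstrings
# above: the Cholesky parameterisation stores $m$ mean entries plus an
# $m \times m$ Cholesky factor, the mean-field one stores $m$ means plus $m$
# variances, and the delta one stores only the $m$-dimensional mean.
from gpytorch.variational import (
    CholeskyVariationalDistribution,
    DeltaVariationalDistribution,
    MeanFieldVariationalDistribution,
)

m = 64
for cls in (CholeskyVariationalDistribution, MeanFieldVariationalDistribution, DeltaVariationalDistribution):
    dist = cls(m)
    n_params = sum(p.numel() for p in dist.parameters())
    print(f"{cls.__name__}: {n_params} variational parameters")
# Expected: 64 + 64*64 = 4160 (Cholesky), 64 + 64 = 128 (mean-field), 64 (delta)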
def __init__(self, inducing_points: Tensor, covar_module: Optional[gpytorch.kernels.Kernel] = None):
    variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))
    variational_strategy = VariationalStrategy(
        self, inducing_points, variational_distribution, learn_inducing_locations=True
    )
    super(SVGP, self).__init__(variational_strategy)
    self.mean_module = gpytorch.means.ConstantMean()
    if covar_module is None:
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
    else:
        self.covar_module = covar_module