def __init__(self, temperature, probs=None, logits=None, validate_args=None):
    super(RelaxedBernoulli, self).__init__(
        LogitRelaxedBernoulli(temperature, probs, logits),
        SigmoidTransform(),
        validate_args=validate_args)
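
# Illustrative usage sketch (not part of the snippet above): RelaxedBernoulli is the
# sigmoid-transformed LogitRelaxedBernoulli, so it yields reparameterized samples in (0, 1).
# Values below are arbitrary.
import torch
from torch.distributions import RelaxedBernoulli

d = RelaxedBernoulli(temperature=torch.tensor(0.5), probs=torch.tensor([0.2, 0.8]))
x = d.rsample()       # differentiable samples in (0, 1)
logp = d.log_prob(x)  # density obtained via the sigmoid change of variables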
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2, cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size),
        AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        SoftplusTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ReshapeTransform((4, 5), (2, 5, 2)),
        IndependentTransform(
            AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size), 1),
        CumulativeDistributionTransform(Normal(0, 1)),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
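
# Sketch of the kind of property these fixtures are typically checked against
# (assumed, not the actual test code): a transform's inverse should undo it, and
# log_abs_det_jacobian should match the analytic derivative.
import torch
from torch.distributions.transforms import SigmoidTransform

t = SigmoidTransform(cache_size=1)
x = torch.randn(3)
y = t(x)
assert torch.allclose(t.inv(y), x)
# d/dx sigmoid(x) = y * (1 - y), so log|det J| is log y + log(1 - y)
assert torch.allclose(t.log_abs_det_jacobian(x, y), (y * (1 - y)).log(), atol=1e-6)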
def train_moons(model, optimizer, n_epochs=10001, base_distr="normal", d=2,
                device=None, plot_val=True, plot_interval=1000, input_grad=False):
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if base_distr == "normal":
        distr = torch.distributions.multivariate_normal.MultivariateNormal(
            torch.zeros(d, device=device), torch.eye(d, device=device))
    elif base_distr == "logistic":
        distr = TransformedDistribution(
            Uniform(torch.zeros(d, device=device), torch.ones(d, device=device)),
            SigmoidTransform().inv)
    else:
        raise ValueError("wrong base distribution")
    train_loss = []
    pbar = trange(n_epochs)
    for i in pbar:  # range(n_epochs)
        x, y = datasets.make_moons(128, noise=.1)
        x = torch.tensor(x, dtype=torch.float32, requires_grad=input_grad).to(device)
        model.train()
        z, log_det = model(x)
        l = loss(z[-1], log_det, distr, base_distr)
        l.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss.append(l.item())
        if i % 100 == 0:
            pbar.set_postfix_str(f"loss = {train_loss[-1]:.3f}")
        if plot_val and i % plot_interval == 0:
            print(i, train_loss[-1])
            if input_grad:
                val_moons_grad(model, distr, i, device, base_distr)
            else:
                val_moons(model, distr, i, device, base_distr)
    return train_loss
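
# The `loss` helper isn't shown in this snippet; assuming it computes the usual
# normalizing-flow negative log-likelihood, a minimal stand-in could look like this
# (the name flow_nll and the unused base_distr argument are hypothetical, kept only
# to mirror the call signature above).
import torch

def flow_nll(z, log_det, distr, base_distr="normal"):
    # change of variables: log p_X(x) = log p_Z(z) + log|det dz/dx|
    return -(distr.log_prob(z) + log_det).mean()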
def __init__(self, loc, scale, transforms, sigmoid_last=True, validate_args=None):
    if sigmoid_last:
        transforms.append(SigmoidTransform())
    super(AutoregressiveFlow, self).__init__(
        Normal(loc, scale), transforms, validate_args=validate_args)
def true_model(design):
    w1 = torch.tensor([-1., 1.])
    w2 = torch.tensor([-.5, .5, -.5, .5, -.5, 2., -2., 2., -2., 0.])
    w = torch.cat([w1, w2], dim=-1)
    k = torch.tensor(.1)
    response_mean = rmv(design, w)
    base_dist = dist.Normal(response_mean, torch.tensor(1.)).to_event(1)
    k = k.expand(response_mean.shape)
    transforms = [AffineTransform(loc=0., scale=k), SigmoidTransform()]
    response_dist = dist.TransformedDistribution(base_dist, transforms)
    return pyro.sample("y", response_dist)
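
# Quick check of what the transform pair above does (illustrative, values arbitrary):
# AffineTransform(loc=0, scale=k) followed by SigmoidTransform maps a base value x
# to sigmoid(k * x), i.e. a sigmoid link with slope k.
import torch
from torch.distributions.transforms import AffineTransform, ComposeTransform, SigmoidTransform

k = torch.tensor(0.1)
t = ComposeTransform([AffineTransform(loc=0., scale=k), SigmoidTransform()])
x = torch.randn(5)
assert torch.allclose(t(x), torch.sigmoid(k * x))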
def test_overdispersed_asymptote(probs, overdispersion):
    total_count = 100000
    # Check binomial_dist converges in distribution to LogitNormal.
    d1 = binomial_dist(total_count, probs)
    d2 = dist.TransformedDistribution(
        dist.Normal(math.log(probs / (1 - probs)), overdispersion),
        SigmoidTransform())
    # CRPS is equivalent to the Cramer-von Mises test.
    # https://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion
    k = torch.arange(0., total_count + 1.)
    cdf1 = d1.log_prob(k).exp().cumsum(-1)
    cdf2 = d2.cdf(k / total_count)
    crps = (cdf1 - cdf2).pow(2).mean()
    assert crps < 0.02
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2, cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size),
        AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
def sigmoid_example(design):
    n = design.shape[-2]
    random_effect_k = pyro.sample("k", dist.Gamma(2. * torch.ones(n), torch.tensor(2.)))
    random_effect_offset = pyro.sample("w2", dist.Normal(torch.tensor(0.), torch.ones(n)))
    w1 = pyro.sample("w1", dist.Normal(torch.tensor([1., -1.]),
                                       torch.tensor([10., 10.])).to_event(1))
    mean = torch.matmul(design[..., :-2], w1.unsqueeze(-1)).squeeze(-1)
    offset_mean = mean + random_effect_offset
    base_dist = dist.Normal(offset_mean, torch.tensor(1.)).to_event(1)
    transforms = [
        AffineTransform(loc=torch.tensor(0.), scale=random_effect_k),
        SigmoidTransform()
    ]
    response_dist = dist.TransformedDistribution(base_dist, transforms)
    y = pyro.sample("y", response_dist)
    return y
def test_logistic():
    base_distribution = Uniform(0, 1)
    transforms = [SigmoidTransform().inv,
                  AffineTransform(loc=torch.tensor([2.]), scale=torch.tensor([1.]))]
    model = TransformedDistribution(base_distribution, transforms)
    transform = Logistic(2., 1.)

    x = model.sample((4,)).reshape(-1, 1)
    assert torch.all(transform.log_prob(x) - model.log_prob(x).view(-1) < 1e-4)

    x = transform.sample(4)
    assert x.shape == (4, 1)
    assert torch.all(transform.log_prob(x) - model.log_prob(x).view(-1) < 1e-4)

    x = transform.sample(1)
    assert x.shape == (1, 1)
    assert torch.all(transform.log_prob(x) - model.log_prob(x).view(-1) < 1e-4)

    transform.get_parameters()
def __init__(self, obs_dim, act_dim, act_low, act_high, log_std_min=-20,
             log_std_max=20, hidden_size=256):
    super(GaussianActorNetwork, self).__init__(obs_dim, hidden_size=hidden_size)
    self._mean_layer = nn.Linear(self._hidden_size, act_dim)
    self._std_layer = nn.Linear(self._hidden_size, act_dim)
    self._act_dim = act_dim
    self._log_std_min = log_std_min
    self._log_std_max = log_std_max
    act_scale = torch.FloatTensor(act_high - act_low).to(device)
    act_low = torch.FloatTensor(act_low).to(device)
    self._transforms = [
        SigmoidTransform(),
        AffineTransform(loc=act_low, scale=act_scale)
    ]
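
# Illustrative sketch (not the class's actual forward pass): the transforms built above
# would typically be combined with a Normal into a TransformedDistribution so that
# actions are squashed into (act_low, act_high). Shapes and values here are arbitrary.
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import AffineTransform, SigmoidTransform

act_low = torch.tensor([-1., -1.])
act_high = torch.tensor([1., 1.])
transforms = [SigmoidTransform(), AffineTransform(loc=act_low, scale=act_high - act_low)]
pi = TransformedDistribution(Normal(torch.zeros(2), torch.ones(2)), transforms)
a = pi.rsample()       # reparameterized action inside (act_low, act_high)
logp = pi.log_prob(a)  # per-dimension log-density; sum over the last dim if needed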
def __init__(self, prior, coupling, in_out_dim, mid_dim, hidden,
             bottleneck, compress, device, n_layers):
    """Initialize a NICE.

    Args:
        coupling: number of coupling layers.
        in_out_dim: input/output dimensions.
        mid_dim: number of units in a hidden layer.
        hidden: number of hidden layers.
        device: run on cpu or gpu
    """
    super(NICE, self).__init__()
    self.device = device
    if prior == 'gaussian':
        self.prior = torch.distributions.Normal(
            torch.tensor(0.).to(device), torch.tensor(1.).to(device))
    elif prior == 'logistic':
        self.prior = TransformedDistribution(
            Uniform(torch.tensor(0.).to(device), torch.tensor(1.).to(device)),
            [SigmoidTransform().inv, AffineTransform(loc=0., scale=1.)])
    else:
        raise ValueError('Prior not implemented.')
    self.in_out_dim = in_out_dim
    self.coupling = coupling
    self.n_layers = n_layers
    layer = AdditiveCoupling if coupling == 'additive' else AffineCoupling
    self.coupling_layers = nn.ModuleList([
        layer(in_out_dim, mid_dim, hidden, i % 2)
        for i in range(self.n_layers)
    ]).to(device)
    self.scale = Scaling(in_out_dim).to(device)
    self.bottleneck_factor = compress
    self.bottleneck_loss = nn.MSELoss()
    self.bottleneck = bottleneck
def bayesian_linear_model(design, w_means={}, w_sqrtlambdas={}, re_group_sizes={},
                          re_alphas={}, re_betas={}, obs_sd=None,
                          alpha_0=None, beta_0=None, response="normal",
                          response_label="y", k=None):
    """
    A pyro model for Bayesian linear regression.

    If :param:`response` is `"normal"` this corresponds to a linear regression model
    :math:`Y = Xw + \\epsilon` with :math:`\\epsilon` i.i.d. zero-mean Gaussian. The
    observation standard deviation (:param:`obs_sd`) may be known or unknown. If unknown,
    it is assumed to follow an inverse Gamma distribution with parameters
    :param:`alpha_0` and :param:`beta_0`.

    If the response type is `"bernoulli"` we instead have :math:`Y \\sim Bernoulli(p)`
    with :math:`logit(p) = Xw`.

    Given parameter groups in :param:`w_means` and :param:`w_sqrtlambdas`, the fixed
    effects regression coefficient is taken to be Gaussian with mean `w_mean` and
    standard deviation :math:`\\sigma / \\sqrt{\\lambda}`, corresponding to the normal
    inverse Gamma family.

    The random effects coefficient is constructed as follows. For each random effect
    group, standard deviations for that group are sampled from a normal inverse Gamma
    distribution. For each group, a random effect coefficient is then sampled from a
    zero mean Gaussian with those standard deviations.

    :param torch.Tensor design: a tensor with last two dimensions `n` and `p`
        corresponding to observations and features respectively.
    :param OrderedDict w_means: map from variable names to tensors of fixed effect means.
    :param OrderedDict w_sqrtlambdas: map from variable names to tensors of square root
        :math:`\\lambda` values for fixed effects.
    :param OrderedDict re_group_sizes: map from variable names to int representing the
        group size.
    :param OrderedDict re_alphas: map from variable names to `torch.Tensor`, the tensor
        consists of Gamma dist :math:`\\alpha` values.
    :param OrderedDict re_betas: map from variable names to `torch.Tensor`, the tensor
        consists of Gamma dist :math:`\\beta` values.
    :param torch.Tensor obs_sd: the observation standard deviation (if assumed known).
        This is still relevant in the case of Bernoulli observations when coefficients
        are sampled using `w_sqrtlambdas`.
    :param torch.Tensor alpha_0: Gamma :math:`\\alpha` parameter for unknown observation
        covariance.
    :param torch.Tensor beta_0: Gamma :math:`\\beta` parameter for unknown observation
        covariance.
    :param str response: Emission distribution. May be `"normal"`, `"bernoulli"` or
        `"sigmoid"`.
    :param str response_label: Variable label for response.
    :param torch.Tensor k: Only used for a sigmoid response. The slope of the sigmoid
        transformation.
    """
    # design is size batch x n x p
    # tau is size batch
    batch_shape = design.shape[:-2]
    with ExitStack() as stack:
        for plate in iter_plates_to_shape(batch_shape):
            stack.enter_context(plate)

        if obs_sd is None:
            # First, sample tau (observation precision)
            tau_prior = dist.Gamma(alpha_0.unsqueeze(-1), beta_0.unsqueeze(-1)).to_event(1)
            tau = pyro.sample("tau", tau_prior)
            obs_sd = 1. / torch.sqrt(tau)
        elif alpha_0 is not None or beta_0 is not None:
            warnings.warn("Values of `alpha_0` and `beta_0` unused because "
                          "`obs_sd` was specified already.")

        obs_sd = obs_sd.expand(batch_shape + (1,))

        # Build the regression coefficient
        w = []
        # Allow different names for different coefficient groups
        # Process fixed effects
        for name, w_sqrtlambda in w_sqrtlambdas.items():
            w_mean = w_means[name]
            # Place a normal prior on the regression coefficient
            w_prior = dist.Normal(w_mean, obs_sd / w_sqrtlambda).to_event(1)
            w.append(pyro.sample(name, w_prior))
        # Process random effects
        for name, group_size in re_group_sizes.items():
            # Sample `G` once for this group
            alpha, beta = re_alphas[name], re_betas[name]
            G_prior = dist.Gamma(alpha, beta).to_event(1)
            G = 1. / torch.sqrt(pyro.sample("G_" + name, G_prior))
            # Repeat `G` for each group
            repeat_shape = tuple(1 for _ in batch_shape) + (group_size,)
            u_prior = dist.Normal(torch.tensor(0.), G.repeat(repeat_shape)).to_event(1)
            w.append(pyro.sample(name, u_prior))
        # Regression coefficient `w` is batch x p
        w = broadcast_cat(w)

        # Run the regressor forward conditioned on inputs
        prediction_mean = rmv(design, w)

        if response == "normal":
            # y is an n-vector: hence use .to_event(1)
            return pyro.sample(response_label,
                               dist.Normal(prediction_mean, obs_sd).to_event(1))
        elif response == "bernoulli":
            return pyro.sample(response_label,
                               dist.Bernoulli(logits=prediction_mean).to_event(1))
        elif response == "sigmoid":
            base_dist = dist.Normal(prediction_mean, obs_sd).to_event(1)
            # You can add loc via the linear model itself
            k = k.expand(prediction_mean.shape)
            transforms = [
                AffineTransform(loc=torch.tensor(0.), scale=k),
                SigmoidTransform()
            ]
            response_dist = dist.TransformedDistribution(base_dist, transforms)
            return pyro.sample(response_label, response_dist)
        else:
            raise ValueError("Unknown response distribution: '{}'".format(response))
        Args:
            x: input tensor.
            reverse: True in inference mode, False in sampling mode.
        Returns:
            transformed tensor and log-determinant of Jacobian.
        """
        scale = torch.exp(self.scale) + self.eps
        det = torch.sum(self.scale)
        return x * (scale if not reverse else scale.reciprocal()), det


"""Standard logistic distribution.
"""
logistic = TransformedDistribution(
    Uniform(0, 1), [SigmoidTransform().inv, AffineTransform(loc=0., scale=1.)])


"""NICE main model.
"""
class NICE(nn.Module):
    def __init__(self, prior, coupling, in_out_dim, mid_dim, hidden,
                 bottleneck, compress, device, n_layers):
        """Initialize a NICE.

        Args:
            coupling: number of coupling layers.
            in_out_dim: input/output dimensions.
            mid_dim: number of units in a hidden layer.
            hidden: number of hidden layers.
def __init__(self, loc, scale):
    super().__init__(
        D.Uniform(torch.zeros_like(loc), 1),
        [SigmoidTransform().inv, AffineTransform(loc=loc, scale=scale)])
    self.loc = loc
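
# Numeric sanity check (illustrative): the Uniform -> inverse-sigmoid -> affine
# construction above matches the analytic logistic log-density
# log f(x) = -z - log(scale) - 2 * softplus(-z), where z = (x - loc) / scale.
import torch
import torch.nn.functional as F
from torch.distributions import TransformedDistribution, Uniform
from torch.distributions.transforms import AffineTransform, SigmoidTransform

loc, scale = torch.tensor(2.0), torch.tensor(1.5)
d = TransformedDistribution(
    Uniform(torch.zeros(()), torch.ones(())),
    [SigmoidTransform().inv, AffineTransform(loc=loc, scale=scale)])
x = torch.linspace(-3.0, 7.0, 11)
analytic = -(x - loc) / scale - scale.log() - 2 * F.softplus(-(x - loc) / scale)
assert torch.allclose(d.log_prob(x), analytic, atol=1e-4)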
class TransformMixIn:
    """Mixin for providing pre- and post-processing capabilities to encoders.

    Class should have a ``transformation`` attribute to indicate how to preprocess data.
    """

    # dict of PyTorch functions that transforms and inversely transforms values.
    # inverse entry required if "reverse" is not the "inverse" of "forward".
    TRANSFORMATIONS = {
        "log": dict(forward=_clipped_log, reverse=torch.exp, inverse_torch=ExpTransform()),
        "log1p": dict(forward=torch.log1p, reverse=torch.exp, inverse=torch.expm1,
                      inverse_torch=Expm1Transform()),
        "logit": dict(forward=_clipped_logit, reverse=_clipped_sigmoid,
                      inverse_torch=SigmoidTransform()),
        "count": dict(forward=_plus_one, reverse=F.softplus, inverse=_minus_one,
                      inverse_torch=MinusOneTransform()),
        "softplus": dict(forward=softplus_inv, reverse=F.softplus,
                         inverse_torch=SoftplusTransform()),
        "relu": dict(forward=_identity, reverse=F.relu, inverse=_identity,
                     inverse_torch=ReLuTransform()),
        "sqrt": dict(forward=torch.sqrt, reverse=_square,
                     inverse_torch=PowerTransform(exponent=2.0)),
    }

    @classmethod
    def get_transform(
        cls, transformation: Union[str, Dict[str, Callable]]
    ) -> Dict[str, Callable]:
        """Return transformation functions.

        Args:
            transformation (Union[str, Dict[str, Callable]]): name of transformation or
                dictionary with transformation information.

        Returns:
            Dict[str, Callable]: dictionary with transformation functions
                (forward, reverse, inverse and inverse_torch)
        """
        return cls.TRANSFORMATIONS.get(transformation, transformation)

    def preprocess(
        self, y: Union[pd.Series, pd.DataFrame, np.ndarray, torch.Tensor]
    ) -> Union[np.ndarray, torch.Tensor]:
        """Preprocess input data (e.g. take log).

        Uses ``transform`` attribute to determine how to apply transform.

        Returns:
            Union[np.ndarray, torch.Tensor]: return rescaled series with type depending
                on input type
        """
        if self.transformation is None:
            return y

        if isinstance(y, torch.Tensor):
            y = self.get_transform(self.transformation)["forward"](y)
        else:
            # convert first to tensor, then transform and then convert to numpy array
            if isinstance(y, (pd.Series, pd.DataFrame)):
                y = y.to_numpy()
            y = torch.as_tensor(y)
            y = self.get_transform(self.transformation)["forward"](y)
            y = np.asarray(y)
        return y

    def inverse_preprocess(
        self, y: Union[pd.Series, np.ndarray, torch.Tensor]
    ) -> Union[np.ndarray, torch.Tensor]:
        """Inverse preprocess re-scaled data (e.g. take exp).

        Uses ``transform`` attribute to determine how to apply inverse transform.

        Returns:
            Union[np.ndarray, torch.Tensor]: return rescaled series with type depending
                on input type
        """
        if self.transformation is None:
            pass
        elif isinstance(y, torch.Tensor):
            y = self.get_transform(self.transformation)["reverse"](y)
        else:
            # convert first to tensor, then transform and then convert to numpy array
            y = torch.as_tensor(y)
            y = self.get_transform(self.transformation)["reverse"](y)
            y = np.asarray(y)
        return y
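
# Hedged round-trip sketch for the "logit" entry: the clipping helpers
# (_clipped_logit / _clipped_sigmoid) are not shown here, so plain torch.logit and
# SigmoidTransform are used to illustrate the intended forward/reverse relationship.
import torch
from torch.distributions.transforms import SigmoidTransform

t = SigmoidTransform()          # registered above as the inverse_torch entry
y = torch.tensor([0.1, 0.5, 0.9])
x = torch.logit(y)              # forward: probabilities -> logits
assert torch.allclose(t(x), y)  # reverse: sigmoid maps logits back to probabilities
assert torch.allclose(t.inv(y), x)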
def __init__(self, loc, scale, validate_args=None):
    base_dist = Normal(loc, scale)
    # super(LogitNormal, self).__init__(base_dist, SigmoidTransform(), validate_args=validate_args)
    # causes an error if using importlib.reload
    super().__init__(base_dist, SigmoidTransform(), validate_args=validate_args)
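
# Minimal usage sketch (assuming the class above is a LogitNormal built on
# TransformedDistribution): samples are sigmoid-transformed Gaussian draws in (0, 1).
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import SigmoidTransform

d = TransformedDistribution(Normal(torch.tensor(0.), torch.tensor(1.)), SigmoidTransform())
x = d.sample((4,))
assert ((x > 0) & (x < 1)).all()
logp = d.log_prob(x)  # Normal log-density minus the sigmoid's log-Jacobian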