def _test_jacobian(self, input_dim, observed_dim, hidden_dim, param_dim):
    """Finite-difference check of the autoregressive property: under the
    network's permutation, each output may depend only on inputs that come
    strictly earlier, so the permuted Jacobian must be strictly upper
    triangular."""
    jacobian = torch.zeros(input_dim, input_dim)

    if observed_dim > 0:
        arn = ConditionalAutoRegressiveNN(input_dim, observed_dim, [hidden_dim],
                                          param_dims=[param_dim])
    else:
        arn = AutoRegressiveNN(input_dim, [hidden_dim], param_dims=[param_dim])

    def nonzero(x):
        return torch.sign(torch.abs(x))

    x = torch.randn(1, input_dim)
    y = torch.randn(1, observed_dim)

    for output_index in range(param_dim):
        for j in range(input_dim):
            for k in range(input_dim):
                epsilon_vector = torch.zeros(1, input_dim)
                epsilon_vector[0, j] = self.epsilon
                if observed_dim > 0:
                    delta = (arn(x + 0.5 * epsilon_vector, y)
                             - arn(x - 0.5 * epsilon_vector, y)) / self.epsilon
                else:
                    delta = (arn(x + 0.5 * epsilon_vector)
                             - arn(x - 0.5 * epsilon_vector)) / self.epsilon
                jacobian[j, k] = float(delta[0, output_index, k])

        permutation = arn.get_permutation()
        permuted_jacobian = jacobian.clone()
        for j in range(input_dim):
            for k in range(input_dim):
                permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]

        # The lower triangle (including the diagonal) must be exactly zero.
        lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=0))
        assert lower_sum == float(0.0)
def conditional_affine_autoregressive(input_dim, context_dim, hidden_dims=None, **kwargs):
    """
    A helper function to create a
    :class:`~pyro.distributions.transforms.ConditionalAffineAutoregressive` object
    that takes care of constructing a dense network with the correct input/output
    dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the dense network. Defaults
        to using [10*input_dim]
    :type hidden_dims: list[int]
    :param log_scale_min_clip: The minimum value for clipping the log(scale) from
        the autoregressive NN
    :type log_scale_min_clip: float
    :param log_scale_max_clip: The maximum value for clipping the log(scale) from
        the autoregressive NN
    :type log_scale_max_clip: float
    :param sigmoid_bias: A term to add to the logit of the input when using the
        stable transform.
    :type sigmoid_bias: float
    :param stable: When true, uses the alternative "stable" version of the
        transform (see above).
    :type stable: bool

    """
    if hidden_dims is None:
        hidden_dims = [10 * input_dim]
    nn = ConditionalAutoRegressiveNN(input_dim, context_dim, hidden_dims)
    return ConditionalAffineAutoregressive(nn, **kwargs)
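A minimal usage sketch (not part of the helper above; the dimensions and variable names are illustrative): the transform returned by this helper is typically paired with a base distribution in a ConditionalTransformedDistribution and then conditioned on a context vector before sampling or scoring. The spline helper further down plugs in the same way.

import torch
import pyro.distributions as dist
from pyro.distributions.transforms import conditional_affine_autoregressive

input_dim, context_dim = 5, 3
base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim))
transform = conditional_affine_autoregressive(input_dim, context_dim)
flow_dist = dist.ConditionalTransformedDistribution(base_dist, [transform])

context = torch.randn(context_dim)
x = flow_dist.condition(context).sample()         # draw from the conditional flow
log_p = flow_dist.condition(context).log_prob(x)  # conditional log-density of x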
def loadConditionalIAFFlows(self, num_iafs, context_dims, z_dim=100, iaf_dim=320):
    ar = ConditionalAutoRegressiveNN(z_dim, context_dims, [iaf_dim, iaf_dim])
    # Note: every call to flow_fn returns a flow wrapping the same
    # autoregressive network instance `ar`.
    flow_fn = lambda: CondInverseAutoregressiveFlowStable(ar)
    self.use_cond_flow = True
    self.loadFlows(num_iafs, flow_fn)
def conditional_spline_autoregressive(input_dim, context_dim, hidden_dims=None, count_bins=8,
                                      bound=3.0, order="linear"):
    r"""
    A helper function to create a
    :class:`~pyro.distributions.transforms.ConditionalSplineAutoregressive` object
    that takes care of constructing an autoregressive network with the correct
    input/output dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the autoregressive network.
        Defaults to using [input_dim * 10, input_dim * 10]
    :type hidden_dims: list[int]
    :param count_bins: The number of segments comprising the spline.
    :type count_bins: int
    :param bound: The quantity :math:`K` determining the bounding box,
        :math:`[-K,K]\times[-K,K]`, of the spline.
    :type bound: float
    :param order: One of ['linear', 'quadratic'] specifying the order of the spline.
    :type order: string

    """
    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]
    param_dims = [count_bins, count_bins, count_bins - 1, count_bins]
    arn = ConditionalAutoRegressiveNN(input_dim, context_dim, hidden_dims,
                                      param_dims=param_dims)
    return ConditionalSplineAutoregressive(input_dim, arn, count_bins=count_bins, bound=bound,
                                           order=order)
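A hedged training sketch (illustrative only; the data, shapes, and optimizer settings below are placeholders): the conditional spline transform built by this helper can be fit by maximum likelihood, conditioning on the context batch at each step and descending the negative conditional log-density.

import torch
import pyro.distributions as dist
from pyro.distributions.transforms import conditional_spline_autoregressive

input_dim, context_dim = 6, 2
base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim))
transform = conditional_spline_autoregressive(input_dim, context_dim, count_bins=8)
flow = dist.ConditionalTransformedDistribution(base_dist, [transform])

x = torch.randn(128, input_dim)    # stand-in data
y = torch.randn(128, context_dim)  # stand-in context
optimizer = torch.optim.Adam(transform.parameters(), lr=1e-3)
for step in range(100):
    optimizer.zero_grad()
    loss = -flow.condition(y).log_prob(x).mean()  # negative conditional log-likelihood
    loss.backward()
    optimizer.step()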
def conditional_neural_autoregressive(input_dim, context_dim, hidden_dims=None,
                                      activation='sigmoid', width=16):
    """
    A helper function to create a
    :class:`~pyro.distributions.transforms.ConditionalNeuralAutoregressive` object
    that takes care of constructing an autoregressive network with the correct
    input/output dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the autoregressive network.
        Defaults to using [3*input_dim + 1]
    :type hidden_dims: list[int]
    :param activation: Activation function to use. One of 'ELU', 'LeakyReLU',
        'sigmoid', or 'tanh'.
    :type activation: string
    :param width: The width of the "multilayer perceptron" in the transform
        (see paper). Defaults to 16
    :type width: int

    """
    if hidden_dims is None:
        hidden_dims = [3 * input_dim + 1]
    arn = ConditionalAutoRegressiveNN(input_dim, context_dim, hidden_dims,
                                      param_dims=[width] * 3)
    return ConditionalNeuralAutoregressive(arn, hidden_units=width, activation=activation)
class MAF(torch.nn.Module):
    """Class containing a single MAF block.

    It can be used also as an IAF block. The forward pass is always the fast
    direction. For a MAF, this means that it actually computes the inverse
    transformation f^{-1}(x). The inverse pass is the slow pass (i.e., for the
    MAF, f(u)).

    There are two parametrizations that are implemented:

        standard:   x = f(u) = u * exp(alpha(x, y)) + mu(x, y)

                    where alpha(x, y) and mu(x, y) are autoregressive on x, and
                    y is a context variable. I.e., there can be arbitrary
                    dependence on y.

        iaf:        u = f^{-1}(x) = x * sigma(x, y) + (1 - sigma(x, y)) * m(x, y)

                    where sigma(x, y) = sigmoid(s(x, y)) and where s(x, y) and
                    m(x, y) are autoregressive on x, and y is a context
                    variable.

    The forward pass, therefore, returns f^{-1}(x), and the inverse pass
    returns f(u). In addition, both passes return log|det J(f)|, where J(f) is
    the Jacobian of f(u).

    Args:
        input_dim               dim(x)
        context_dim             dim(y)
        hidden_dims             list of dimensions for hidden layers of
                                autoregressive network (MADE)
        activation              activation function for autoregressive network
        iaf_parametrization     whether to use the IAF parametrization. If
                                False, uses the standard parametrization.
    """

    def __init__(self, input_dim, context_dim, hidden_dims,
                 activation=nn.ELU(), iaf_parametrization=False):
        super(MAF, self).__init__()
        self.arn = ConditionalAutoRegressiveNN(
            input_dim,
            context_dim,
            hidden_dims,
            nonlinearity=activation,
            skip_connections=False,
        )
        # This is a bit of a hack: it registers the permutation attribute of
        # the ConditionalAutoRegressiveNN as a buffer, so that it will be saved
        # when state_dict() is called. We use the permutation explicitly below
        # (in inverse), so this quantity needs to be saved.
        #
        # A less hacky approach would be to save each permutation as a
        # hyperparameter and then pass it at build time. This is quicker though.
        perm = self.arn.permutation.clone().detach()
        del self.arn.permutation
        self.arn.register_buffer('permutation', perm)
        self.iaf_parametrization = iaf_parametrization
        self.initial_bias = 1.0

    def inverse(self, u, context):
        """This is the forward MAF."""
        x_size = u.size()[:-1]
        perm = self.arn.permutation
        input_size = u.size(-1)
        x = [torch.zeros(x_size, device=u.device)] * input_size

        # Expensive: the inverse is computed one coordinate at a time.
        for idx in perm:
            if self.iaf_parametrization:
                m, s = self.arn(torch.stack(x, dim=-1), context)
                sigma = torch.sigmoid(
                    s + self.initial_bias * torch.ones_like(s))
                x[idx] = ((u[..., idx] - m[..., idx]) / sigma[..., idx]
                          + m[..., idx])
            else:
                mu, alpha = self.arn(torch.stack(x, dim=-1), context)
                x[idx] = (u[..., idx] * torch.exp(alpha[..., idx])
                          + mu[..., idx])
        x = torch.stack(x, dim=-1)

        # log|det df/du|
        if self.iaf_parametrization:
            log_det_Jf = - (torch.log(sigma)).sum(-1)
        else:
            log_det_Jf = alpha.sum(-1)

        return x, log_det_Jf

    def forward(self, x, context):
        """This is really the inverse pass of the flow."""
        if self.iaf_parametrization:
            m, s = self.arn(x, context)
            # Initial bias of s to improve training. Initially, the IAF
            # does not effect a large change in x.
            sigma = torch.sigmoid(s + self.initial_bias * torch.ones_like(s))
            u = x * sigma + (torch.ones_like(sigma) - sigma) * m
            log_det_Jf = - (torch.log(sigma)).sum(-1)
        else:
            mu, alpha = self.arn(x, context)
            u = (x - mu) * torch.exp(-alpha)
            log_det_Jf = alpha.sum(-1)

        # Return the result of the inverse flow, along with log|det J(f)|
        return u, log_det_Jf
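A quick sketch of how the block above might be exercised (illustrative shapes; `nn` refers to `torch.nn`, as assumed by the class, and the class's Pyro dependency is assumed to be imported): the forward pass maps data x to base-space u in one shot, the inverse pass rebuilds x from u one coordinate at a time, and both return log|det J(f)| for the u -> x map.

import torch
import torch.nn as nn

input_dim, context_dim = 4, 2
maf = MAF(input_dim, context_dim, hidden_dims=[64, 64], activation=nn.ELU())

x = torch.randn(10, input_dim)          # batch of data
y = torch.randn(10, context_dim)        # batch of context vectors

u, log_det = maf(x, y)                  # fast pass: u = f^{-1}(x)
x_rec, log_det_inv = maf.inverse(u, y)  # slow pass: x = f(u)

# The round trip should reproduce x up to floating-point error.
assert torch.allclose(x, x_rec, atol=1e-4)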