Example #1
    def __init__(self, input_dim, context_dim, hidden_dims,
                 activation=nn.ELU(), iaf_parametrization=False):
        super(MAF, self).__init__()
        self.arn = ConditionalAutoRegressiveNN(
            input_dim,
            context_dim,
            hidden_dims,
            nonlinearity=activation,
            skip_connections=False,
        )

        # This is a bit of a hack: it registers the permutation attribute of
        # the ConditionalAutoRegressiveNN as a buffer, so that it will be saved
        # when state_dict() is called. We use the permutation explicitly below,
        # so this quantity needs to be saved.
        #
        # A less hacky approach would be to save each permutation as a
        # hyperparameter and pass it in at build time. This is quicker though.

        perm = self.arn.permutation.clone().detach()
        del self.arn.permutation
        self.arn.register_buffer('permutation', perm)

        self.iaf_parametrization = iaf_parametrization
        self.initial_bias = 1.0
    def _test_jacobian(self, input_dim, observed_dim, hidden_dim, param_dim):
        jacobian = torch.zeros(input_dim, input_dim)
        if observed_dim > 0:
            arn = ConditionalAutoRegressiveNN(input_dim, observed_dim, [hidden_dim], param_dims=[param_dim])
        else:
            arn = AutoRegressiveNN(input_dim, [hidden_dim], param_dims=[param_dim])

        def nonzero(x):
            return torch.sign(torch.abs(x))

        x = torch.randn(1, input_dim)
        y = torch.randn(1, observed_dim)

        for output_index in range(param_dim):
            for j in range(input_dim):
                for k in range(input_dim):
                    epsilon_vector = torch.zeros(1, input_dim)
                    epsilon_vector[0, j] = self.epsilon
                    if observed_dim > 0:
                        delta = (arn(x + 0.5 * epsilon_vector, y) - arn(x - 0.5 * epsilon_vector, y)) / self.epsilon
                    else:
                        delta = (arn(x + 0.5 * epsilon_vector) - arn(x - 0.5 * epsilon_vector)) / self.epsilon
                    jacobian[j, k] = float(delta[0, output_index, k])

            permutation = arn.get_permutation()
            permuted_jacobian = jacobian.clone()
            for j in range(input_dim):
                for k in range(input_dim):
                    permuted_jacobian[j, k] = jacobian[permutation[j], permutation[k]]

            lower_sum = torch.sum(torch.tril(nonzero(permuted_jacobian), diagonal=0))

            assert lower_sum == float(0.0)
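The test above finite-differences the network output to verify the autoregressive (triangular-Jacobian) structure. As a much smaller illustration of the same API, here is a minimal sketch, with purely illustrative dimensions, of calling a ConditionalAutoRegressiveNN directly and inspecting its output shapes:

import torch
from pyro.nn import ConditionalAutoRegressiveNN

# Illustrative sizes (not taken from the test above).
input_dim, context_dim, hidden_dim = 4, 2, 16
arn = ConditionalAutoRegressiveNN(input_dim, context_dim, [hidden_dim],
                                  param_dims=[1, 1])

x = torch.randn(1, input_dim)
y = torch.randn(1, context_dim)

# With param_dims=[1, 1] the network returns two tensors (e.g. a mean and a
# log-scale), each of shape (1, input_dim).
m, s = arn(x, y)
print(m.shape, s.shape)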
Example #3
def conditional_affine_autoregressive(input_dim,
                                      context_dim,
                                      hidden_dims=None,
                                      **kwargs):
    """
    A helper function to create an
    :class:`~pyro.distributions.transforms.ConditionalAffineAutoregressive` object
    that takes care of constructing a dense network with the correct input/output
    dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the dense network. Defaults
        to using [10*input_dim]
    :type hidden_dims: list[int]
    :param log_scale_min_clip: The minimum value for clipping the log(scale) from
        the autoregressive NN
    :type log_scale_min_clip: float
    :param log_scale_max_clip: The maximum value for clipping the log(scale) from
        the autoregressive NN
    :type log_scale_max_clip: float
    :param sigmoid_bias: A term to add to the logit of the input when using the
        stable transform.
    :type sigmoid_bias: float
    :param stable: When true, uses the alternative "stable" version of the transform
        (see above).
    :type stable: bool

    """
    if hidden_dims is None:
        hidden_dims = [10 * input_dim]
    nn = ConditionalAutoRegressiveNN(input_dim, context_dim, hidden_dims)
    return ConditionalAffineAutoregressive(nn, **kwargs)
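A minimal usage sketch for this helper, assuming the usual Pyro imports and illustrative dimensions: the conditional transform is combined with a base distribution via ConditionalTransformedDistribution, conditioned on a context, and then sampled.

import torch
import pyro.distributions as dist
import pyro.distributions.transforms as T

# Illustrative sizes.
input_dim, context_dim, batch_size = 10, 4, 3

base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim))
transform = T.conditional_affine_autoregressive(input_dim, context_dim)

z = torch.rand(batch_size, context_dim)  # context variable
flow_dist = dist.ConditionalTransformedDistribution(base_dist,
                                                    [transform]).condition(z)
x = flow_dist.sample(sample_shape=torch.Size([batch_size]))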
    def loadConditionalIAFFlows(self,
                                num_iafs,
                                context_dims,
                                z_dim=100,
                                iaf_dim=320):
        ar = ConditionalAutoRegressiveNN(z_dim, context_dims,
                                         [iaf_dim, iaf_dim])
        flow_fn = lambda: CondInverseAutoregressiveFlowStable(ar)
        self.use_cond_flow = True
        self.loadFlows(num_iafs, flow_fn)
Example #5
def conditional_spline_autoregressive(input_dim,
                                      context_dim,
                                      hidden_dims=None,
                                      count_bins=8,
                                      bound=3.0,
                                      order="linear"):
    r"""
    A helper function to create a
    :class:`~pyro.distributions.transforms.ConditionalSplineAutoregressive` object
    that takes care of constructing an autoregressive network with the correct
    input/output dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the autoregressive network.
        Defaults to using [input_dim * 10, input_dim * 10]
    :type hidden_dims: list[int]
    :param count_bins: The number of segments comprising the spline.
    :type count_bins: int
    :param bound: The quantity :math:`K` determining the bounding box,
        :math:`[-K,K]\times[-K,K]`, of the spline.
    :type bound: float
    :param order: One of ['linear', 'quadratic'] specifying the order of the spline.
    :type order: string

    """

    if hidden_dims is None:
        hidden_dims = [input_dim * 10, input_dim * 10]

    param_dims = [count_bins, count_bins, count_bins - 1, count_bins]
    arn = ConditionalAutoRegressiveNN(input_dim,
                                      context_dim,
                                      hidden_dims,
                                      param_dims=param_dims)
    return ConditionalSplineAutoregressive(input_dim,
                                           arn,
                                           count_bins=count_bins,
                                           bound=bound,
                                           order=order)
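The param_dims list above requests one parameter vector per spline quantity for each dimension (roughly: bin widths, bin heights, internal knot derivatives, and the extra vector used by the 'linear' order). A minimal sketch, with illustrative sizes, showing the shapes the autoregressive network produces under these assumptions:

import torch
from pyro.nn import ConditionalAutoRegressiveNN

# Illustrative sizes.
input_dim, context_dim, count_bins = 5, 3, 8
param_dims = [count_bins, count_bins, count_bins - 1, count_bins]

arn = ConditionalAutoRegressiveNN(input_dim, context_dim,
                                  [input_dim * 10, input_dim * 10],
                                  param_dims=param_dims)

x = torch.randn(2, input_dim)
y = torch.randn(2, context_dim)

# One tensor per entry of param_dims, each of shape (batch, param_dim_i, input_dim),
# e.g. (2, 8, 5), (2, 8, 5), (2, 7, 5), (2, 8, 5) here.
for p in arn(x, y):
    print(p.shape)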
def conditional_neural_autoregressive(input_dim,
                                      context_dim,
                                      hidden_dims=None,
                                      activation='sigmoid',
                                      width=16):
    """
    A helper function to create a
    :class:`~pyro.distributions.transforms.ConditionalNeuralAutoregressive` object
    that takes care of constructing an autoregressive network with the correct
    input/output dimensions.

    :param input_dim: Dimension of input variable
    :type input_dim: int
    :param context_dim: Dimension of context variable
    :type context_dim: int
    :param hidden_dims: The desired hidden dimensions of the autoregressive network.
        Defaults to using [3*input_dim + 1]
    :type hidden_dims: list[int]
    :param activation: Activation function to use. One of 'ELU', 'LeakyReLU',
        'sigmoid', or 'tanh'.
    :type activation: string
    :param width: The width of the "multilayer perceptron" in the transform (see
        paper). Defaults to 16
    :type width: int

    """

    if hidden_dims is None:
        hidden_dims = [3 * input_dim + 1]
    arn = ConditionalAutoRegressiveNN(input_dim,
                                      context_dim,
                                      hidden_dims,
                                      param_dims=[width] * 3)
    return ConditionalNeuralAutoregressive(arn,
                                           hidden_units=width,
                                           activation=activation)
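As with the other helpers, the returned transform can be dropped into a conditional flow. A minimal sketch under the same illustrative assumptions; since a NAF has no analytic inverse, scoring is shown on a freshly drawn sample, where the forward pass is cached:

import torch
import pyro.distributions as dist
import pyro.distributions.transforms as T

# Illustrative sizes.
input_dim, context_dim, batch_size = 6, 2, 3

base_dist = dist.Normal(torch.zeros(input_dim), torch.ones(input_dim))
transform = T.conditional_neural_autoregressive(input_dim, context_dim, width=16)

z = torch.rand(batch_size, context_dim)
flow_dist = dist.ConditionalTransformedDistribution(base_dist,
                                                    [transform]).condition(z)
x = flow_dist.sample(sample_shape=torch.Size([batch_size]))  # forward pass through the NAF
log_p = flow_dist.log_prob(x)  # scores the freshly drawn (cached) sample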
Example #7
class MAF(torch.nn.Module):
    """Class containing a single MAF block. It can be used also as an IAF
    block.

    The forward pass is always the fast direction. For a MAF, this means that
    it actually computes the inverse transformation f^{-1}(x). The inverse pass
    is the slow pass (i.e., for the MAF, f(u)).

    There are two parametrizations that are implemented:

        standard:       x = f(u) = u * exp(alpha(x, y)) + mu(x, y)

                where alpha(x, y) and mu(x, y) are autoregressive on x, and y
                is a context variable. I.e., there can be arbitrary dependence
                on y.

        iaf:            u = f^{-1}(x) = x * sigma(x, y)
                                        + (1 - sigma(x, y)) * m(x, y)

                where sigma(x, y) = sigmoid(s(x, y))

                and where s(x, y) and m(x, y) are autoregressive on x, and y
                is a context variable.

    The forward pass, therefore, returns f^{-1}(x), and the inverse pass
    returns f(u). In addition, both passes return log|det J(f)|, where J(f) is
    the Jacobian of f(u).

    Args:

            input_dim       dim(x)
            context_dim     dim(y)
            hidden_dims     list of dimensions for hidden layers of
                            autoregressive network (MADE)
            nonlinearity    activation function for autoregressive network
            iaf_parametrization     whether to use iaf parametrization. If
                                    False, uses standard parametrization.
    """

    def __init__(self, input_dim, context_dim, hidden_dims,
                 activation=nn.ELU(), iaf_parametrization=False):
        super(MAF, self).__init__()
        self.arn = ConditionalAutoRegressiveNN(
            input_dim,
            context_dim,
            hidden_dims,
            nonlinearity=activation,
            skip_connections=False,
        )

        # This is a bit of a hack: it registers the permutation attribute of
        # the ConditionalAutoRegressiveNN as a buffer, so that it will be saved
        # when state_dict() is called. We use the permutation explicitly below,
        # so this quantity needs to be saved.
        #
        # A less hacky approach would be to save each permutation as a
        # hyperparameter and pass it in at build time. This is quicker though.

        perm = self.arn.permutation.clone().detach()
        del self.arn.permutation
        self.arn.register_buffer('permutation', perm)

        self.iaf_parametrization = iaf_parametrization
        self.initial_bias = 1.0

    def inverse(self, u, context):
        """This is the forward MAF."""

        x_size = u.size()[:-1]
        perm = self.arn.permutation
        input_size = u.size(-1)
        x = [torch.zeros(x_size, device=u.device)] * input_size

        # Expensive: one full pass through the autoregressive network per input
        # dimension, filling in x in permutation order.
        for idx in perm:
            if self.iaf_parametrization:
                m, s = self.arn(torch.stack(x, dim=-1), context)
                sigma = torch.sigmoid(
                    s + self.initial_bias * torch.ones_like(s))
                x[idx] = ((u[..., idx] - m[..., idx]) / sigma[..., idx] +
                          m[..., idx])
            else:
                mu, alpha = self.arn(torch.stack(x, dim=-1), context)
                x[idx] = (u[..., idx] * torch.exp(alpha[..., idx]) +
                          mu[..., idx])
        x = torch.stack(x, dim=-1)

        # log|det df/du|
        if self.iaf_parametrization:
            log_det_Jf = - (torch.log(sigma)).sum(-1)
        else:
            log_det_Jf = alpha.sum(-1)

        return x, log_det_Jf

    def forward(self, x, context):
        """This is really the inverse pass of the flow."""
        if self.iaf_parametrization:
            m, s = self.arn(x, context)
            # Initial bias of s to improve training. Initially, the IAF
            # does not effect a large change in x.
            sigma = torch.sigmoid(s + self.initial_bias * torch.ones_like(s))
            u = x * sigma + (torch.ones_like(sigma) - sigma) * m
            log_det_Jf = - (torch.log(sigma)).sum(-1)
        else:
            mu, alpha = self.arn(x, context)
            u = (x - mu) * torch.exp(-alpha)
            log_det_Jf = alpha.sum(-1)

        # Return the result of the inverse flow, along with log|det J(f)|
        return u, log_det_Jf
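A small, hypothetical round-trip check for the block above (dimensions are illustrative; torch, torch.nn and ConditionalAutoRegressiveNN are assumed to be imported as the class requires):

import torch

# Illustrative sizes; MAF is the class defined above.
input_dim, context_dim = 8, 3
maf = MAF(input_dim, context_dim, hidden_dims=[64, 64])

x = torch.randn(5, input_dim)
y = torch.randn(5, context_dim)

u, log_det = maf(x, y)                  # fast pass: u = f^{-1}(x), plus log|det J(f)|
x_rec, log_det_rec = maf.inverse(u, y)  # slow pass: x = f(u)

print(torch.allclose(x, x_rec, atol=1e-4))              # round trip recovers x
print(torch.allclose(log_det, log_det_rec, atol=1e-4))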