Example #1
def neals_funnel(ndims=10, name='neals_funnel'):
  """Creates a funnel-shaped distribution.

  This distribution was first described in [1]. The distribution is constructed
  by transforming an N-D Gaussian with scale [3, 1, ...] by scaling all but the
  first dimension by `exp(x0 / 2)`, where `x0` is the value of the first
  dimension.

  This distribution is notable for having a very narrow "neck" region, which is
  challenging for HMC to explore. This distribution resembles the posteriors of
  centrally parameterized hierarchical models.

  Args:
    ndims: Dimensionality of the distribution. Must be at least 2.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the funnel distribution. The
      `distribution` attribute is an instance of `TransformedDistribution`.

  Raises:
    ValueError: If ndims < 2.

  #### References

  1. Neal, R. M. (2003). Slice sampling. Annals of Statistics, 31(3), 705-767.
  """
  if ndims < 2:
    raise ValueError(f'ndims must be at least 2, saw: {ndims}')

  with tf.name_scope(name):

    def bijector_fn(x):
      """Funnel transform."""
      batch_shape = tf.shape(x)[:-1]
      scale = tf.concat(
          [
              tf.ones(tf.concat([batch_shape, [1]], axis=0)),
              tf.exp(x[Ellipsis, :1] / 2) *
              tf.ones(tf.concat([batch_shape, [ndims - 1]], axis=0)),
          ],
          axis=-1,
      )
      return tfb.Scale(scale)

    mg = tfd.MultivariateNormalDiag(
        loc=tf.zeros(ndims), scale_diag=[3.] + [1.] * (ndims - 1))
    dist = tfd.TransformedDistribution(
        mg, bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))

    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=tfb.Identity(),
        expectations=dict(
            params=target_spec.expectation(
                fn=tf.identity,
                human_name='Parameters',
                # The trailing dimensions come from a product distribution of
                # independent standard normal and a log-normal with a scale of
                # 3 / 2.
                # See https://en.wikipedia.org/wiki/Product_distribution for the
                # formulas.
                # For the mean, the formulas yield zero.
                ground_truth_mean=np.zeros(ndims),
                # For the standard deviation, the normal factor has mean 0 and
                # standard deviation 1, so the product-distribution formula
                # reduces to `sqrt(sigma_log_normal**2 + mean_log_normal**2)`,
                # which simplifies to `exp((3. / 2)**2)` for a log-normal with
                # log-scale 3 / 2.
                ground_truth_standard_deviation=np.array([3.] +
                                                         [np.exp((3. / 2)**2)] *
                                                         (ndims - 1)),
            ),),
        code_name=f'{name}_ndims_{ndims}',
        human_name='Neal\'s Funnel',
    )
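The ground-truth standard deviation above can be sanity-checked directly: each trailing dimension is a standard normal multiplied by `exp(x0 / 2)` with `x0 ~ Normal(0, 3)`. A minimal numpy Monte Carlo sketch (arbitrary seed and sample size; not part of the target definition):

import numpy as np

rng = np.random.default_rng(0)
x0 = rng.normal(0., 3., size=10_000_000)   # first dimension, scale 3
z = rng.normal(0., 1., size=10_000_000)    # a trailing dimension, pre-transform
x1 = z * np.exp(x0 / 2.)                   # funnel transform

print(x1.mean())                           # close to 0
print(x1.std(), np.exp((3. / 2)**2))       # both roughly 9.49 (the estimate is noisy due to heavy tails)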
Example #2
def banana(ndims=2, nonlinearity=0.03, name='banana'):
    """Creates a banana-shaped distribution.

  This distribution was first described in [1]. The distribution is constructed
  by transforming an N-D Gaussian with scale [10, 1, ...] by shifting the second
  dimension by `nonlinearity * (x0**2 - 100)`, where `x0` is the value of the
  first dimension.

  This distribution is notable for having relatively narrow tails, while being
  derived from a simple, volume-preserving transformation of a normal
  distribution. Despite this simplicity, some inference algorithms have trouble
  sampling from this distribution.

  Args:
    ndims: Dimensionality of the distribution. Must be at least 2.
    nonlinearity: Controls the strength of the nonlinearity of the distribution.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the banana distribution. The
      `distribution` attribute is an instance of `TransformedDistribution`.

  Raises:
    ValueError: If ndims < 2.

  #### References

  1. Haario, H., Saksman, E., & Tamminen, J. (1999). Adaptive proposal
      distribution for random walk Metropolis algorithm. Computational
      Statistics, 14(3), 375-396.
  """
    if ndims < 2:
        raise ValueError(f'ndims must be at least 2, saw: {ndims}')

    with tf.name_scope(name):

        def bijector_fn(x):
            """Banana transform."""
            batch_shape = tf.shape(x)[:-1]
            shift = tf.concat(
                [
                    tf.zeros(tf.concat([batch_shape, [1]], axis=0)),
                    nonlinearity * (tf.square(x[Ellipsis, :1]) - 100),
                    tf.zeros(tf.concat([batch_shape, [ndims - 2]], axis=0)),
                ],
                axis=-1,
            )
            return tfb.Shift(shift)

        mg = tfd.MultivariateNormalDiag(loc=tf.zeros(ndims),
                                        scale_diag=[10.] + [1.] * (ndims - 1))
        dist = tfd.TransformedDistribution(
            mg, bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))

        return target_spec.TargetDensity.from_distribution(
            distribution=dist,
            constraining_bijectors=tfb.Identity(),
            expectations=dict(
                params=target_spec.expectation(
                    fn=tf.identity,
                    human_name='Parameters',
                    # The second dimension is a standard normal plus a scaled
                    # Chi2 variable with one degree of freedom. The mean of
                    # `x0**2` is 100 (the variance of the first dimension),
                    # which is cancelled by the -100 shift, so the mean is
                    # zero.
                    ground_truth_mean=np.zeros(ndims),
                    # Variance of Chi2 with one degree of freedom is 2.
                    ground_truth_standard_deviation=np.array(
                        [10.] + [np.sqrt(1. + 2 * nonlinearity**2 * 10.**4)] +
                        [1.] * (ndims - 2)),
                ), ),
            code_name=f'{name}_ndims_{ndims}_nonlinearity_{nonlinearity}',
            human_name='Banana',
        )
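As with the funnel, the quoted ground-truth standard deviation of the second dimension can be checked by simulating the transform directly. A small numpy sketch, with an arbitrary seed and sample size:

import numpy as np

rng = np.random.default_rng(0)
nonlinearity = 0.03
x0 = rng.normal(0., 10., size=1_000_000)                      # first dimension, scale 10
x1 = rng.normal(0., 1., size=1_000_000) + nonlinearity * (x0**2 - 100.)

print(x1.mean())                                              # close to 0
print(x1.std(), np.sqrt(1. + 2 * nonlinearity**2 * 10.**4))   # both roughly 4.36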
Example #3
def ill_conditioned_gaussian(
    ndims=100,
    gamma_shape_parameter=0.5,
    max_eigvalue=None,
    seed=10,
    name='ill_conditioned_gaussian'):
  """Creates a random ill-conditioned Gaussian.

  The covariance matrix has eigenvalues sampled from the inverse Gamma
  distribution with the specified shape, and then rotated by a random orthogonal
  matrix.

  Note that this function produces reproducible targets; the `seed` argument
  must always be non-`None`.

  Args:
    ndims: Dimensionality of the Gaussian.
    gamma_shape_parameter: The shape parameter of the inverse Gamma
      distribution.
    max_eigvalue: If set, will normalize the eigenvalues such that the maximum
      is this value.
    seed: Seed to use when generating the eigenvalues and the random orthogonal
      matrix.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the requested Gaussian distribution. The
      `distribution` attribute is an instance of `MultivariateNormalTriL`.
  """
  with tf.name_scope(name):
    rng = np.random.RandomState(seed=seed & (2**32 - 1))
    eigenvalues = 1. / np.sort(
        rng.gamma(shape=gamma_shape_parameter, scale=1., size=ndims))
    if max_eigvalue is not None:
      eigenvalues *= max_eigvalue / eigenvalues.max()

    q, r = np.linalg.qr(rng.randn(ndims, ndims))
    q *= np.sign(np.diag(r))

    covariance = (q * eigenvalues).dot(q.T)

    gaussian = tfd.MultivariateNormalTriL(
        loc=tf.zeros(ndims),
        scale_tril=tf.linalg.cholesky(
            tf.convert_to_tensor(covariance, dtype=tf.float32)))

    # TODO(siege): Expose the eigenvalues directly.
    return target_spec.TargetDensity.from_distribution(
        distribution=gaussian,
        constraining_bijectors=tfb.Identity(),
        expectations=dict(
            first_moment=target_spec.expectation(
                fn=tf.identity,
                human_name='First moment',
                ground_truth_mean=np.zeros(ndims),
                ground_truth_standard_deviation=np.sqrt(np.diag(covariance)),
            ),
            second_moment=target_spec.expectation(
                fn=tf.square,
                human_name='Second moment',
                ground_truth_mean=np.diag(covariance),
                # The variance of the second moment is
                # E[x**4] - E[x**2]**2 = 3 sigma**4 - sigma**4 = 2 sigma**4.
                ground_truth_standard_deviation=(np.sqrt(2) *
                                                 np.diag(covariance)),
            )),
        code_name='{name}_ndims_{ndims}_gamma_shape_'
        '{gamma_shape}_seed_{seed}{max_eigvalue_str}'.format(
            name=name,
            ndims=ndims,
            gamma_shape=gamma_shape_parameter,
            seed=seed,
            max_eigvalue_str='' if max_eigvalue is None else
            '_max_eigvalue_{}'.format(max_eigvalue)),
        human_name='Ill-conditioned Gaussian',
    )
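The covariance recipe above can be reproduced with numpy alone to see how ill-conditioned the resulting Gaussian is; this sketch simply repeats the eigenvalue and rotation steps outside of TensorFlow:

import numpy as np

ndims, gamma_shape_parameter, seed = 100, 0.5, 10
rng = np.random.RandomState(seed=seed & (2**32 - 1))
eigenvalues = 1. / np.sort(
    rng.gamma(shape=gamma_shape_parameter, scale=1., size=ndims))

q, r = np.linalg.qr(rng.randn(ndims, ndims))
q *= np.sign(np.diag(r))          # sign correction for a Haar-distributed rotation
covariance = (q * eigenvalues).dot(q.T)

# The condition number is the ratio of the largest to smallest eigenvalue.
print(eigenvalues.max() / eigenvalues.min())
print(np.linalg.cond(covariance))  # should agree up to numerical error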
Example #4
def logistic_regression(
    dataset_fn,
    name='logistic_regression',
):
    """Bayesian logistic regression with a Gaussian prior.

  Args:
    dataset_fn: A function to create a classification data set. The dataset must
      have binary labels.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
    with tf.name_scope(name) as name:
        dataset = dataset_fn()

        num_train_points = dataset.train_features.shape[0]
        num_test_points = dataset.test_features.shape[0]
        have_test = num_test_points > 0

        # Add bias.
        train_features = tf.concat(
            [dataset.train_features,
             tf.ones([num_train_points, 1])], axis=-1)
        train_labels = tf.convert_to_tensor(dataset.train_labels)
        test_features = tf.concat(
            [dataset.test_features,
             tf.ones([num_test_points, 1])], axis=-1)
        test_labels = tf.convert_to_tensor(dataset.test_labels)
        num_features = int(train_features.shape[1])

        root = tfd.JointDistributionCoroutine.Root
        zero = tf.zeros(num_features)
        one = tf.ones(num_features)

        def model_fn(features):
            weights = yield root(tfd.Independent(tfd.Normal(zero, one), 1))
            logits = tf.einsum('nd,...d->...n', features, weights)
            yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

        train_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, features=train_features))
        test_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, features=test_features))
        dist = joint_distribution_posterior.JointDistributionPosterior(
            train_joint_dist, (None, train_labels))

        expectations = {
            'params':
            target_spec.expectation(
                fn=lambda params: params[0],
                human_name='Parameters',
            )
        }
        if have_test:
            expectations['test_nll'] = target_spec.expectation(
                fn=lambda params: (  # pylint: disable=g-long-lambda
                    -test_joint_dist.sample_distributions(value=params)[0][-1].
                    log_prob(test_labels)),
                human_name='Test NLL',
            )
            expectations['per_example_test_nll'] = target_spec.expectation(
                fn=lambda params: (  # pylint: disable=g-long-lambda
                    -test_joint_dist.sample_distributions(value=params)[0][-1].
                    distribution.log_prob(test_labels)),
                human_name='Per-example Test NLL',
            )

        return target_spec.TargetDensity.from_distribution(
            distribution=dist,
            constraining_bijectors=(tfb.Identity(), ),
            expectations=expectations,
            code_name='{}_{}'.format(dataset.code_name, name),
            human_name='{} Logistic Regression'.format(dataset.human_name),
        )
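For context, here is a minimal standalone sketch of the same prior/likelihood structure (standard normal prior on the weights, Bernoulli likelihood on logits), assuming TensorFlow Probability is available under the usual `tfp`/`tfd` aliases; `toy_model_fn` and the toy features and labels are made up for illustration and are not part of the function above:

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

features = tf.constant(np.random.randn(8, 3), tf.float32)  # 8 points, 2 features + bias
labels = tf.constant([0, 1, 0, 1, 1, 0, 1, 0], tf.int32)

root = tfd.JointDistributionCoroutine.Root

def toy_model_fn():
  weights = yield root(tfd.Independent(tfd.Normal(tf.zeros(3), tf.ones(3)), 1))
  logits = tf.einsum('nd,...d->...n', features, weights)
  yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

joint = tfd.JointDistributionCoroutine(toy_model_fn)

# Posterior density of a candidate weight vector, up to the constant log p(labels).
print(joint.log_prob((tf.zeros(3), labels)))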
Example #5
def item_response_theory(
    dataset_fn,
    name='item_response_theory',
):
    """One-parameter logistic item-response theory (IRT) model.

  Args:
    dataset_fn: A function to create an IRT data set.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
    with tf.name_scope(name) as name:
        dataset = dataset_fn()
        have_test = dataset.test_student_ids.shape[0] > 0

        num_students = dataset.train_student_ids.max()
        num_questions = dataset.train_question_ids.max()
        if have_test:
            num_students = max(num_students, dataset.test_student_ids.max())
            num_questions = max(num_questions, dataset.test_question_ids.max())

        # TODO(siege): Make it an option to use a sparse encoding, the choice
        # clearly depends on the dataset sparsity.
        def make_dense_encoding(student_ids, question_ids, correct):
            dense_y = np.zeros([num_students, num_questions], np.float32)
            y_mask = np.zeros_like(dense_y)
            dense_y[student_ids - 1, question_ids - 1] = correct
            y_mask[student_ids - 1, question_ids - 1] = 1.
            return dense_y, y_mask

        train_dense_y, train_y_mask = make_dense_encoding(
            dataset.train_student_ids,
            dataset.train_question_ids,
            dataset.train_correct,
        )
        test_dense_y, test_y_mask = make_dense_encoding(
            dataset.test_student_ids,
            dataset.test_question_ids,
            dataset.test_correct,
        )

        root = tfd.JointDistributionCoroutine.Root

        def model_fn(dense_y, y_mask):
            """Model definition."""
            mean_student_ability = yield root(tfd.Normal(0.75, 1.))
            student_ability = yield root(
                tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[0]])),
                                1))
            question_difficulty = yield root(
                tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[1]])),
                                1))
            logits = (mean_student_ability[Ellipsis, tf.newaxis, tf.newaxis] +
                      student_ability[Ellipsis, tf.newaxis] -
                      question_difficulty[Ellipsis, tf.newaxis, :])
            masked_logits = logits * y_mask - 1e10 * (1 - y_mask)
            yield tfd.Independent(tfd.Bernoulli(masked_logits), 2)

        train_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, train_dense_y, train_y_mask))
        test_joint_dist = tfd.JointDistributionCoroutine(
            functools.partial(model_fn, test_dense_y, test_y_mask))
        dist = joint_distribution_posterior.JointDistributionPosterior(
            train_joint_dist, (None, None, None, train_dense_y))

        expectations = {
            'params':
            target_spec.expectation(
                fn=lambda params: tf.concat(  # pylint: disable=g-long-lambda
                    (params[0][Ellipsis, tf.newaxis], ) + params[1:],
                    axis=-1),
                human_name='Parameters',
            )
        }
        if have_test:
            expectations['test_nll'] = target_spec.expectation(
                fn=lambda params: (  # pylint: disable=g-long-lambda
                    -test_joint_dist.sample_distributions(value=params)[0][-1].
                    log_prob(test_dense_y)),
                human_name='Test NLL',
            )

            def per_example_test_nll(params):
                """Computes per-example test NLL."""
                test_y_idx = np.stack([
                    dataset.test_student_ids - 1, dataset.test_question_ids - 1
                ],
                                      axis=-1)

                dense_nll = (-test_joint_dist.sample_distributions(
                    value=params)[0][-1].distribution.log_prob(test_dense_y))
                vectorized_dense_nll = tf.reshape(
                    dense_nll, [-1, num_students, num_questions])
                # TODO(siege): Avoid using vmap here.
                log_prob_y = tf.vectorized_map(
                    lambda nll: tf.gather_nd(nll, test_y_idx),
                    vectorized_dense_nll)
                return tf.reshape(
                    log_prob_y,
                    list(params[0].shape) + [test_y_idx.shape[0]])

            expectations['per_example_test_nll'] = target_spec.expectation(
                fn=per_example_test_nll,
                human_name='Per-example Test NLL',
            )

        return target_spec.TargetDensity.from_distribution(
            distribution=dist,
            constraining_bijectors=(tfb.Identity(), tfb.Identity(),
                                    tfb.Identity()),
            expectations=expectations,
            code_name='{}_{}'.format(dataset.code_name, name),
            human_name='{} 1PL Item-Response Theory'.format(
                dataset.human_name),
        )
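To make the dense-encoding step above concrete, here is a toy numpy illustration of how 1-indexed (student, question) pairs are scattered into a response matrix and an observation mask (the toy data is chosen here purely for illustration):

import numpy as np

num_students, num_questions = 3, 4
student_ids = np.array([1, 1, 2, 3])    # 1-indexed, as in the datasets above
question_ids = np.array([1, 3, 2, 4])
correct = np.array([1., 0., 1., 1.])

dense_y = np.zeros([num_students, num_questions], np.float32)
y_mask = np.zeros_like(dense_y)
dense_y[student_ids - 1, question_ids - 1] = correct
y_mask[student_ids - 1, question_ids - 1] = 1.

print(dense_y)
print(y_mask)
# Unobserved entries keep y = 0 and receive a logit of about -1e10 via
# `logits * y_mask - 1e10 * (1 - y_mask)`, so they contribute essentially
# nothing to the Bernoulli log-probability.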