def neals_funnel(ndims=10, name='neals_funnel'):
  """Creates a funnel-shaped distribution.

  This distribution was first described in [1]. The distribution is constructed
  by transforming an N-D Gaussian with scale [3, 1, ...] by scaling all but the
  first dimension by `exp(x0 / 2)`, where `x0` is the value of the first
  dimension.

  This distribution is notable for having a very narrow "neck" region, which is
  challenging for HMC to explore. This distribution resembles the posteriors of
  centrally parameterized hierarchical models.

  Args:
    ndims: Dimensionality of the distribution. Must be at least 2.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the funnel distribution. The
      `distribution` attribute is an instance of `TransformedDistribution`.

  Raises:
    ValueError: If `ndims` < 2.

  #### References

  1. Neal, R. M. (2003). Slice sampling. Annals of Statistics, 31(3), 705-767.
  """
  if ndims < 2:
    raise ValueError(f'ndims must be at least 2, saw: {ndims}')
  with tf.name_scope(name):

    def bijector_fn(x):
      """Funnel transform."""
      batch_shape = tf.shape(x)[:-1]
      scale = tf.concat(
          [
              tf.ones(tf.concat([batch_shape, [1]], axis=0)),
              tf.exp(x[Ellipsis, :1] / 2) *
              tf.ones(tf.concat([batch_shape, [ndims - 1]], axis=0)),
          ],
          axis=-1,
      )
      return tfb.Scale(scale)

    mg = tfd.MultivariateNormalDiag(
        loc=tf.zeros(ndims), scale_diag=[3.] + [1.] * (ndims - 1))
    dist = tfd.TransformedDistribution(
        mg, bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))
    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=tfb.Identity(),
        expectations=dict(
            params=target_spec.expectation(
                fn=tf.identity,
                human_name='Parameters',
                # The trailing dimensions come from a product distribution of
                # an independent standard normal and a log-normal with a scale
                # of 3 / 2.
                # See https://en.wikipedia.org/wiki/Product_distribution for
                # the formulas.
                # For the mean, the formulas yield zero.
                ground_truth_mean=np.zeros(ndims),
                # For the standard deviation, all means are zero and the
                # standard deviations of the normals are 1, so the formula
                # reduces to `sqrt(sigma_log_normal**2 + mean_log_normal**2)`,
                # which simplifies to `exp(sigma**2)` with `sigma = 3 / 2`
                # being the scale of the underlying normal.
                ground_truth_standard_deviation=np.array(
                    [3.] + [np.exp((3. / 2)**2)] * (ndims - 1)),
            ),),
        code_name=f'{name}_ndims_{ndims}',
        human_name='Neal\'s Funnel',
    )
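
# A quick self-check sketch (illustrative, not part of the module's API):
# sample from the returned distribution and compare against the analytic
# moments recorded above. Assumes eager-mode TF.
#
#   target = neals_funnel(ndims=10)
#   samples = target.distribution.sample(100000, seed=1)
#   # The empirical stddev should approach [3., exp(2.25), ..., exp(2.25)].
#   print(tf.math.reduce_std(samples, axis=0))
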
def banana(ndims=2, nonlinearity=0.03, name='banana'):
  """Creates a banana-shaped distribution.

  This distribution was first described in [1]. The distribution is constructed
  by transforming an N-D Gaussian with scale [10, 1, ...] by shifting the
  second dimension by `nonlinearity * (x0**2 - 100)`, where `x0` is the value
  of the first dimension.

  This distribution is notable for having relatively narrow tails, while being
  derived from a simple, volume-preserving transformation of a normal
  distribution. Despite this simplicity, some inference algorithms have trouble
  sampling from this distribution.

  Args:
    ndims: Dimensionality of the distribution. Must be at least 2.
    nonlinearity: Controls the strength of the nonlinearity of the
      distribution.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the banana distribution. The
      `distribution` attribute is an instance of `TransformedDistribution`.

  Raises:
    ValueError: If `ndims` < 2.

  #### References

  1. Haario, H., Saksman, E., & Tamminen, J. (1999). Adaptive proposal
     distribution for random walk Metropolis algorithm. Computational
     Statistics, 14(3), 375-396.
  """
  if ndims < 2:
    raise ValueError(f'ndims must be at least 2, saw: {ndims}')
  with tf.name_scope(name):

    def bijector_fn(x):
      """Banana transform."""
      batch_shape = tf.shape(x)[:-1]
      shift = tf.concat(
          [
              tf.zeros(tf.concat([batch_shape, [1]], axis=0)),
              nonlinearity * (tf.square(x[Ellipsis, :1]) - 100),
              tf.zeros(tf.concat([batch_shape, [ndims - 2]], axis=0)),
          ],
          axis=-1,
      )
      return tfb.Shift(shift)

    mg = tfd.MultivariateNormalDiag(
        loc=tf.zeros(ndims), scale_diag=[10.] + [1.] * (ndims - 1))
    dist = tfd.TransformedDistribution(
        mg, bijector=tfb.MaskedAutoregressiveFlow(bijector_fn=bijector_fn))
    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=tfb.Identity(),
        expectations=dict(
            params=target_spec.expectation(
                fn=tf.identity,
                human_name='Parameters',
                # The second dimension is a sum of a scaled Chi2 and a normal
                # distribution.
                # The mean of a Chi2 with one degree of freedom is 1, so
                # `x0**2` (the first element has variance 100) has mean 100,
                # which cancels with the -100 shift (hence why the shift is
                # there).
                ground_truth_mean=np.zeros(ndims),
                # The variance of a Chi2 with one degree of freedom is 2, so
                # the second dimension has variance
                # `1 + nonlinearity**2 * 2 * 100**2`.
                ground_truth_standard_deviation=np.array(
                    [10.] + [np.sqrt(1. + 2 * nonlinearity**2 * 10.**4)] +
                    [1.] * (ndims - 2)),
            ),),
        code_name=f'{name}_ndims_{ndims}_nonlinearity_{nonlinearity}',
        human_name='Banana',
    )
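
# A quick self-check sketch (illustrative, not part of the module's API): the
# second coordinate mixes a unit normal with a scaled Chi2, so its stddev
# should approach `sqrt(1 + 2 * nonlinearity**2 * 1e4)`, about 4.36 at the
# default nonlinearity of 0.03.
#
#   target = banana(ndims=2)
#   samples = target.distribution.sample(100000, seed=1)
#   print(tf.math.reduce_std(samples, axis=0))  # ~[10., 4.36]
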
def ill_conditioned_gaussian(ndims=100,
                             gamma_shape_parameter=0.5,
                             max_eigvalue=None,
                             seed=10,
                             name='ill_conditioned_gaussian'):
  """Creates a random ill-conditioned Gaussian.

  The covariance matrix has eigenvalues sampled from the inverse Gamma
  distribution with the specified shape, and then rotated by a random
  orthogonal matrix.

  Note that this function produces reproducible targets, i.e. the `seed`
  argument always needs to be non-`None`.

  Args:
    ndims: Dimensionality of the Gaussian.
    gamma_shape_parameter: The shape parameter of the inverse Gamma
      distribution.
    max_eigvalue: If set, will normalize the eigenvalues such that the maximum
      is this value.
    seed: Seed to use when generating the eigenvalues and the random orthogonal
      matrix.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity` specifying the requested Gaussian distribution. The
      `distribution` attribute is an instance of `MultivariateNormalTriL`.
  """
  with tf.name_scope(name):
    rng = np.random.RandomState(seed=seed & (2**32 - 1))
    eigenvalues = 1. / np.sort(
        rng.gamma(shape=gamma_shape_parameter, scale=1., size=ndims))
    if max_eigvalue is not None:
      eigenvalues *= max_eigvalue / eigenvalues.max()

    q, r = np.linalg.qr(rng.randn(ndims, ndims))
    q *= np.sign(np.diag(r))

    covariance = (q * eigenvalues).dot(q.T)

    gaussian = tfd.MultivariateNormalTriL(
        loc=tf.zeros(ndims),
        scale_tril=tf.linalg.cholesky(
            tf.convert_to_tensor(covariance, dtype=tf.float32)))
    # TODO(siege): Expose the eigenvalues directly.
    return target_spec.TargetDensity.from_distribution(
        distribution=gaussian,
        constraining_bijectors=tfb.Identity(),
        expectations=dict(
            first_moment=target_spec.expectation(
                fn=tf.identity,
                human_name='First moment',
                ground_truth_mean=np.zeros(ndims),
                ground_truth_standard_deviation=np.sqrt(np.diag(covariance)),
            ),
            second_moment=target_spec.expectation(
                fn=tf.square,
                human_name='Second moment',
                ground_truth_mean=np.diag(covariance),
                # The variance of the second moment is
                # E[x**4] - E[x**2]**2 = 3 sigma**4 - sigma**4 = 2 sigma**4.
                ground_truth_standard_deviation=(np.sqrt(2) *
                                                 np.diag(covariance)),
            )),
        code_name='{name}_ndims_{ndims}_gamma_shape_'
        '{gamma_shape}_seed_{seed}{max_eigvalue_str}'.format(
            name=name,
            ndims=ndims,
            gamma_shape=gamma_shape_parameter,
            seed=seed,
            max_eigvalue_str='' if max_eigvalue is None else
            '_max_eigvalue_{}'.format(max_eigvalue)),
        human_name='Ill-conditioned Gaussian',
    )
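
# Usage sketch (an illustration, not part of the module's API): with a fixed
# `seed`, the spectrum, the rotation, and hence the target are identical
# across calls; `max_eigvalue` only rescales the spectrum.
#
#   target = ill_conditioned_gaussian(ndims=100, max_eigvalue=1., seed=10)
#   samples = target.distribution.sample(1000, seed=2)
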
def logistic_regression(
    dataset_fn,
    name='logistic_regression',
):
  """Bayesian logistic regression with a Gaussian prior.

  Args:
    dataset_fn: A function to create a classification data set. The dataset
      must have binary labels.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
  with tf.name_scope(name) as name:
    dataset = dataset_fn()

    num_train_points = dataset.train_features.shape[0]
    num_test_points = dataset.test_features.shape[0]
    have_test = num_test_points > 0

    # Add a constant feature column to act as the bias term.
    train_features = tf.concat(
        [dataset.train_features,
         tf.ones([num_train_points, 1])], axis=-1)
    train_labels = tf.convert_to_tensor(dataset.train_labels)
    test_features = tf.concat(
        [dataset.test_features,
         tf.ones([num_test_points, 1])], axis=-1)
    test_labels = tf.convert_to_tensor(dataset.test_labels)
    num_features = int(train_features.shape[1])

    root = tfd.JointDistributionCoroutine.Root
    zero = tf.zeros(num_features)
    one = tf.ones(num_features)

    def model_fn(features):
      weights = yield root(tfd.Independent(tfd.Normal(zero, one), 1))
      logits = tf.einsum('nd,...d->...n', features, weights)
      yield tfd.Independent(tfd.Bernoulli(logits=logits), 1)

    train_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, features=train_features))
    test_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, features=test_features))
    dist = joint_distribution_posterior.JointDistributionPosterior(
        train_joint_dist, (None, train_labels))

    expectations = {
        'params':
            target_spec.expectation(
                fn=lambda params: params[0],
                human_name='Parameters',
            )
    }
    if have_test:
      expectations['test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .log_prob(test_labels)),
          human_name='Test NLL',
      )
      expectations['per_example_test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .distribution.log_prob(test_labels)),
          human_name='Per-example Test NLL',
      )

    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=(tfb.Identity(),),
        expectations=expectations,
        code_name='{}_{}'.format(dataset.code_name, name),
        human_name='{} Logistic Regression'.format(dataset.human_name),
    )
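
# Usage sketch. `make_dataset` is a hypothetical factory returning an object
# with `train_features`, `train_labels`, `test_features`, `test_labels`,
# `code_name`, and `human_name` attributes, matching the accesses above. The
# `log_prob` call assumes `JointDistributionPosterior` evaluates a tuple of
# the unobserved variables (here, just the weights, including the bias).
#
#   target = logistic_regression(dataset_fn=make_dataset)
#   weights = tf.zeros([num_features])  # num_features = feature dim + 1 bias
#   lp = target.distribution.log_prob((weights,))
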
def item_response_theory(
    dataset_fn,
    name='item_response_theory',
):
  """One-parameter logistic item-response theory (IRT) model.

  Args:
    dataset_fn: A function to create an IRT data set.
    name: Name to prepend to ops created in this function, as well as to the
      `code_name` in the returned `TargetDensity`.

  Returns:
    target: `TargetDensity`.
  """
  with tf.name_scope(name) as name:
    dataset = dataset_fn()
    have_test = dataset.test_student_ids.shape[0] > 0

    num_students = dataset.train_student_ids.max()
    num_questions = dataset.train_question_ids.max()
    if have_test:
      num_students = max(num_students, dataset.test_student_ids.max())
      num_questions = max(num_questions, dataset.test_question_ids.max())

    # TODO(siege): Make it an option to use a sparse encoding. The choice
    # clearly depends on the dataset sparsity.
    def make_dense_encoding(student_ids, question_ids, correct):
      dense_y = np.zeros([num_students, num_questions], np.float32)
      y_mask = np.zeros_like(dense_y)
      dense_y[student_ids - 1, question_ids - 1] = correct
      y_mask[student_ids - 1, question_ids - 1] = 1.
      return dense_y, y_mask

    train_dense_y, train_y_mask = make_dense_encoding(
        dataset.train_student_ids,
        dataset.train_question_ids,
        dataset.train_correct,
    )
    test_dense_y, test_y_mask = make_dense_encoding(
        dataset.test_student_ids,
        dataset.test_question_ids,
        dataset.test_correct,
    )

    root = tfd.JointDistributionCoroutine.Root

    def model_fn(dense_y, y_mask):
      """Model definition."""
      mean_student_ability = yield root(tfd.Normal(0.75, 1.))
      student_ability = yield root(
          tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[0]])), 1))
      question_difficulty = yield root(
          tfd.Independent(tfd.Normal(0., tf.ones([dense_y.shape[1]])), 1))
      logits = (
          mean_student_ability[Ellipsis, tf.newaxis, tf.newaxis] +
          student_ability[Ellipsis, tf.newaxis] -
          question_difficulty[Ellipsis, tf.newaxis, :])
      # Push the logits of unobserved (student, question) pairs to -1e10 so
      # they contribute a constant to the likelihood.
      masked_logits = logits * y_mask - 1e10 * (1 - y_mask)
      yield tfd.Independent(tfd.Bernoulli(masked_logits), 2)

    train_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, train_dense_y, train_y_mask))
    test_joint_dist = tfd.JointDistributionCoroutine(
        functools.partial(model_fn, test_dense_y, test_y_mask))
    dist = joint_distribution_posterior.JointDistributionPosterior(
        train_joint_dist, (None, None, None, train_dense_y))

    expectations = {
        'params':
            target_spec.expectation(
                fn=lambda params: tf.concat(  # pylint: disable=g-long-lambda
                    (params[0][Ellipsis, tf.newaxis],) + params[1:], axis=-1),
                human_name='Parameters',
            )
    }
    if have_test:
      expectations['test_nll'] = target_spec.expectation(
          fn=lambda params: (  # pylint: disable=g-long-lambda
              -test_joint_dist.sample_distributions(value=params)[0][-1]
              .log_prob(test_dense_y)),
          human_name='Test NLL',
      )

      def per_example_test_nll(params):
        """Computes per-example test NLL."""
        test_y_idx = np.stack(
            [dataset.test_student_ids - 1, dataset.test_question_ids - 1],
            axis=-1)
        dense_nll = (
            -test_joint_dist.sample_distributions(
                value=params)[0][-1].distribution.log_prob(test_dense_y))
        vectorized_dense_nll = tf.reshape(dense_nll,
                                          [-1, num_students, num_questions])
        # TODO(siege): Avoid using vmap here.
        log_prob_y = tf.vectorized_map(
            lambda nll: tf.gather_nd(nll, test_y_idx), vectorized_dense_nll)
        return tf.reshape(log_prob_y,
                          list(params[0].shape) + [test_y_idx.shape[0]])

      expectations['per_example_test_nll'] = target_spec.expectation(
          fn=per_example_test_nll,
          human_name='Per-example Test NLL',
      )

    return target_spec.TargetDensity.from_distribution(
        distribution=dist,
        constraining_bijectors=(tfb.Identity(), tfb.Identity(),
                                tfb.Identity()),
        expectations=expectations,
        code_name='{}_{}'.format(dataset.code_name, name),
        human_name='{} 1PL Item-Response Theory'.format(dataset.human_name),
    )
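
# Usage sketch. `make_irt_dataset` is a hypothetical factory; any object with
# 1-indexed `train_student_ids`/`train_question_ids`/`train_correct` arrays
# (plus the `test_*` counterparts, `code_name`, and `human_name`) matches the
# accesses above.
#
#   target = item_response_theory(dataset_fn=make_irt_dataset)
#   # The posterior is over a 3-tuple: (mean student ability, per-student
#   # abilities, per-question difficulties), matching the three
#   # `constraining_bijectors` above.
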