def unscaled_sample(self, sampled_vars, sample_inputs, rng_key=None):
    params = OrderedDict(self.params)
    value = params.pop("value")
    assert all(isinstance(v, (Number, Tensor)) for v in params.values())
    assert isinstance(value, Variable) and value.name in sampled_vars
    inputs_, tensors = align_tensors(*params.values())
    inputs = OrderedDict(sample_inputs.items())
    inputs.update(inputs_)
    sample_shape = tuple(v.size for v in sample_inputs.values())

    raw_dist = self.dist_class(**dict(zip(self._ast_fields[:-1], tensors)))
    sample_args = (sample_shape,) if rng_key is None else (rng_key, sample_shape)
    if getattr(raw_dist, "has_rsample", False):
        raw_sample = raw_dist.rsample(*sample_args)
    else:
        raw_sample = ops.detach(raw_dist.sample(*sample_args))

    result = funsor.delta.Delta(
        value.name, Tensor(raw_sample, inputs, value.output.dtype))
    if not getattr(raw_dist, "has_rsample", False):
        # scaling of dice_factor by num samples should already be handled by Funsor.sample
        raw_log_prob = raw_dist.log_prob(raw_sample)
        dice_factor = Tensor(raw_log_prob - ops.detach(raw_log_prob), inputs)
        result = result + dice_factor
    return result
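# A note on the dice_factor above: `x - ops.detach(x)` is identically zero in
# value, so adding it to the Delta leaves the sampled measure unchanged, but its
# gradient is d(log_prob)/d(params), which yields score-function (REINFORCE)
# gradients for non-reparametrizable distributions. A minimal PyTorch sketch of
# the identity, illustrative only and not part of the funsor API:
import torch

logits = torch.tensor([0.5, -0.5], requires_grad=True)
d = torch.distributions.Categorical(logits=logits)
x = d.sample()
log_p = d.log_prob(x)
dice = log_p - log_p.detach()  # value is exactly 0.0 ...
assert dice.item() == 0.0
dice.backward()                # ... but it backpropagates the gradient of log_p
assert logits.grad is not None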
def eager_delta(v, log_density, value):
    # This handles event_dim specially, and hence cannot use the
    # generic Delta.eager_log_prob() method.
    assert v.output == value.output
    event_dim = len(v.output.shape)
    inputs, (v, log_density, value) = align_tensors(v, log_density, value)
    data = dist.Delta(v, log_density, event_dim).log_prob(value)
    return Tensor(data, inputs)
def eager_multinomial(total_count, probs, value):
    # Multinomial.log_prob() supports inhomogeneous total_count only by
    # avoiding passing total_count to the constructor.
    inputs, (total_count, probs, value) = align_tensors(total_count, probs, value)
    shape = broadcast_shape(total_count.shape + (1,), probs.shape, value.shape)
    probs = Tensor(probs.expand(shape), inputs)
    value = Tensor(value.expand(shape), inputs)
    total_count = Number(total_count.max().item())  # Used by distributions validation code.
    return Multinomial.eager_log_prob(total_count, probs, value)
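# Why the max-total_count trick works: the multinomial log-pmf depends on
# total_count only through the counts in `value` itself,
#
#     log p(value) = lgamma(sum_k value_k + 1) - sum_k lgamma(value_k + 1)
#                    + sum_k value_k * log probs_k
#
# so any total_count that passes the constructor's validation (here the
# batchwise max) leaves the computed log-probabilities unchanged.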
def diff_fn(p_data):
    # Closure over test fixtures: be_inputs, sampled_vars, sample_inputs,
    # rng_key, and the random probe tensor.
    p = Tensor(p_data, be_inputs)
    q = p.sample(sampled_vars, sample_inputs, rng_key=rng_key)
    mq = p.materialize(q).reduce(ops.logaddexp, 'n')
    mq = mq.align(tuple(p.inputs))
    _, (p_data, mq_data) = align_tensors(p, mq)
    assert p_data.shape == mq_data.shape
    return (ops.exp(mq_data) * probe).sum() - (ops.exp(p_data) * probe).sum(), mq
def eager_delta_tensor(v, log_density, value):
    # This handles event_dim specially, and hence cannot use the
    # generic Delta.eager_log_prob() method.
    assert v.output == value.output
    event_dim = len(v.output.shape)
    inputs, (v, log_density, value) = align_tensors(v, log_density, value)
    backend_dist = import_module(BACKEND_TO_DISTRIBUTIONS_BACKEND[get_backend()])
    data = backend_dist.Delta.dist_class(v, log_density, event_dim).log_prob(value)  # noqa: F821
    return Tensor(data, inputs)
def _get_stat_diff(funsor_dist_class, sample_inputs, inputs, num_samples,
                   statistic, with_lazy, params):
    params = [Tensor(p, inputs) for p in params]
    if isinstance(with_lazy, bool):
        with interpretation(lazy if with_lazy else eager):
            funsor_dist = funsor_dist_class(*params)
    else:
        funsor_dist = funsor_dist_class(*params)

    rng_key = None if get_backend() == "torch" else np.array([0, 0], dtype=np.uint32)
    sample_value = funsor_dist.sample(frozenset(['value']), sample_inputs, rng_key=rng_key)
    expected_inputs = OrderedDict(
        tuple(sample_inputs.items()) + tuple(inputs.items())
        + (('value', funsor_dist.inputs['value']),))
    check_funsor(sample_value, expected_inputs, reals())

    if sample_inputs:
        actual_mean = Integrate(
            sample_value, Variable('value', funsor_dist.inputs['value']),
            frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
        inputs, tensors = align_tensors(*list(funsor_dist.params.values())[:-1])
        raw_dist = funsor_dist.dist_class(
            **dict(zip(funsor_dist._ast_fields[:-1], tensors)))
        expected_mean = Tensor(raw_dist.mean, inputs)

        if statistic == "mean":
            actual_stat, expected_stat = actual_mean, expected_mean
        elif statistic == "variance":
            actual_stat = Integrate(
                sample_value,
                (Variable('value', funsor_dist.inputs['value']) - actual_mean) ** 2,
                frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
            expected_stat = Tensor(raw_dist.variance, inputs)
        elif statistic == "entropy":
            actual_stat = -Integrate(
                sample_value, funsor_dist,
                frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
            expected_stat = Tensor(raw_dist.entropy(), inputs)
        else:
            raise ValueError("invalid test statistic")

        diff = actual_stat.reduce(ops.add).data - expected_stat.reduce(ops.add).data
        return diff.sum(), diff
def eager_affine_normal(matrix, loc, scale, value_x, value_y):
    assert len(matrix.output.shape) == 2
    assert value_x.output == reals(matrix.output.shape[0])
    assert value_y.output == reals(matrix.output.shape[1])
    loc += value_x @ matrix

    int_inputs, (loc, scale) = align_tensors(loc, scale, expand=True)
    i_name = gensym("i")
    y_name = gensym("y")
    y_i_name = gensym("y_i")
    int_inputs[i_name] = bint(value_y.output.shape[0])
    loc = Tensor(loc, int_inputs)
    scale = Tensor(scale, int_inputs)
    y_dist = Independent(Normal(loc, scale, y_i_name), y_name, i_name, y_i_name)
    return y_dist(**{y_name: value_y})
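# Given x, the density over y factorizes coordinatewise,
#
#     p(y | x) = prod_i Normal(y_i; (x @ matrix + loc)_i, scale_i)
#
# which is why the event dimension can be treated as a batch dimension i
# over scalar Normals and then wrapped back into an event by Independent.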
def test_lognormal_distribution(moment):
    num_samples = 100000
    inputs = OrderedDict(batch=Bint[10])
    loc = random_tensor(inputs)
    scale = random_tensor(inputs).exp()

    log_measure = dist.LogNormal(loc, scale)(value='x')
    probe = Variable('x', Real) ** moment
    with interpretation(MonteCarlo(particle=Bint[num_samples])):
        with xfail_if_not_implemented():
            actual = Integrate(log_measure, probe, frozenset(['x']))

    _, (loc_data, scale_data) = align_tensors(loc, scale)
    samples = backend_dist.LogNormal(loc_data, scale_data).sample((num_samples,))
    expected = (samples ** moment).mean(0)
    assert_close(actual.data, expected, atol=1e-2, rtol=1e-2)
def eager_multinomial(total_count, probs, value):
    # Multinomial.log_prob() supports inhomogeneous total_count only by
    # avoiding passing total_count to the constructor.
    inputs, (total_count, probs, value) = align_tensors(total_count, probs, value)
    shape = broadcast_shape(total_count.shape + (1,), probs.shape, value.shape)
    probs = Tensor(ops.expand(probs, shape), inputs)
    value = Tensor(ops.expand(value, shape), inputs)
    if get_backend() == "torch":
        # Used by distributions validation code.
        total_count = Number(ops.amax(total_count, None).item())
    else:
        total_count = Tensor(ops.expand(total_count, shape[:-1]), inputs)
    backend_dist = import_module(BACKEND_TO_DISTRIBUTIONS_BACKEND[get_backend()])
    return backend_dist.Multinomial.eager_log_prob(total_count, probs, value)  # noqa: F821
def test_binary_funsor_funsor(symbol, dims1, dims2):
    sizes = {'a': 3, 'b': 4, 'c': 5}
    shape1 = tuple(sizes[d] for d in dims1)
    shape2 = tuple(sizes[d] for d in dims2)
    inputs1 = OrderedDict((d, bint(sizes[d])) for d in dims1)
    inputs2 = OrderedDict((d, bint(sizes[d])) for d in dims2)
    data1 = rand(shape1) + 0.5
    data2 = rand(shape2) + 0.5
    dtype = 'real'
    if symbol in BOOLEAN_OPS:
        dtype = 2
        data1 = ops.astype(data1, 'uint8')
        data2 = ops.astype(data2, 'uint8')
    x1 = Tensor(data1, inputs1, dtype)
    x2 = Tensor(data2, inputs2, dtype)
    inputs, aligned = align_tensors(x1, x2)
    expected_data = binary_eval(symbol, aligned[0], aligned[1])
    actual = binary_eval(symbol, x1, x2)
    check_funsor(actual, inputs, Domain((), dtype), expected_data)
def test_batched_einsum(equation, batch1, batch2):
    inputs, output = equation.split('->')
    inputs = inputs.split(',')
    sizes = dict(a=2, b=3, c=4, i=5, j=6)
    batch1 = OrderedDict([(k, bint(sizes[k])) for k in batch1])
    batch2 = OrderedDict([(k, bint(sizes[k])) for k in batch2])
    funsors = [
        random_tensor(batch, reals(*(sizes[d] for d in dims)))
        for batch, dims in zip([batch1, batch2], inputs)
    ]
    actual = Einsum(equation, tuple(funsors))

    _equation = ','.join('...' + i for i in inputs) + '->...' + output
    inputs, tensors = align_tensors(*funsors)
    batch = tuple(v.size for v in inputs.values())
    tensors = [ops.expand(x, batch + f.shape) for (x, f) in zip(tensors, funsors)]
    expected = Tensor(ops.einsum(_equation, *tensors), inputs)
    assert_close(actual, expected, atol=1e-5, rtol=None)
def eager_affine_normal(matrix, loc, scale, value_x, value_y):
    assert len(matrix.output.shape) == 2
    assert value_x.output == reals(matrix.output.shape[0])
    assert value_y.output == reals(matrix.output.shape[1])

    tensors = (matrix, loc, scale, value_y)
    int_inputs, tensors = align_tensors(*tensors)
    matrix, loc, scale, value_y = tensors
    assert value_y.size(-1) == loc.size(-1)
    prec_sqrt = matrix / scale.unsqueeze(-2)
    precision = prec_sqrt.matmul(prec_sqrt.transpose(-1, -2))
    delta = (value_y - loc) / scale
    info_vec = prec_sqrt.matmul(delta.unsqueeze(-1)).squeeze(-1)
    log_normalizer = (-0.5 * loc.size(-1) * math.log(2 * math.pi)
                      - 0.5 * delta.pow(2).sum(-1) - scale.log().sum(-1))
    precision = precision.expand(info_vec.shape + (-1,))
    log_normalizer = log_normalizer.expand(info_vec.shape[:-1])
    inputs = int_inputs.copy()
    x_name = gensym("x")
    inputs[x_name] = value_x.output
    x_dist = Tensor(log_normalizer, int_inputs) + Gaussian(info_vec, precision, inputs)
    return x_dist(**{x_name: value_x})
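# Sketch of the algebra above (notation assumed: M = matrix, s = scale,
# delta = (y - loc) / s): conditioning y = x @ M + loc + s * eps on an
# observed y yields a Gaussian in x with
#
#     precision  P = (M / s) (M / s)^T    (prec_sqrt prec_sqrt^T)
#     info_vec   i = (M / s) delta
#
# while log_normalizer = -1/2 d log(2 pi) - 1/2 |delta|^2 - sum log s is
# log N(y; loc, s), i.e. the joint log density evaluated at x = 0.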
def test_tensor_distribution(event_inputs, batch_inputs, test_grad):
    num_samples = 50000
    sample_inputs = OrderedDict(n=bint(num_samples))
    be_inputs = OrderedDict(batch_inputs + event_inputs)
    batch_inputs = OrderedDict(batch_inputs)
    event_inputs = OrderedDict(event_inputs)
    sampled_vars = frozenset(event_inputs)
    p = random_tensor(be_inputs)
    p.data.requires_grad_(test_grad)

    q = p.sample(sampled_vars, sample_inputs)
    mq = p.materialize(q).reduce(ops.logaddexp, 'n')
    mq = mq.align(tuple(p.inputs))
    assert_close(mq, p, atol=0.1, rtol=None)

    if test_grad:
        _, (p_data, mq_data) = align_tensors(p, mq)
        assert p_data.shape == mq_data.shape
        probe = torch.randn(p_data.shape)
        expected = grad((p_data.exp() * probe).sum(), [p.data])[0]
        actual = grad((mq_data.exp() * probe).sum(), [p.data])[0]
        assert_close(actual, expected, atol=0.1, rtol=None)
def _eager_subs_affine(self, subs, remaining_subs):
    # Extract an affine representation.
    affine = OrderedDict()
    for k, v in subs:
        const, coeffs = extract_affine(v)
        if (isinstance(const, Tensor)
                and all(isinstance(coeff, Tensor) for coeff, _ in coeffs.values())):
            affine[k] = const, coeffs
        else:
            remaining_subs += (k, v),
    if not affine:
        return reflect(Subs, self, remaining_subs)

    # Align integer dimensions.
    old_int_inputs = OrderedDict(
        (k, v) for k, v in self.inputs.items() if v.dtype != 'real')
    tensors = [Tensor(self.info_vec, old_int_inputs),
               Tensor(self.precision, old_int_inputs)]
    for const, coeffs in affine.values():
        tensors.append(const)
        tensors.extend(coeff for coeff, _ in coeffs.values())
    new_int_inputs, tensors = align_tensors(*tensors, expand=True)
    tensors = (Tensor(x, new_int_inputs) for x in tensors)
    old_info_vec = next(tensors).data
    old_precision = next(tensors).data
    for old_k, (const, coeffs) in affine.items():
        const = next(tensors)
        for new_k, (coeff, eqn) in coeffs.items():
            coeff = next(tensors)
            coeffs[new_k] = coeff, eqn
        affine[old_k] = const, coeffs
    batch_shape = old_info_vec.shape[:-1]

    # Align real dimensions.
    old_real_inputs = OrderedDict(
        (k, v) for k, v in self.inputs.items() if v.dtype == 'real')
    new_real_inputs = old_real_inputs.copy()
    for old_k, (const, coeffs) in affine.items():
        del new_real_inputs[old_k]
        for new_k, (coeff, eqn) in coeffs.items():
            new_shape = coeff.shape[:len(eqn.split('->')[0].split(',')[1])]
            new_real_inputs[new_k] = Reals[new_shape]
    old_offsets, old_dim = _compute_offsets(old_real_inputs)
    new_offsets, new_dim = _compute_offsets(new_real_inputs)
    new_inputs = new_int_inputs.copy()
    new_inputs.update(new_real_inputs)

    # Construct a blockwise affine representation of the substitution.
    subs_vector = BlockVector(batch_shape + (old_dim,))
    subs_matrix = BlockMatrix(batch_shape + (new_dim, old_dim))
    for old_k, old_offset in old_offsets.items():
        old_size = old_real_inputs[old_k].num_elements
        old_slice = slice(old_offset, old_offset + old_size)
        if old_k in new_real_inputs:
            new_offset = new_offsets[old_k]
            new_slice = slice(new_offset, new_offset + old_size)
            subs_matrix[..., new_slice, old_slice] = \
                ops.new_eye(self.info_vec, batch_shape + (old_size,))
            continue
        const, coeffs = affine[old_k]
        old_shape = old_real_inputs[old_k].shape
        assert const.data.shape == batch_shape + old_shape
        subs_vector[..., old_slice] = const.data.reshape(batch_shape + (old_size,))
        for new_k, new_offset in new_offsets.items():
            if new_k in coeffs:
                coeff, eqn = coeffs[new_k]
                new_size = new_real_inputs[new_k].num_elements
                new_slice = slice(new_offset, new_offset + new_size)
                assert coeff.shape == new_real_inputs[new_k].shape + old_shape
                subs_matrix[..., new_slice, old_slice] = \
                    coeff.data.reshape(batch_shape + (new_size, old_size))
    subs_vector = subs_vector.as_tensor()
    subs_matrix = subs_matrix.as_tensor()
    subs_matrix_t = ops.transpose(subs_matrix, -1, -2)

    # Construct the new funsor. Suppose the old Gaussian funsor g has density
    #   g(x) = < x | i - 1/2 P x >
    # Now define a new funsor f by substituting x = A y + B:
    #   f(y) = g(A y + B)
    #        = < A y + B | i - 1/2 P (A y + B) >
    #        = < y | At (i - P B) - 1/2 At P A y > + < B | i - 1/2 P B >
    #        =: < y | i' - 1/2 P' y > + C
    # where P' = At P A and i' = At (i - P B) parametrize a new Gaussian
    # and C = < B | i - 1/2 P B > parametrizes a new Tensor.
    precision = subs_matrix @ old_precision @ subs_matrix_t
    info_vec = _mv(subs_matrix, old_info_vec - _mv(old_precision, subs_vector))
    const = _vv(subs_vector, old_info_vec - 0.5 * _mv(old_precision, subs_vector))
    result = Gaussian(info_vec, precision, new_inputs) + Tensor(const, new_int_inputs)
    return Subs(result, remaining_subs) if remaining_subs else result
def _eager_subs_real(self, subs, remaining_subs):
    # Broadcast all component tensors.
    subs = OrderedDict(subs)
    int_inputs = OrderedDict(
        (k, d) for k, d in self.inputs.items() if d.dtype != 'real')
    tensors = [Tensor(self.info_vec, int_inputs),
               Tensor(self.precision, int_inputs)]
    tensors.extend(subs.values())
    int_inputs, tensors = align_tensors(*tensors)
    batch_dim = len(tensors[0].shape) - 1
    batch_shape = broadcast_shape(*(x.shape[:batch_dim] for x in tensors))
    (info_vec, precision), values = tensors[:2], tensors[2:]
    offsets, event_size = _compute_offsets(self.inputs)
    slices = [(k, slice(offset, offset + self.inputs[k].num_elements))
              for k, offset in offsets.items()]

    # Expand all substituted values.
    values = OrderedDict(zip(subs, values))
    for k, value in values.items():
        value = value.reshape(value.shape[:batch_dim] + (-1,))
        if not get_tracing_state():
            assert value.shape[-1] == self.inputs[k].num_elements
        values[k] = ops.expand(value, batch_shape + value.shape[-1:])

    # Try to perform a complete substitution of all real variables,
    # resulting in a Tensor.
    if all(k in subs for k, d in self.inputs.items() if d.dtype == 'real'):
        # Form the concatenated value.
        value = BlockVector(batch_shape + (event_size,))
        for k, i in slices:
            if k in values:
                value[..., i] = values[k]
        value = value.as_tensor()
        # Evaluate the non-normalized log density.
        result = _vv(value, info_vec - 0.5 * _mv(precision, value))
        result = Tensor(result, int_inputs)
        assert result.output == Real
        return Subs(result, remaining_subs) if remaining_subs else result

    # Perform a partial substitution of a subset of real variables, resulting
    # in a Joint. We split real inputs into two sets: a for the preserved and
    # b for the substituted.
    b = frozenset(k for k, v in subs.items())
    a = frozenset(k for k, d in self.inputs.items()
                  if d.dtype == 'real' and k not in b)
    prec_aa = ops.cat(-2, *[
        ops.cat(-1, *[precision[..., i1, i2] for k2, i2 in slices if k2 in a])
        for k1, i1 in slices if k1 in a])
    prec_ab = ops.cat(-2, *[
        ops.cat(-1, *[precision[..., i1, i2] for k2, i2 in slices if k2 in b])
        for k1, i1 in slices if k1 in a])
    prec_bb = ops.cat(-2, *[
        ops.cat(-1, *[precision[..., i1, i2] for k2, i2 in slices if k2 in b])
        for k1, i1 in slices if k1 in b])
    info_a = ops.cat(-1, *[info_vec[..., i] for k, i in slices if k in a])
    info_b = ops.cat(-1, *[info_vec[..., i] for k, i in slices if k in b])
    value_b = ops.cat(-1, *[values[k] for k, i in slices if k in b])
    info_vec = info_a - _mv(prec_ab, value_b)
    log_scale = _vv(value_b, info_b - 0.5 * _mv(prec_bb, value_b))
    precision = ops.expand(prec_aa, info_vec.shape + info_vec.shape[-1:])
    inputs = int_inputs.copy()
    for k, d in self.inputs.items():
        if k not in subs:
            inputs[k] = d
    result = Gaussian(info_vec, precision, inputs) + Tensor(log_scale, int_inputs)
    return Subs(result, remaining_subs) if remaining_subs else result
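# The partial-substitution algebra above mirrors the derivation in
# _eager_subs_affine: writing the log density in block form, with preserved
# variables x_a and substituted values v_b,
#
#     < (x_a, v_b) | i - 1/2 P (x_a, v_b) >
#         = < x_a | (i_a - P_ab v_b) - 1/2 P_aa x_a >
#           + < v_b | i_b - 1/2 P_bb v_b >
#
# so the result is a Gaussian over x_a with info_vec i_a - P_ab v_b and
# precision P_aa, plus the scalar log_scale correction as a Tensor.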
def eager_log_prob(cls, *params):
    inputs, tensors = align_tensors(*params)
    params = dict(zip(cls._ast_fields, tensors))
    value = params.pop('value')
    data = cls.dist_class(**params).log_prob(value)
    return Tensor(data, inputs)
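# align_tensors is the workhorse shared by the eager_* patterns above: it
# merges the named inputs of several funsor Tensors into a single OrderedDict
# and returns raw backend arrays whose positional dims line up with that dict.
# A minimal sketch of the calling convention (sizes are illustrative):
x = Tensor(rand((3,)), OrderedDict(i=bint(3)))
y = Tensor(rand((4,)), OrderedDict(j=bint(4)))
inputs, (x_data, y_data) = align_tensors(x, y)
# inputs merges both input dicts (here: i, then j); x_data and y_data are raw
# backend arrays that broadcast against each other over those dims.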
def _check_sample(funsor_dist, sample_inputs, inputs, atol=1e-2, rtol=None,
                  num_samples=100000, statistic="mean", skip_grad=False):
    """Utility that compares a Monte Carlo estimate of a distribution statistic
    (mean, variance, or entropy) with its analytic value."""
    samples_per_dim = int(num_samples ** (1. / max(1, len(sample_inputs))))
    sample_inputs = OrderedDict((k, bint(samples_per_dim)) for k in sample_inputs)

    for tensor in list(funsor_dist.params.values())[:-1]:
        tensor.data.requires_grad_()

    sample_value = funsor_dist.sample(frozenset(['value']), sample_inputs)
    expected_inputs = OrderedDict(
        tuple(sample_inputs.items()) + tuple(inputs.items())
        + (('value', funsor_dist.inputs['value']),))
    check_funsor(sample_value, expected_inputs, reals())

    if sample_inputs:
        actual_mean = Integrate(
            sample_value, Variable('value', funsor_dist.inputs['value']),
            frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
        inputs, tensors = align_tensors(*list(funsor_dist.params.values())[:-1])
        raw_dist = funsor_dist.dist_class(
            **dict(zip(funsor_dist._ast_fields[:-1], tensors)))
        expected_mean = Tensor(raw_dist.mean, inputs)
        check_funsor(actual_mean, expected_mean.inputs, expected_mean.output)
        assert_close(actual_mean, expected_mean, atol=atol, rtol=rtol)

    if sample_inputs and not skip_grad:
        if statistic == "mean":
            actual_stat, expected_stat = actual_mean, expected_mean
        elif statistic == "variance":
            actual_stat = Integrate(
                sample_value,
                (Variable('value', funsor_dist.inputs['value']) - actual_mean) ** 2,
                frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
            expected_stat = Tensor(raw_dist.variance, inputs)
        elif statistic == "entropy":
            actual_stat = -Integrate(
                sample_value, funsor_dist,
                frozenset(['value'])).reduce(ops.add, frozenset(sample_inputs))
            expected_stat = Tensor(raw_dist.entropy(), inputs)
        else:
            raise ValueError("invalid test statistic")

        grad_targets = [v.data for v in list(funsor_dist.params.values())[:-1]]
        actual_grads = torch.autograd.grad(
            actual_stat.reduce(ops.add).sum().data, grad_targets, allow_unused=True)
        expected_grads = torch.autograd.grad(
            expected_stat.reduce(ops.add).sum().data, grad_targets, allow_unused=True)
        assert_close(actual_stat, expected_stat, atol=atol, rtol=rtol)
        for actual_grad, expected_grad in zip(actual_grads, expected_grads):
            if expected_grad is not None:
                assert_close(actual_grad, expected_grad, atol=atol, rtol=rtol)
            else:
                assert_close(actual_grad, torch.zeros_like(actual_grad),
                             atol=atol, rtol=rtol)