  def testSqrtToPow(self):

    def fun(x):
      return np.sqrt(x)

    expr = make_expr(fun, 3.)
    expr = canonicalize(expr)
    self.assertIsInstance(expr, GraphExpr)
    self.assertEqual(expr.expr_node.fun, np.power)
    self.assertEqual(eval_expr(expr, {'x': 3.}), fun(3.))
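
  # Note: the assertions above pin down the sqrt-to-power rewrite; the
  # canonical head op is np.power (presumably applied as power(x, 0.5),
  # though the test only checks the op and the evaluated value).
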
  def testFindSufficientStatisticNodes(self):

    def log_joint(x, y, matrix):
      # Linear in x: y^T x
      result = np.einsum('i,i->', x, y)
      # Quadratic form: x^T matrix x
      result += np.einsum('ij,i,j->', matrix, x, x)
      # Rank-1 quadratic form: (x**2)^T(y**2)
      result += np.einsum('i,i,j,j->', x, y, x, y)
      # Linear in log(x): y^T log(x)
      result += np.einsum('i,i->', y, np.log(x))
      # Linear in reciprocal(x): y^T reciprocal(x)
      result += np.einsum('i,i->', y, np.reciprocal(x))
      # More obscurely linear in log(x): y^T matrix log(x)
      result += np.einsum('i,ij,j->', y, matrix, np.log(x))
      # Linear in x * log(x): y^T (x * log(x))
      result += np.einsum('i,i->', y, x * np.log(x))
      return result

    n_dimensions = 5
    x = np.exp(np.random.randn(n_dimensions))
    y = np.random.randn(n_dimensions)
    matrix = np.random.randn(n_dimensions, n_dimensions)
    env = {'x': x, 'y': y, 'matrix': matrix}

    expr = make_expr(log_joint, x, y, matrix)
    expr = canonicalize(expr)
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(expr, 'x')
    suff_stats = [eval_expr(GraphExpr(node, expr.free_vars), env)
                  for node in sufficient_statistic_nodes]
    correct_suff_stats = [x, x.dot(matrix.dot(x)), np.square(x.dot(y)),
                          np.log(x), np.reciprocal(x), y.dot(x * np.log(x))]
    self.assertTrue(_perfect_match_values(suff_stats, correct_suff_stats))
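
    # With split_einsums=True (below), the quadratic statistics come back
    # factored apart: the x^T matrix x term contributes outer(x, x) and the
    # (y^T x)^2 term contributes x * x, instead of the fully contracted
    # scalars checked above.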

    expr = make_expr(log_joint, x, y, matrix)
    expr = canonicalize(expr)
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(
        expr, 'x', split_einsums=True)
    suff_stats = [eval_expr(GraphExpr(node, expr.free_vars), env)
                  for node in sufficient_statistic_nodes]
    correct_suff_stats = [x, np.outer(x, x), x * x,
                          np.log(x), np.reciprocal(x), x * np.log(x)]
    self.assertTrue(_match_values(suff_stats, correct_suff_stats))
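
  # _match_values and _perfect_match_values are helpers defined elsewhere in
  # this test file. A minimal sketch of plausible implementations
  # (hypothetical reconstruction; the real helpers may differ):
  #
  #   def _match_values(xs, ys, comparator=np.allclose):
  #     # Every computed value matches at least one expected value.
  #     return all(any(comparator(x, y) for y in ys) for x in xs)
  #
  #   def _perfect_match_values(xs, ys):
  #     # Stricter: also require the two collections to have equal size.
  #     return len(list(xs)) == len(list(ys)) and _match_values(xs, ys)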

  def testEinsumAddSubSimplify(self):
    # TODO(mhoffman): Think about broadcasting. We need to support `x - 2.0`.

    def test_fun(x):
      return np.einsum('i->', x + np.full(x.shape, 2.0))

    expr = make_expr(test_fun, np.ones(3))
    test_x = np.full(3, 0.5)
    correct_value = eval_expr(expr, {'x': test_x})
    expr = canonicalize(expr)
    self.assertIsInstance(expr, GraphExpr)
    self.assertEqual(expr.expr_node.fun, add_n)
    self.assertEqual(expr.expr_node.parents[0].fun.__name__, 'einsum')
    new_value = eval_expr(expr, {'x': test_x})
    self.assertEqual(correct_value, new_value)
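
  # The two structural assertions above encode the intended rewrite: the sum
  # inside the einsum is distributed out, roughly (an illustrative sketch)
  #   einsum('i->', x + c)  ~>  add_n(einsum('i->', x), einsum('i->', c))
  # so the canonical head is add_n with einsum nodes as parents.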

  def testCanonicalize(self):

    def mahalanobis_distance(x, y, matrix):
      x_minus_y = x - y
      return np.einsum('i,j,ij->', x_minus_y, x_minus_y, matrix)

    x = np.array([1.3, 3.6])
    y = np.array([2.3, -1.2])
    matrix = np.arange(4).reshape([2, 2])
    expr = make_expr(mahalanobis_distance, x, y, matrix)
    self.assertFalse(is_canonical(expr))
    correct_value = eval_expr(expr, {'x': x, 'y': y, 'matrix': matrix})
    expr = canonicalize(expr)
    self.assertTrue(is_canonical(expr))
    new_value = eval_expr(expr, {'x': x, 'y': y, 'matrix': matrix})
    self.assertAlmostEqual(correct_value, new_value)

  def testLinearRegression(self):

    def log_joint(X, beta, y):
      predictions = np.einsum('ij,j->i', X, beta)
      errors = y - predictions
      log_prior = np.einsum('i,i,i->', -0.5 * np.ones_like(beta), beta, beta)
      log_likelihood = np.einsum(',k,k->', -0.5, errors, errors)
      return log_prior + log_likelihood

    n_examples = 10
    n_predictors = 2
    X = np.random.randn(n_examples, n_predictors)
    beta = np.random.randn(n_predictors)
    y = np.random.randn(n_examples)
    graph = make_expr(log_joint, X, beta, y)
    graph = canonicalize(graph)

    args = list(graph.free_vars.keys())
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(graph, args[1])
    sufficient_statistics = [eval_node(node, graph.free_vars,
                                       {'X': X, 'beta': beta, 'y': y})
                             for node in sufficient_statistic_nodes]
    correct_sufficient_statistics = [
        -0.5 * beta.dot(beta), beta,
        -0.5 * np.einsum('ij,ik,j,k', X, X, beta, beta)
    ]
    self.assertTrue(_match_values(sufficient_statistics,
                                  correct_sufficient_statistics))

    _, natural_parameter_funs = _extract_conditional_factors(graph, 'beta')
    self.assertTrue(_match_values(natural_parameter_funs.keys(),
                                  ['x', 'einsum(...a,...b->...ab, x, x)',
                                   'einsum(...,...->..., x, x)'],
                                  lambda x, y: x == y))
    natural_parameter_vals = [f(X, beta, y) for f in
                              natural_parameter_funs.values()]
    correct_parameter_vals = [-0.5 * np.ones(n_predictors), -0.5 * X.T.dot(X),
                              y.dot(X)]
    self.assertTrue(_match_values(natural_parameter_vals,
                                  correct_parameter_vals))

    conditional_factory = complete_conditional(log_joint, 1, SupportTypes.REAL,
                                               X, beta, y)
    conditional = conditional_factory(X, y)
    true_cov = np.linalg.inv(X.T.dot(X) + np.eye(n_predictors))
    true_mean = true_cov.dot(y.dot(X))
    self.assertTrue(np.allclose(true_cov, conditional.cov))
    self.assertTrue(np.allclose(true_mean, conditional.mean))
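
  # Sanity check on the expected answers: with a N(0, I) prior on beta and
  # unit-variance observation noise (the -0.5 coefficients above), standard
  # Bayesian linear-regression algebra gives a Gaussian posterior with
  #   cov  = inv(X^T X + I)
  #   mean = cov @ X^T y
  # which is exactly what true_cov and true_mean compute.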

  def _test_condition_and_marginalize_diagonal_zero_mean_normal(self,
                                                                log_joint):
    n_dimensions = 5
    x = np.random.randn(n_dimensions)
    tau = np.random.randn(n_dimensions) ** 2

    end_node = make_expr(log_joint, x, tau)
    end_node = canonicalize(end_node)

    conditional, marginalized_value = _condition_and_marginalize(
        log_joint, 0, SupportTypes.REAL, x, tau)
    correct_marginalized_value = (-0.5 * np.log(tau).sum()
                                  + 0.5 * n_dimensions * np.log(2. * np.pi))
    self.assertAlmostEqual(correct_marginalized_value, marginalized_value)

    self.assertTrue(np.allclose(np.zeros(n_dimensions), conditional.args[0]))
    self.assertTrue(np.allclose(1. / np.sqrt(tau), conditional.args[1]))
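
  # The expected marginal is the log-normalizer of a diagonal zero-mean
  # Gaussian: log integral exp(-0.5 * sum_i tau_i * x_i**2) dx
  #   = 0.5 * sum_i log(2 * pi / tau_i)
  #   = -0.5 * np.log(tau).sum() + 0.5 * n_dimensions * np.log(2 * np.pi)
  # and the conditional's (mean, scale) args should be (0, 1 / sqrt(tau)),
  # matching the two assertions on conditional.args.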

  def testLinearRegression(self):

    def squared_loss(X, beta, y):
      predictions = np.einsum('ij,j->i', X, beta)
      errors = y - predictions
      return np.einsum('k,k->', errors, errors)

    n_examples = 10
    n_predictors = 2
    X = np.random.randn(n_examples, n_predictors)
    beta = np.random.randn(n_predictors)
    y = np.random.randn(n_examples)
    expr = make_expr(squared_loss, X, beta, y)
    correct_value = eval_expr(expr, {'X': X, 'beta': beta, 'y': y})
    self.assertFalse(is_canonical(expr))
    expr = canonicalize(expr)
    self.assertTrue(is_canonical(expr))
    new_value = eval_expr(expr, {'X': X, 'beta': beta, 'y': y})
    self.assertAlmostEqual(correct_value, new_value)

  def testEinsumCompose(self):

    def Xbeta_squared(X, beta):
      Xbeta = np.einsum('ij,j->i', X, beta)
      Xbeta2 = np.einsum('lm,m->l', X, beta)
      # Use the second, differently labeled einsum so that a composition of
      # two distinct einsum nodes is what gets canonicalized.
      return np.einsum('k,k->', Xbeta, Xbeta2)

    n_examples = 10
    n_predictors = 2
    X = np.random.randn(n_examples, n_predictors)
    beta = np.random.randn(n_predictors)
    expr = make_expr(Xbeta_squared, X, beta)
    correct_value = eval_expr(expr, {'X': X, 'beta': beta})
    self.assertFalse(is_canonical(expr))
    expr = canonicalize(expr)
    new_value = eval_expr(expr, {'X': X, 'beta': beta})
    self.assertAlmostEqual(correct_value, new_value)
    self.assertIsInstance(expr, GraphExpr)
    self.assertEqual(expr.expr_node.fun, np.einsum)
    self.assertTrue(is_canonical(expr))
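
  # What this test pins down: composed einsums canonicalize into a single
  # einsum node. Illustratively (a sketch, not the library's exact formula):
  #   einsum('k,k->', einsum('ij,j->i', X, b), einsum('lm,m->l', X, b))
  #     ~> einsum('ij,j,ik,k->', X, b, X, b)
  # which is why the canonical expr_node.fun is np.einsum.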

  def testLinearRegression(self):

    def log_joint(X, beta, y):
      predictions = np.einsum('ij,j->i', X, beta)
      errors = y - predictions
      log_prior = np.einsum('i,i,i->', -0.5 * np.ones_like(beta), beta, beta)
      log_likelihood = np.einsum(',k,k->', -0.5, errors, errors)
      return log_prior + log_likelihood

    n_examples = 10
    n_predictors = 2
    X = np.random.randn(n_examples, n_predictors)
    beta = np.random.randn(n_predictors)
    y = np.random.randn(n_examples)
    graph = make_expr(log_joint, X, beta, y)
    graph = canonicalize(graph)

    args = list(graph.free_vars.keys())
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(graph, args[1])
    sufficient_statistics = [eval_node(node, graph.free_vars,
                                       {'X': X, 'beta': beta, 'y': y})
                             for node in sufficient_statistic_nodes]
    correct_sufficient_statistics = [
        -0.5 * beta.dot(beta), beta,
        -0.5 * np.einsum('ij,ik,j,k', X, X, beta, beta)
    ]
    self.assertTrue(_match_values(sufficient_statistics,
                                  correct_sufficient_statistics))

    new_log_joint, _, stats_funs, _ = (
        statistic_representation(log_joint, (X, beta, y),
                                 (SupportTypes.REAL,), (1,)))
    beta_stat_fun = stats_funs[0]
    beta_natparam = grad_namedtuple(new_log_joint, 1)(X, beta_stat_fun(beta), y)
    correct_beta_natparam = (-0.5 * X.T.dot(X), y.dot(X),
                             -0.5 * np.ones(n_predictors))
    self.assertTrue(_match_values(beta_natparam, correct_beta_natparam))

    conditional_factory = complete_conditional(log_joint, 1, SupportTypes.REAL,
                                               X, beta, y)
    conditional = conditional_factory(X, y)
    true_cov = np.linalg.inv(X.T.dot(X) + np.eye(n_predictors))
    true_mean = true_cov.dot(y.dot(X))
    self.assertTrue(np.allclose(true_cov, conditional.cov))
    self.assertTrue(np.allclose(true_mean, conditional.mean))
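
  # Here grad_namedtuple differentiates the reparameterized log joint with
  # respect to beta's sufficient statistics; for an exponential family those
  # partial derivatives are the natural parameters, which is why the expected
  # values are (-0.5 * X^T X, X^T y, -0.5 * ones).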