def test_Dimshuffle_lift_restrictions():
    rng = shared(np.random.RandomState(1233532), borrow=False)

    x = normal(tt.arange(2).reshape((2,)), 100, size=(2, 2, 2), rng=rng)
    y = x.dimshuffle(1, 0, 2)

    # The non-`DimShuffle` client depends on the RNG state, so we can't
    # perform the lift
    z = x - y

    fg = FunctionGraph([rng], [z], clone=False)
    _ = EquilibriumOptimizer([local_dimshuffle_rv_lift], max_use_ratio=100).apply(fg)

    dimshuffle_node = fg.outputs[0].owner.inputs[1].owner
    assert dimshuffle_node == y.owner
    assert isinstance(dimshuffle_node.op, DimShuffle)
    assert dimshuffle_node.inputs[0].owner.op == normal

    # The non-`DimShuffle` client doesn't depend on the RNG state, so we can
    # perform the lift
    z = tt.ones(x.shape) - y

    fg = FunctionGraph([rng], [z], clone=False)
    EquilibriumOptimizer([local_dimshuffle_rv_lift], max_use_ratio=100).apply(fg)

    rv_node = fg.outputs[0].owner.inputs[1].owner
    assert rv_node.op == normal
    assert isinstance(rv_node.inputs[-1].owner.op, DimShuffle)
    assert isinstance(rv_node.inputs[-2].owner.op, DimShuffle)
def test_Subtensor_lift_restrictions():
    rng = shared(np.random.RandomState(1233532), borrow=False)

    x = normal(tt.arange(2), tt.ones(2), rng=rng)
    y = x[1]

    # The non-`Subtensor` client depends on the RNG state, so we can't
    # perform the lift
    z = x - y

    fg = FunctionGraph([rng], [z], clone=False)
    _ = EquilibriumOptimizer([local_subtensor_rv_lift], max_use_ratio=100).apply(fg)

    subtensor_node = fg.outputs[0].owner.inputs[1].owner.inputs[0].owner
    assert subtensor_node == y.owner
    assert isinstance(subtensor_node.op, Subtensor)
    assert subtensor_node.inputs[0].owner.op == normal

    # The non-`Subtensor` client doesn't depend on the RNG state, so we can
    # perform the lift
    z = tt.ones(x.shape) - x[1]

    fg = FunctionGraph([rng], [z], clone=False)
    EquilibriumOptimizer([local_subtensor_rv_lift], max_use_ratio=100).apply(fg)

    rv_node = fg.outputs[0].owner.inputs[1].owner.inputs[0].owner
    assert rv_node.op == normal
    assert isinstance(rv_node.inputs[-1].owner.op, Subtensor)
    assert isinstance(rv_node.inputs[-2].owner.op, Subtensor)
def test_1(self):
    # `op4(x, y)` rewrites to `op1(x, y)`, then to `op2(x, y)`; the outer
    # `op3(op2(x, y))` rewrites back to `op4(x, y)`, and the cycle repeats
    # until the graph reaches its fixed point, `op2(x, y)`
    x, y, z = map(MyVariable, "xyz")
    e = op3(op4(x, y))
    g = FunctionGraph([x, y, z], [e])
    opt = EquilibriumOptimizer(
        [
            PatternSub((op1, "x", "y"), (op2, "x", "y")),
            PatternSub((op4, "x", "y"), (op1, "x", "y")),
            PatternSub((op3, (op2, "x", "y")), (op4, "x", "y")),
        ],
        max_use_ratio=10,
    )
    opt.optimize(g)
    assert str(g) == "FunctionGraph(Op2(x, y))"
def test_DimShuffle_lift(ds_order, lifted, dist_op, dist_params, size, rtol):
    rng = shared(np.random.RandomState(1233532), borrow=False)

    # Build symbolic inputs for the distribution parameters and the size
    dist_params_tt = []
    for p in dist_params:
        p_tt = tt.as_tensor(p).type()
        p_tt.tag.test_value = p
        dist_params_tt.append(p_tt)

    size_tt = []
    for s in size:
        s_tt = tt.iscalar()
        s_tt.tag.test_value = s
        size_tt.append(s_tt)

    dist_st = dist_op(*dist_params_tt, size=size_tt, rng=rng).dimshuffle(ds_order)

    f_inputs = [
        p for p in dist_params_tt + size_tt if not isinstance(p, (slice, Constant))
    ]

    mode = Mode(
        "py", EquilibriumOptimizer([local_dimshuffle_rv_lift], max_use_ratio=100)
    )

    f_opt = function(
        f_inputs,
        dist_st,
        mode=mode,
    )

    (new_out,) = f_opt.maker.fgraph.outputs

    if lifted:
        # The `DimShuffle` should now be applied to the distribution
        # parameters instead of the `RandomVariable`'s output
        assert new_out.owner.op == dist_op
        assert all(
            isinstance(i.owner.op, DimShuffle)
            for i in new_out.owner.inputs[3:]
            if i.owner
        )
    else:
        assert isinstance(new_out.owner.op, DimShuffle)
        return

    f_base = function(
        f_inputs,
        dist_st,
        mode=no_mode,
    )

    # The lifted graph should compute the same values as the original
    arg_values = [p.get_test_value() for p in f_inputs]
    res_base = f_base(*arg_values)
    res_opt = f_opt(*arg_values)

    np.testing.assert_allclose(res_base, res_opt, rtol=rtol)
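# A hypothetical parametrization for `test_DimShuffle_lift`, modeled on the
# graph in `test_Dimshuffle_lift_restrictions`; the real suite's cases may
# differ, and `normal` is the random-variable constructor used above.
import numpy as np
import pytest

dimshuffle_lift_params = pytest.mark.parametrize(
    "ds_order, lifted, dist_op, dist_params, size, rtol",
    [
        # Transposing the first two dimensions of a (2, 2, 2) normal should
        # be lifted through the `RandomVariable`
        ((1, 0, 2), True, normal, (np.arange(2.0), np.array(100.0)), (2, 2, 2), 1e-7),
    ],
)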
def test_low_use_ratio(self):
    x, y, z = map(MyVariable, "xyz")
    e = op3(op4(x, y))
    g = FunctionGraph([x, y, z], [e])

    # Silence the 'theano.gof.opt' logger so the max-use warning doesn't
    # clutter the test output
    _logger = logging.getLogger("theano.gof.opt")
    oldlevel = _logger.level
    _logger.setLevel(logging.CRITICAL)
    try:
        opt = EquilibriumOptimizer(
            [
                PatternSub((op1, "x", "y"), (op2, "x", "y")),
                PatternSub((op4, "x", "y"), (op1, "x", "y")),
                PatternSub((op3, (op2, "x", "y")), (op4, "x", "y")),
            ],
            # Each optimization can only be applied once
            max_use_ratio=1.0 / len(g.apply_nodes),
        )
        opt.optimize(g)
    finally:
        _logger.setLevel(oldlevel)

    assert str(g) == "FunctionGraph(Op1(x, y))"
def test_kanren_opt():
    """Make sure we can run miniKanren "optimizations" over a graph until a
    fixed-point/normal-form is reached.
    """
    tt.config.cxx = ""
    tt.config.compute_test_value = "ignore"

    x_tt = tt.vector("x")
    c_tt = tt.vector("c")
    d_tt = tt.vector("d")
    A_tt = tt.matrix("A")
    B_tt = tt.matrix("B")

    # Z = A.dot(x + B.dot(c + d)); repeatedly distributing `dot` over `add`
    # should normalize this to A.dot(x) + (A.dot(B.dot(c)) + A.dot(B.dot(d)))
    Z_tt = A_tt.dot(x_tt + B_tt.dot(c_tt + d_tt))

    fgraph = FunctionGraph(tt_inputs([Z_tt]), [Z_tt], clone=True)

    assert isinstance(fgraph.outputs[0].owner.op, tt.Dot)

    def distributes(in_lv, out_lv):
        return lall(
            # lhs == A * (x + b)
            eq(
                etuple(mt.dot, var("A"), etuple(mt.add, var("x"), var("b"))),
                etuplize(in_lv),
            ),
            # rhs == A * x + A * b
            eq(
                etuple(
                    mt.add,
                    etuple(mt.dot, var("A"), var("x")),
                    etuple(mt.dot, var("A"), var("b")),
                ),
                out_lv,
            ),
        )

    distribute_opt = EquilibriumOptimizer(
        [KanrenRelationSub(distributes)], max_use_ratio=10
    )

    fgraph_opt = optimize_graph(fgraph, distribute_opt, return_graph=False)

    assert fgraph_opt.owner.op == tt.add
    assert isinstance(fgraph_opt.owner.inputs[0].owner.op, tt.Dot)
    # TODO: Something wrong with `etuple` caching?
    # assert fgraph_opt.owner.inputs[0].owner.inputs[0] == A_tt
    assert fgraph_opt.owner.inputs[0].owner.inputs[0].name == "A"
    assert fgraph_opt.owner.inputs[1].owner.op == tt.add
    assert isinstance(fgraph_opt.owner.inputs[1].owner.inputs[0].owner.op, tt.Dot)
    assert isinstance(fgraph_opt.owner.inputs[1].owner.inputs[1].owner.op, tt.Dot)
def test_mvnormal_mvnormal():
    a_tt = tt.vector("a")
    R_tt = tt.matrix("R")
    F_t_tt = tt.matrix("F")
    V_tt = tt.matrix("V")

    a_tt.tag.test_value = np.r_[1.0, 0.0]
    R_tt.tag.test_value = np.diag([10.0, 10.0])
    F_t_tt.tag.test_value = np.c_[-2.0, 1.0]
    V_tt.tag.test_value = np.diag([0.5])

    beta_rv = MvNormalRV(a_tt, R_tt, name="\\beta")

    E_y_rv = F_t_tt.dot(beta_rv)
    Y_rv = MvNormalRV(E_y_rv, V_tt, name="Y")

    y_tt = tt.as_tensor_variable(np.r_[-3.0])
    y_tt.name = "y"
    Y_obs = observed(y_tt, Y_rv)

    fgraph = FunctionGraph(tt_inputs([beta_rv, Y_obs]), [beta_rv, Y_obs], clone=True)

    posterior_opt = EquilibriumOptimizer(
        [KanrenRelationSub(conjugate_posteriors)], max_use_ratio=10
    )

    fgraph_opt = optimize_graph(fgraph, posterior_opt, return_graph=False)

    # Make sure that it removed the old, integrated observation distribution.
    assert fgraph_opt[1].owner.inputs[1].equals(tt.NoneConst)

    # Check that the SSE has decreased from prior to posterior.
    # TODO: Use a better test.
    beta_prior_mean_val = a_tt.tag.test_value
    F_val = F_t_tt.tag.test_value
    beta_post_mean_val = fgraph_opt[0].owner.inputs[0].tag.test_value

    priorp_err = np.square(y_tt.data - F_val.dot(beta_prior_mean_val)).sum()
    postp_err = np.square(y_tt.data - F_val.dot(beta_post_mean_val)).sum()

    # First, make sure the prior and posterior means are simply not equal.
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal, priorp_err, postp_err
    )

    # Now, make sure there's a decrease (relative to the observed point).
    np.testing.assert_array_less(postp_err, priorp_err)
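# For reference, the standard conjugate update behind `conjugate_posteriors`
# in this test: with prior beta ~ N(a, R) and observation y ~ N(F @ beta, V),
# the posterior mean is m = a + R @ F.T @ inv(F @ R @ F.T + V) @ (y - F @ a).
# A quick NumPy sanity check with the test values above (illustrative only,
# not part of the original suite):
import numpy as np

a = np.r_[1.0, 0.0]
R = np.diag([10.0, 10.0])
F = np.c_[-2.0, 1.0]
V = np.diag([0.5])
y = np.r_[-3.0]

S = F @ R @ F.T + V
m = a + R @ F.T @ np.linalg.solve(S, y - F @ a)

# The posterior predictive error should be smaller than the prior's,
# matching the test's final `assert_array_less` check
assert np.square(y - F @ m).sum() < np.square(y - F @ a).sum()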
def test_Subtensor_lift(indices, lifted, dist_op, dist_params, size):
    from theano.tensor.subtensor import as_index_constant

    rng = shared(np.random.RandomState(1233532), borrow=False)

    # Build symbolic inputs for the distribution parameters, the size, and
    # the indices
    dist_params_tt = []
    for p in dist_params:
        p_tt = tt.as_tensor(p).type()
        p_tt.tag.test_value = p
        dist_params_tt.append(p_tt)

    size_tt = []
    for s in size:
        s_tt = tt.iscalar()
        s_tt.tag.test_value = s
        size_tt.append(s_tt)

    indices_tt = ()
    for i in indices:
        i_tt = as_index_constant(i)
        if not isinstance(i_tt, slice):
            i_tt.tag.test_value = i
        indices_tt += (i_tt,)

    dist_st = dist_op(*dist_params_tt, size=size_tt, rng=rng)[indices_tt]

    f_inputs = [
        p
        for p in dist_params_tt + size_tt + list(indices_tt)
        if not isinstance(p, (slice, Constant))
    ]

    mode = Mode(
        "py", EquilibriumOptimizer([local_subtensor_rv_lift], max_use_ratio=100)
    )

    f_opt = function(
        f_inputs,
        dist_st,
        mode=mode,
    )

    (new_out,) = f_opt.maker.fgraph.outputs

    if lifted:
        # The `*Subtensor` should now be applied to the distribution
        # parameters instead of the `RandomVariable`'s output
        assert isinstance(new_out.owner.op, RandomVariable)
        assert all(
            isinstance(i.owner.op, (AdvancedSubtensor, AdvancedSubtensor1, Subtensor))
            for i in new_out.owner.inputs[3:]
            if i.owner
        )
    else:
        assert isinstance(
            new_out.owner.op, (AdvancedSubtensor, AdvancedSubtensor1, Subtensor)
        )
        return

    f_base = function(
        f_inputs,
        dist_st,
        mode=no_mode,
    )

    # The lifted graph should compute the same values as the original
    arg_values = [p.get_test_value() for p in f_inputs]
    res_base = f_base(*arg_values)
    res_opt = f_opt(*arg_values)

    np.testing.assert_allclose(res_base, res_opt, rtol=1e-3)
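# A hypothetical parametrization for `test_Subtensor_lift`, modeled on the
# graph in `test_Subtensor_lift_restrictions`; the real suite's cases may
# differ.
import numpy as np
import pytest

subtensor_lift_params = pytest.mark.parametrize(
    "indices, lifted, dist_op, dist_params, size",
    [
        # Indexing a length-2 normal with a constant scalar index should be
        # lifted through the `RandomVariable`
        ((1,), True, normal, (np.arange(2.0), np.ones(2)), ()),
    ],
)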
def logp(*output_vars):
    """Compute the log-likelihood for a graph.

    Parameters
    ----------
    *output_vars: Tuple[TensorVariable]
        The output of a graph containing `RandomVariable`s.

    Returns
    -------
    Dict[TensorVariable, TensorVariable]
        A map from `RandomVariable`s to their log-likelihood graphs.

    """
    model_inputs = tt_inputs(output_vars)

    model_fgraph = FunctionGraph(
        model_inputs,
        output_vars,
        clone=True,
        # XXX: `ShapeFeature` introduces cached constants
        # features=[tt.opt.ShapeFeature()]
    )

    canonicalize_opt = optdb.query(Query(include=["canonicalize"]))
    push_out_opt = EquilibriumOptimizer([push_out_rvs_from_scan], max_use_ratio=10)
    optimizations = SeqOptimizer(canonicalize_opt.copy())
    optimizations.append(push_out_opt)
    opt_fgraph = optimize_graph(model_fgraph, optimizations, in_place=True)

    replacements = {}
    rv_to_logp_io = {}
    for node in opt_fgraph.toposort():
        # TODO: This `RandomVariable` "parsing" should be generalized and used
        # in more places (e.g. what if the outer-outputs are `Subtensor`s)
        if isinstance(node.op, RandomVariable):
            var = node.default_output()
            # shape = list(node.fgraph.shape_feature.shape_tuple(new_var))
            shape = None
            new_input_var = var.clone()
            if new_input_var.name:
                new_input_var.name = new_input_var.name.lower()
            replacements[var] = new_input_var
            rv_to_logp_io[var] = (
                new_input_var,
                _logp_fn(node.op, var.owner, shape)(new_input_var),
            )

        if isinstance(node.op, tt.Subtensor) and node.inputs[0].owner:
            # The output of `theano.scan` is sometimes a sliced tensor (in
            # order to get rid of initial values introduced by the `Scan`)
            node = node.inputs[0].owner

        if isinstance(node.op, Scan):
            scan_args = ScanArgs.from_node(node)
            rv_outer_outs = get_random_outer_outputs(scan_args)

            for var_idx, var, io_var in rv_outer_outs:
                scan_args, new_oi_var = convert_outer_out_to_in(
                    scan_args,
                    var,
                    inner_out_fn=create_inner_out_logp,
                    output_scan_args=scan_args,
                )
                replacements[var] = new_oi_var

            logp_scan_out = construct_scan(scan_args)

            for var_idx, var, io_var in rv_outer_outs:
                rv_to_logp_io[var] = (replacements[var], logp_scan_out[var_idx])

    # We need to use the new log-likelihood input variables that were
    # generated for each `RandomVariable` node.  They need to replace the
    # corresponding original variables within each log-likelihood graph.
    rv_vars, inputs_logp_outputs = zip(*rv_to_logp_io.items())
    new_inputs, logp_outputs = zip(*inputs_logp_outputs)

    rev_memo = {v: k for k, v in model_fgraph.memo.items()}

    # Replace the new cloned variables with the original ones, but only if
    # they're not any of the `RandomVariable` terms we've converted to
    # log-likelihoods.
    replacements.update(
        {
            k: v
            for k, v in rev_memo.items()
            if isinstance(k, tt.Variable)
            and v not in new_inputs
            and k not in replacements
        }
    )

    new_logp_outputs = tt_clone(logp_outputs, replace=replacements)

    rv_to_logp_io = {
        rev_memo[k]: v for k, v in zip(rv_vars, zip(new_inputs, new_logp_outputs))
    }

    return rv_to_logp_io
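# A minimal usage sketch for `logp`, assuming the `normal` random-variable
# constructor and the imports used in the tests above; the variable names
# are illustrative, not from the original code.
rng = shared(np.random.RandomState(1233532), borrow=False)

mu_tt = tt.vector("mu")
mu_tt.tag.test_value = np.r_[0.0, 1.0]

# A graph with a single `RandomVariable` output
Y_rv = normal(mu_tt, 1.0, rng=rng)
Y_rv.name = "Y"

# `logp` maps each `RandomVariable` to a pair of a new input variable
# (standing in for an observed value) and the log-likelihood graph written
# in terms of that input
rv_to_logp_io = logp(Y_rv)
y_input, y_logp = rv_to_logp_io[Y_rv]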