def rec_conv_to_rv(v, replacements, model, rand_state=None):
    """Recursively convert a PyMC3 random variable into a `RandomVariable`-based Theano graph."""
    if v in replacements:
        return walk(v, replacements)
    elif v.name and pm.util.is_transformed_name(v.name):
        untrans_name = pm.util.get_untransformed_name(v.name)
        v_untrans = getattr(model, untrans_name)

        rv_new = rec_conv_to_rv(v_untrans, replacements, model, rand_state=rand_state)
        replacements[v] = rv_new
        return rv_new
    elif hasattr(v, "distribution"):
        rv = pymc3_var_to_rv(v, rand_state=rand_state)

        rv_ins = []
        for i in tt_inputs([rv]):
            i_rv = rec_conv_to_rv(i, replacements, model, rand_state=rand_state)
            if i_rv is not None:
                replacements[i] = i_rv
                rv_ins.append(i_rv)
            else:
                rv_ins.append(i)

        _ = replace_input_nodes(rv_ins, [rv], memo=replacements, clone_inputs=False)

        rv_new = walk(rv, replacements)
        replacements[v] = rv_new
        return rv_new
    else:
        return None

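# For reference, `walk` above transitively looks a term up in `replacements`
# until no further mapping exists (in `symbolic-pymc` this is
# `unification.utils.transitive_get`).  The following is a minimal
# illustrative re-implementation of that behavior, not the library's code:
def _walk_sketch(key, assoc):
    """Follow the substitution chain for `key` in `assoc` to a fixed point."""
    while key in assoc:
        key = assoc[key]
    return key


# e.g. `_walk_sketch("a", {"a": "b", "b": "c"})` returns "c"
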
def test_mvnormalrv_ShapeFeature():
    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 2

    d_rv = MvNormalRV(tt.ones((M_tt,)), tt.eye(M_tt), size=2)

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=True,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2 = fg.shape_feature.shape_of[fg.memo[d_rv]]

    assert s1.eval() == 2
    assert fg.memo[M_tt] in tt_inputs([s2])

def test_mvnormal_ShapeFeature():
    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 2

    d_rv = multivariate_normal(tt.ones((M_tt,)), tt.eye(M_tt), size=2)

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=False,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2 = fg.shape_feature.shape_of[d_rv]

    assert get_test_value(s1) == 2
    assert M_tt in tt_inputs([s2])

    # Test broadcasted shapes
    mean = tt.tensor(config.floatX, [True, False])
    mean.tag.test_value = np.array([[0, 1, 2]], dtype=config.floatX)

    test_covar = np.diag(np.array([1, 10, 100], dtype=config.floatX))
    test_covar = np.stack([test_covar, test_covar * 10.0])
    cov = tt.as_tensor(test_covar).type()
    cov.tag.test_value = test_covar

    d_rv = multivariate_normal(mean, cov, size=[2, 3])

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=False,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2, s3, s4 = fg.shape_feature.shape_of[d_rv]

    assert s1.get_test_value() == 2
    assert s2.get_test_value() == 3
    assert s3.get_test_value() == 2
    assert s4.get_test_value() == 3

def test_dirichlet_ShapeFeature():
    """Make sure `RandomVariable.infer_shape` works with `ShapeFeature`."""
    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 2
    N_tt = tt.iscalar("N")
    N_tt.tag.test_value = 3

    d_rv = dirichlet(tt.ones((M_tt, N_tt)), name="Gamma")

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=False,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2 = fg.shape_feature.shape_of[d_rv]

    assert M_tt in tt_inputs([s1])
    assert N_tt in tt_inputs([s2])

def test_convert_rv_to_dist_shape():
    # Make sure we use the `ShapeFeature` to get the shape info
    X_rv = NormalRV(np.r_[1, 2], 2.0, name="X_rv")

    fgraph = FunctionGraph(tt_inputs([X_rv]), [X_rv], features=[tt.opt.ShapeFeature()])

    with pm.Model():
        res = convert_rv_to_dist(fgraph.outputs[0].owner, None)

    assert isinstance(res.distribution, pm.Normal)
    assert np.array_equal(res.distribution.shape, np.r_[2])

def model_graph(pymc_model, output_vars=None, rand_state=None, attach_memo=True):
    """Convert a PyMC3 model into a Theano `FunctionGraph`.

    Parameters
    ----------
    pymc_model: `Model`
        A PyMC3 model object.
    output_vars: list (optional)
        Variables to use as `FunctionGraph` outputs.  If not specified,
        the model's observed random variables are used.
    rand_state: Numpy rng (optional)
        When converting to `RandomVariable`s, use this random state object.
    attach_memo: boolean (optional)
        Add a property named `memo` to the returned `FunctionGraph` that
        contains the mappings between PyMC3 and `RandomVariable` terms.

    Returns
    -------
    out: `FunctionGraph`
    """
    model = pm.modelcontext(pymc_model)

    if output_vars is None:
        output_vars = list(model.observed_RVs)
    if rand_state is None:
        rand_state = theano.shared(np.random.RandomState())

    replacements = {}

    # First pass: convert the output variables and populate `replacements`
    # with the PyMC3-to-`RandomVariable` mappings.
    for o in output_vars:
        _ = rec_conv_to_rv(o, replacements, model, rand_state=rand_state)

    output_vars = [walk(o, replacements) for o in output_vars]

    fg_features = [tt.opt.ShapeFeature()]
    model_fg = FunctionGraph(
        [i for i in tt_inputs(output_vars) if not isinstance(i, tt.Constant)],
        output_vars,
        clone=True,
        memo=replacements,
        features=fg_features,
    )
    if attach_memo:
        model_fg.memo = replacements

    return model_fg

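def example_model_graph_usage():
    """A minimal usage sketch for `model_graph`.

    Illustrative only: the two-level normal model below is an assumption,
    not taken from the source, and serves just to show the conversion.
    """
    with pm.Model() as simple_model:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("y", mu, 1.0, observed=np.r_[0.5, -0.3])

    model_fg = model_graph(simple_model)

    # The graph's outputs are `RandomVariable` conversions of the model's
    # observed variables; `model_fg.memo` (when `attach_memo=True`) maps
    # the original PyMC3 terms to those conversions.
    assert isinstance(model_fg, FunctionGraph)
    return model_fg
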
def graph_model(graph, *model_args, generate_names=False, **model_kwargs):
    """Create a PyMC3 model from a Theano graph with `RandomVariable` nodes."""
    model = pm.Model(*model_args, **model_kwargs)

    fgraph = graph
    if not isinstance(fgraph, FunctionGraph):
        fgraph = FunctionGraph(tt.gof.graph.inputs([fgraph]), [fgraph])

    nodes = [n for n in fgraph.toposort() if isinstance(n.op, RandomVariable)]
    rv_replacements = {}
    node_id = 0
    for node in nodes:
        obs = get_rv_observation(node)
        if obs is not None:
            obs = obs.inputs[0]
            obs = tt_get_values(obs)

        old_rv_var = node.default_output()
        rv_var = theano.scan_module.scan_utils.clone(old_rv_var, replace=rv_replacements)
        node = rv_var.owner

        # Make sure there are only PyMC3 vars in the result.
        assert not any(
            isinstance(op.op, RandomVariable)
            for op in theano.gof.graph.ops(tt_inputs([rv_var]), [rv_var])
            if op != node
        )

        if generate_names and rv_var.name is None:
            node_name = "{}_{}".format(node.op.name, node_id)
            # warn("Name {} generated for node {}.".format(node, node_name))
            node_id += 1
            rv_var.name = node_name

        with model:
            rv = convert_rv_to_dist(node, obs)

        rv_replacements[old_rv_var] = rv

    model.rv_replacements = rv_replacements

    return model

def test_kanren_opt():
    """Make sure we can run miniKanren "optimizations" over a graph until a
    fixed-point/normal-form is reached.
    """
    tt.config.cxx = ""
    tt.config.compute_test_value = "ignore"

    x_tt = tt.vector("x")
    c_tt = tt.vector("c")
    d_tt = tt.vector("d")
    A_tt = tt.matrix("A")
    B_tt = tt.matrix("B")

    Z_tt = A_tt.dot(x_tt + B_tt.dot(c_tt + d_tt))

    fgraph = FunctionGraph(tt_inputs([Z_tt]), [Z_tt], clone=True)

    assert isinstance(fgraph.outputs[0].owner.op, tt.Dot)

    def distributes(in_lv, out_lv):
        return lall(
            # lhs == A * (x + b)
            eq(etuple(mt.dot, var("A"), etuple(mt.add, var("x"), var("b"))), etuplize(in_lv)),
            # rhs == A * x + A * b
            eq(
                etuple(mt.add, etuple(mt.dot, var("A"), var("x")), etuple(mt.dot, var("A"), var("b"))),
                out_lv,
            ),
        )

    distribute_opt = EquilibriumOptimizer([KanrenRelationSub(distributes)], max_use_ratio=10)

    fgraph_opt = optimize_graph(fgraph, distribute_opt, return_graph=False)

    assert fgraph_opt.owner.op == tt.add
    assert isinstance(fgraph_opt.owner.inputs[0].owner.op, tt.Dot)
    # TODO: Something wrong with `etuple` caching?
    # assert fgraph_opt.owner.inputs[0].owner.inputs[0] == A_tt
    assert fgraph_opt.owner.inputs[0].owner.inputs[0].name == "A"
    assert fgraph_opt.owner.inputs[1].owner.op == tt.add
    assert isinstance(fgraph_opt.owner.inputs[1].owner.inputs[0].owner.op, tt.Dot)
    assert isinstance(fgraph_opt.owner.inputs[1].owner.inputs[1].owner.op, tt.Dot)

def test_mvnormal_mvnormal():
    a_tt = tt.vector('a')
    R_tt = tt.matrix('R')
    F_t_tt = tt.matrix('F')
    V_tt = tt.matrix('V')

    a_tt.tag.test_value = np.r_[1., 0.]
    R_tt.tag.test_value = np.diag([10., 10.])
    F_t_tt.tag.test_value = np.c_[-2., 1.]
    V_tt.tag.test_value = np.diag([0.5])

    beta_rv = MvNormalRV(a_tt, R_tt, name='\\beta')

    E_y_rv = F_t_tt.dot(beta_rv)
    Y_rv = MvNormalRV(E_y_rv, V_tt, name='Y')

    y_tt = tt.as_tensor_variable(np.r_[-3.])
    y_tt.name = 'y'
    Y_obs = observed(y_tt, Y_rv)

    fgraph = FunctionGraph(tt_inputs([beta_rv, Y_obs]), [beta_rv, Y_obs], clone=True)

    posterior_opt = EquilibriumOptimizer(
        [KanrenRelationSub(conjugate_posteriors)], max_use_ratio=10)

    fgraph_opt = optimize_graph(fgraph, posterior_opt, return_graph=False)

    # Make sure that it removed the old, integrated observation distribution.
    assert fgraph_opt[1].owner.inputs[1].equals(tt.NoneConst)

    # Check that the SSE has decreased from prior to posterior.
    # TODO: Use a better test.
    beta_prior_mean_val = a_tt.tag.test_value
    F_val = F_t_tt.tag.test_value
    beta_post_mean_val = fgraph_opt[0].owner.inputs[0].tag.test_value
    priorp_err = np.square(y_tt.data - F_val.dot(beta_prior_mean_val)).sum()
    postp_err = np.square(y_tt.data - F_val.dot(beta_post_mean_val)).sum()

    # First, make sure the prior and posterior means are simply not equal.
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal, priorp_err, postp_err)

    # Now, make sure there's a decrease (relative to the observed point).
    np.testing.assert_array_less(postp_err, priorp_err)

def test_normal_ShapeFeature():
    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 3
    sd_tt = tt.scalar("sd")
    sd_tt.tag.test_value = np.array(1.0, dtype=config.floatX)

    d_rv = normal(tt.ones((M_tt,)), sd_tt, size=(2, M_tt))
    # Make sure a test value is available for the random variable
    d_rv.tag.test_value

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=False,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2 = fg.shape_feature.shape_of[d_rv]

    assert get_test_value(s1) == get_test_value(d_rv).shape[0]
    assert get_test_value(s2) == get_test_value(d_rv).shape[1]

def graph_model(fgraph, *model_args, **model_kwargs):
    """Create a PyMC3 model from a Theano graph with `RandomVariable` nodes."""
    model = pm.Model(*model_args, **model_kwargs)

    nodes = [n for n in fgraph.toposort() if isinstance(n.op, RandomVariable)]
    rv_replacements = {}
    for node in nodes:
        obs = get_rv_observation(node)
        if obs is not None:
            obs = obs.inputs[0]

            if isinstance(obs, tt.Constant):
                obs = obs.data
            elif isinstance(obs, theano.compile.sharedvalue.SharedVariable):
                obs = obs.get_value()
            else:
                raise TypeError(f'Unhandled observation type: {type(obs)}')

        old_rv_var = node.default_output()
        rv_var = theano.scan_module.scan_utils.clone(old_rv_var, replace=rv_replacements)
        node = rv_var.owner

        # Make sure there are only PyMC3 vars in the result.
        assert not any(
            isinstance(op.op, RandomVariable)
            for op in theano.gof.graph.ops(tt_inputs([rv_var]), [rv_var])
            if op != node
        )

        with model:
            rv = convert_rv_to_dist(node, obs)

        rv_replacements[old_rv_var] = rv

    model.rv_replacements = rv_replacements

    return model

def test_Normal_ShapeFeature():
    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 3
    sd_tt = tt.scalar("sd")
    sd_tt.tag.test_value = 1.0

    d_rv = NormalRV(tt.ones((M_tt,)), sd_tt, size=(2, M_tt))
    # Make sure a test value is available for the random variable
    d_rv.tag.test_value

    fg = FunctionGraph(
        [i for i in tt_inputs([d_rv]) if not isinstance(i, tt.Constant)],
        [d_rv],
        clone=True,
        features=[tt.opt.ShapeFeature()],
    )

    s1, s2 = fg.shape_feature.shape_of[fg.memo[d_rv]]

    assert get_test_value(s1) == get_test_value(d_rv).shape[0]
    assert get_test_value(s2) == get_test_value(d_rv).shape[1]

def optimize_graph(x, optimization, return_graph=None, in_place=False):
    """Easily optimize Theano graphs.

    Apply an optimization to either the graph formed by a Theano variable or
    an existing graph and return the resulting optimized graph.

    When given an existing `FunctionGraph`, the optimization is performed
    without side-effects (i.e. it won't change the given graph).
    """
    if not isinstance(x, tt_FunctionGraph):
        inputs = tt_inputs([x])
        outputs = [x]
        model_memo = clone_get_equiv(inputs, outputs, copy_orphans=False)
        cloned_inputs = [model_memo[i] for i in inputs if not isinstance(i, tt.Constant)]
        cloned_outputs = [model_memo[i] for i in outputs]

        x_graph = FunctionGraph(cloned_inputs, cloned_outputs, clone=False)
        x_graph.memo = model_memo

        if return_graph is None:
            return_graph = False
    else:
        x_graph = x

        if return_graph is None:
            return_graph = True

    x_graph_opt = x_graph if in_place else x_graph.clone()

    _ = optimization.optimize(x_graph_opt)

    if return_graph:
        res = x_graph_opt
    else:
        res = x_graph_opt.outputs
        x_graph_opt.disown()
        if len(res) == 1:
            (res,) = res

    return res

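def example_optimize_graph_usage():
    """A minimal usage sketch for `optimize_graph` (illustrative only).

    Applies Theano's canonicalization passes to a small, hypothetical
    expression; since the input is a plain variable (not a
    `FunctionGraph`), `optimize_graph` returns the optimized output
    variable itself.
    """
    x_tt = tt.vector("x")
    y_tt = x_tt + x_tt

    canonicalize_opt = optdb.query(Query(include=["canonicalize"]))
    y_opt = optimize_graph(y_tt, canonicalize_opt)
    return y_opt
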
def test_normals_to_model():
    a_tt = tt.vector('a')
    R_tt = tt.matrix('R')
    F_t_tt = tt.matrix('F')
    V_tt = tt.matrix('V')

    a_tt.tag.test_value = np.r_[1., 0.]
    R_tt.tag.test_value = np.diag([10., 10.])
    F_t_tt.tag.test_value = np.c_[-2., 1.]
    V_tt.tag.test_value = np.diag([0.5])

    beta_rv = MvNormalRV(a_tt, R_tt, name='\\beta')

    E_y_rv = F_t_tt.dot(beta_rv)
    Y_rv = MvNormalRV(E_y_rv, V_tt, name='Y')

    y_tt = tt.as_tensor_variable(np.r_[-3.])
    y_tt.name = 'y'
    Y_obs = observed(y_tt, Y_rv)

    fgraph = FunctionGraph(tt_inputs([beta_rv, Y_obs]), [beta_rv, Y_obs], clone=True)

    model = graph_model(fgraph)

    assert len(model.observed_RVs) == 1
    assert model.observed_RVs[0].name == 'Y'
    Y_pm = model.observed_RVs[0].distribution
    assert isinstance(Y_pm, pm.MvNormal)
    np.testing.assert_array_equal(
        model.observed_RVs[0].observations.data, y_tt.data)
    assert Y_pm.mu.owner.op == tt.basic._dot
    assert Y_pm.cov.name == 'V'
    assert len(model.unobserved_RVs) == 1
    assert model.unobserved_RVs[0].name == '\\beta'
    beta_pm = model.unobserved_RVs[0].distribution
    assert isinstance(beta_pm, pm.MvNormal)

def test_logp():
    hmm_model_env = create_test_hmm()
    M_tt = hmm_model_env["M_tt"]
    N_tt = hmm_model_env["N_tt"]
    mus_tt = hmm_model_env["mus_tt"]
    sigmas_tt = hmm_model_env["sigmas_tt"]
    Y_rv = hmm_model_env["Y_rv"]
    S_rv = hmm_model_env["S_rv"]
    S_in = hmm_model_env["S_in"]
    Gamma_rv = hmm_model_env["Gamma_rv"]
    rng_tt = hmm_model_env["rng_tt"]

    Y_obs = Y_rv.clone()
    Y_obs.name = "Y_obs"

    # `S_in` includes `S_0_rv` (and `pi_0_rv`), unlike `S_rv`
    S_obs = S_in.clone()
    S_obs.name = "S_obs"

    Gamma_obs = Gamma_rv.clone()
    Gamma_obs.name = "Gamma_obs"

    test_point = {
        mus_tt: mus_tt.tag.test_value,
        N_tt: N_tt.tag.test_value,
        Gamma_obs: Gamma_rv.tag.test_value,
        Y_obs: Y_rv.tag.test_value,
        S_obs: S_in.tag.test_value,
    }

    def logp_scan_fn(s_t, s_tm1, y_t, mus_t, sigma_t, Gamma_t):
        gamma_t = Gamma_t[s_tm1]
        log_s_t = pm.Categorical.dist(gamma_t).logp(s_t)
        mu_t = mus_t[s_t]
        log_y_t = pm.Normal.dist(mu_t, sigma_t).logp(y_t)
        gamma_t.name = "gamma_t"
        log_y_t.name = "logp(y_t)"
        log_s_t.name = "logp(s_t)"
        mu_t.name = "mu[S_t]"
        return log_s_t, log_y_t

    (true_S_logp, true_Y_logp), scan_updates = theano.scan(
        fn=logp_scan_fn,
        sequences=[{"input": S_obs, "taps": [0, -1]}, Y_obs, mus_tt, sigmas_tt],
        non_sequences=[Gamma_obs],
        outputs_info=[{}, {}],
        strict=True,
        name="scan_rv",
    )

    # Make sure there are no `RandomVariable` nodes among our expected/true
    # log-likelihood graphs.
    assert not vars_to_rvs(true_S_logp)
    assert not vars_to_rvs(true_Y_logp)

    true_S_logp_val = true_S_logp.eval(test_point)
    true_Y_logp_val = true_Y_logp.eval(test_point)

    #
    # Now, compute the log-likelihoods
    #
    logps = logp(Y_rv)

    S_logp = logps[S_in][1]
    Y_logp = logps[Y_rv][1]

    # from theano.printing import debugprint as tt_dprint

    # There shouldn't be any `RandomVariable`s here either
    assert not vars_to_rvs(S_logp)
    assert not vars_to_rvs(Y_logp)

    assert N_tt in tt_inputs([S_logp])
    assert mus_tt in tt_inputs([S_logp])
    assert logps[S_in][0] in tt_inputs([S_logp])
    assert logps[Y_rv][0] in tt_inputs([S_logp])
    assert logps[Gamma_rv][0] in tt_inputs([S_logp])

    new_test_point = {
        mus_tt: mus_tt.tag.test_value,
        N_tt: N_tt.tag.test_value,
        logps[Gamma_rv][0]: Gamma_rv.tag.test_value,
        logps[Y_rv][0]: Y_rv.tag.test_value,
        logps[S_in][0]: S_in.tag.test_value,
    }

    with theano.change_flags(on_unused_input="warn"):
        S_logp_val = S_logp.eval(new_test_point)
        Y_logp_val = Y_logp.eval(new_test_point)

    assert np.array_equal(true_S_logp_val, S_logp_val)
    assert np.array_equal(Y_logp_val, true_Y_logp_val)

def test_normals_to_model():
    """Test conversion to a PyMC3 model."""
    tt.config.compute_test_value = 'ignore'

    a_tt = tt.vector('a')
    R_tt = tt.matrix('R')
    F_t_tt = tt.matrix('F')
    V_tt = tt.matrix('V')

    a_tt.tag.test_value = np.r_[1., 0.]
    R_tt.tag.test_value = np.diag([10., 10.])
    F_t_tt.tag.test_value = np.c_[-2., 1.]
    V_tt.tag.test_value = np.diag([0.5])

    beta_rv = MvNormalRV(a_tt, R_tt, name='\\beta')

    E_y_rv = F_t_tt.dot(beta_rv)
    Y_rv = MvNormalRV(E_y_rv, V_tt, name='Y')

    y_val = np.r_[-3.]

    def _check_model(model):
        assert len(model.observed_RVs) == 1
        assert model.observed_RVs[0].name == 'Y'
        Y_pm = model.observed_RVs[0].distribution
        assert isinstance(Y_pm, pm.MvNormal)
        np.testing.assert_array_equal(model.observed_RVs[0].observations.data, y_val)
        assert Y_pm.mu.owner.op == tt.basic._dot
        assert Y_pm.cov.name == 'V'
        assert len(model.unobserved_RVs) == 1
        assert model.unobserved_RVs[0].name == '\\beta'
        beta_pm = model.unobserved_RVs[0].distribution
        assert isinstance(beta_pm, pm.MvNormal)

    y_tt = theano.shared(y_val, name='y')
    Y_obs = observed(y_tt, Y_rv)

    fgraph = FunctionGraph(tt_inputs([beta_rv, Y_obs]), [beta_rv, Y_obs], clone=True)

    model = graph_model(fgraph)
    _check_model(model)

    # Now, let `graph_model` create the `FunctionGraph`
    model = graph_model(Y_obs)
    _check_model(model)

    # Use a different type of observation value
    y_tt = tt.as_tensor_variable(y_val, name='y')
    Y_obs = observed(y_tt, Y_rv)

    model = graph_model(Y_obs)
    _check_model(model)

    # Use an invalid type of observation value
    tt.config.compute_test_value = 'ignore'
    y_tt = tt.vector('y')
    Y_obs = observed(y_tt, Y_rv)

    with pytest.raises(TypeError):
        model = graph_model(Y_obs)

def logp(*output_vars):
    """Compute the log-likelihood for a graph.

    Parameters
    ----------
    *output_vars: Tuple[TensorVariable]
        The outputs of a graph containing `RandomVariable`s.

    Returns
    -------
    Dict[TensorVariable, TensorVariable]
        A map from `RandomVariable`s to their log-likelihood graphs.
    """
    # model_inputs = [i for i in tt_inputs(output_vars) if not isinstance(i, tt.Constant)]
    model_inputs = tt_inputs(output_vars)
    model_fgraph = FunctionGraph(
        model_inputs,
        output_vars,
        clone=True,
        # XXX: `ShapeFeature` introduces cached constants
        # features=[tt.opt.ShapeFeature()]
    )

    canonicalize_opt = optdb.query(Query(include=["canonicalize"]))
    push_out_opt = EquilibriumOptimizer([push_out_rvs_from_scan], max_use_ratio=10)
    optimizations = SeqOptimizer(canonicalize_opt.copy())
    optimizations.append(push_out_opt)
    opt_fgraph = optimize_graph(model_fgraph, optimizations, in_place=True)

    replacements = {}
    rv_to_logp_io = {}
    for node in opt_fgraph.toposort():
        # TODO: This `RandomVariable` "parsing" should be generalized and used
        # in more places (e.g. what if the outer-outputs are `Subtensor`s?)
        if isinstance(node.op, RandomVariable):
            var = node.default_output()
            # shape = list(node.fgraph.shape_feature.shape_tuple(new_var))
            shape = None
            new_input_var = var.clone()
            if new_input_var.name:
                new_input_var.name = new_input_var.name.lower()
            replacements[var] = new_input_var
            rv_to_logp_io[var] = (
                new_input_var,
                _logp_fn(node.op, var.owner, shape)(new_input_var),
            )

        if isinstance(node.op, tt.Subtensor) and node.inputs[0].owner:
            # The output of `theano.scan` is sometimes a sliced tensor (in
            # order to get rid of initial values introduced by the `Scan`)
            node = node.inputs[0].owner

        if isinstance(node.op, Scan):
            scan_args = ScanArgs.from_node(node)
            rv_outer_outs = get_random_outer_outputs(scan_args)

            for var_idx, var, io_var in rv_outer_outs:
                scan_args, new_oi_var = convert_outer_out_to_in(
                    scan_args, var,
                    inner_out_fn=create_inner_out_logp,
                    output_scan_args=scan_args,
                )
                replacements[var] = new_oi_var

            logp_scan_out = construct_scan(scan_args)

            for var_idx, var, io_var in rv_outer_outs:
                rv_to_logp_io[var] = (replacements[var], logp_scan_out[var_idx])

    # We need to use the new log-likelihood input variables that were generated
    # for each `RandomVariable` node.  They need to replace the corresponding
    # original variables within each log-likelihood graph.
    rv_vars, inputs_logp_outputs = zip(*rv_to_logp_io.items())
    new_inputs, logp_outputs = zip(*inputs_logp_outputs)

    rev_memo = {v: k for k, v in model_fgraph.memo.items()}

    # Replace the new cloned variables with the original ones, but only if
    # they're not any of the `RandomVariable` terms we've converted to
    # log-likelihoods.
    replacements.update({
        k: v
        for k, v in rev_memo.items()
        if isinstance(k, tt.Variable) and v not in new_inputs and k not in replacements
    })

    new_logp_outputs = tt_clone(logp_outputs, replace=replacements)

    rv_to_logp_io = {
        rev_memo[k]: v for k, v in zip(rv_vars, zip(new_inputs, new_logp_outputs))
    }

    return rv_to_logp_io
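

def example_logp_usage():
    """A minimal usage sketch for `logp` (illustrative only).

    `logp` maps each `RandomVariable` in the graph to a pair of
    `(value variable, log-likelihood graph)`; the variable names below
    are assumptions for illustration, not from the source.
    """
    mu_tt = tt.vector("mu")
    mu_tt.tag.test_value = np.r_[0.0, 0.0]

    X_rv = NormalRV(mu_tt, 1.0, name="X")

    logps = logp(X_rv)

    # The value variable stands in for a realization of `X_rv` within
    # its log-likelihood graph.
    x_vv, x_logp = logps[X_rv]
    return x_vv, x_logp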