def log_post_ratio(from_tree, to_tree, seqdist):
    """Return the log posterior ratio of two junction trees.

    Computes ``log p(to_tree) - log p(from_tree)`` where each tree's
    (unnormalized) log posterior is its partial log-likelihood under
    ``seqdist`` minus the log number of junction trees of its graph
    (correcting for junction-tree multiplicity).

    Args:
        from_tree (NetworkX graph): Current junction tree.
        to_tree (NetworkX graph): Proposed junction tree.
        seqdist: Distribution object exposing ``log_likelihood_partial``.

    Returns:
        float: The log posterior ratio.
    """
    def _log_post(tree):
        # Unnormalized log posterior for a single junction tree.
        seps = jtlib.separators(tree)
        ll = seqdist.log_likelihood_partial(tree.nodes(), seps)
        return ll - jtlib.log_n_junction_trees(tree, seps)

    log_post_from = _log_post(from_tree)
    log_post_to = _log_post(to_tree)
    return log_post_to - log_post_from
def test_logmu():
    """Check that log_n_junction_trees reproduces the count (57802752)
    from the worked example in Thomas & Green 2009."""
    fs = frozenset
    cliques = [fs([11, 12]), fs([9, 12, 17]), fs([3, 7, 17, 22]),
               fs([9, 10]), fs([6]), fs([4]), fs([8, 17]), fs([17, 21]),
               fs([3, 18, 19]), fs([2, 3, 18]), fs([2, 3, 16]), fs([3, 20]),
               fs([2, 3, 18]), fs([1, 2, 3]), fs([3, 5]), fs([13, 14, 15]),
               fs([13, 14, 23])]
    # Junction-tree edges expressed as index pairs into `cliques`.
    edge_indices = [(0, 1), (1, 3), (1, 2), (2, 4), (2, 6), (2, 8), (4, 5),
                    (6, 7), (8, 9), (9, 10), (9, 11), (9, 13), (13, 14),
                    (13, 15), (15, 16)]
    g = nx.Graph()
    g.add_nodes_from(cliques)
    g.add_edges_from((cliques[a], cliques[b]) for a, b in edge_indices)
    S = libj.separators(g)
    assert int(np.round(np.exp(libj.log_n_junction_trees(g, S)))) == 57802752
def predictive_pdf(self, x_new, x, mu, v, tau, alpha, graph_dist):
    """Predictive density of x_new with the graph marginalized out.

    For each graph in ``graph_dist.domain`` the predictive density
    factorizes over the cliques and separators of a junction tree of the
    graph; each factor is a multivariate t log-density. The per-graph
    densities are mixed with weights ``graph_dist.pdf(graph)``.

    Args:
        x_new: Data points to evaluate (rows are items; columns variables).
        x: Observed data used to form the posterior parameters.
        mu: Prior mean (reassigned below to a column np.matrix).
        v (float): Prior precision-scaling parameter.
        tau: Prior scale matrix.
        alpha: NOTE(review): unused in this method — confirm whether it
            should enter the posterior update.
        graph_dist: Graph distribution with ``domain`` and ``pdf``.

    Returns:
        float: The mixed predictive density (not log).
    """
    pred_density = 0.0
    # Clique/separator t-densities are shared across graphs, so cache by
    # frozenset; note the posterior parameters are recomputed per graph
    # but do not depend on the graph, so the cache stays valid.
    cache = {}
    for graph in graph_dist.domain:
        log_pred_density = 0.0
        tree = dlib.junction_tree(graph)
        cliques = tree.nodes()
        separators = jtlib.separators(tree)
        k = x_new.shape[0]  # items to classify
        n = x.shape[0]
        # Posterior (star) parameters of the normal-Wishart update.
        mu = np.matrix(mu).reshape(len(mu), 1)
        x_bar = np.mean(x, axis=0).T
        s = (x - mu.T).T * (x - mu.T)
        mu_star = (v * mu + n * x_bar) / (v + n)
        tau_star = tau + s + (n * v / (v + n)) * (mu - x_bar) * (mu - x_bar).T
        v_star = v + n
        #print "v_star: " + str(v_star)
        # Clique factors: product over cliques of t densities.
        for c in cliques:
            if c not in cache:
                node_list = list(c)
                x_new_c = x_new[np.ix_(range(k), node_list)].T
                t_d = len(c)
                #TODO: Bug? q in paper
                t_mu = mu_star[node_list]
                # .I inverts the scaled marginal scale matrix (np.matrix).
                t_tau_star = (tau_star[np.ix_(node_list, node_list)] *
                              (v_star + 1) /
                              (v_star * (v_star + 1 - t_d))).I
                t_df = v_star - t_d + 1  # what is this?
                cache[c] = tdist.log_pdf(x_new_c, t_mu, t_tau_star, t_df)
            log_pred_density += cache[c]
        # Separator factors: divided out with multiplicity nu (the number
        # of junction-tree edges labeled by the separator).
        for sep in separators:
            if len(sep) == 0:
                continue
            nu = len(separators[sep])
            if sep not in cache:
                node_list = list(sep)
                x_new_s = x_new[np.ix_(range(k), node_list)].T
                t_d = len(sep)
                t_mu = mu_star[node_list]
                t_tau_star = (tau_star[np.ix_(node_list, node_list)] *
                              (v_star + 1) /
                              (v_star * (v_star + 1 - t_d))).I
                t_df = v_star - t_d + 1
                cache[sep] = tdist.log_pdf(x_new_s, t_mu, t_tau_star, t_df)
            log_pred_density -= nu * cache[sep]
        # Mix over graphs with the graph-distribution weights.
        pred_density += np.exp(log_pred_density) * graph_dist.pdf(graph)
    return float(pred_density)
def log_likelihood(graph, S, n, D, delta, cache={}):
    """Log likelihood of data under a decomposable graphical model.

    Builds a junction tree of ``graph`` and delegates to
    ``log_likelihood_partial`` over its cliques and separators.

    Args:
        graph (NetworkX graph): A decomposable graph.
        S (Numpy matrix): sum of squares matrix for the full distribution
        n (int): number of data samples on which S is built
        D (Numpy matrix): location matrix for the full distribution
        delta (float): scale parameter
        cache (dict): memoization cache shared across calls.
            NOTE(review): the mutable default is presumably intentional
            (module-wide memoization) — confirm before changing.

    Returns:
        The value of ``log_likelihood_partial`` for the tree's cliques
        and separators.
    """
    jt = trilearn.graph.decomposable.junction_tree(graph)
    return log_likelihood_partial(S, n, D, delta, jt.nodes(),
                                  jtlib.separators(jt), cache)
def cov_matrix(G, r, s2):
    """ Returns a covariance matrix cov such that zeros in cov.I is
    determined by G.

    Args:
        G (NetworkX graph): A decomposable graph.
        r (float): Correlation.
        s2 (float): Variance.

    Returns:
        Numpy matrix: A covariance matrix cov such that zeros in it
        inverse is determined by G.
    """
    p = G.order()
    T = trilearn.graph.decomposable.junction_tree(G)
    cliques = T.nodes()
    seps = jtlib.separators(T)
    omega = np.matrix(np.zeros((p, p)))
    cov = np.matrix(np.zeros((p, p)))

    def _block(size, weight):
        # Dense block: variance s2 on the diagonal, covariance s2*r off
        # it, scaled by the multiplicity `weight`. The two parts have
        # disjoint support, so this equals building them separately.
        eye = np.identity(size)
        return weight * (eye * s2 + (np.zeros((size, size)) + 1 - eye) * s2 * r)

    # Accumulate clique blocks, then subtract separator blocks weighted
    # by how many junction-tree edges each separator labels.
    for c in cliques:
        idx = np.ix_(list(c), list(c))
        cov[idx] += _block(len(c), 1)
    for s in seps:
        if len(s) == 0:
            continue
        idx = np.ix_(list(s), list(s))
        cov[idx] -= _block(len(s), len(seps[s]))

    # Assemble the precision matrix from the inverses of the local
    # marginal blocks (clique minus separator decomposition).
    for c in cliques:
        idx = np.ix_(list(c), list(c))
        omega[idx] += cov[idx].I
    for s in seps:
        if len(s) == 0:
            continue
        idx = np.ix_(list(s), list(s))
        omega[idx] -= len(seps[s]) * cov[idx].I
    return omega.I
def separators(graph):
    """Return the separators of a decomposable graph.

    A junction tree of ``graph`` is built and its separators extracted.

    Args:
        graph (NetworkX graph): A decomposable graph.

    Returns:
        dict: Maps each separator to the set of junction-tree edges it
        labels, e.g.::

            {frozenset([2]): set([(frozenset([2, 3]), frozenset([0, 1, 2]))]),
             frozenset([3]): set([(frozenset([2, 3]), frozenset([3, 4]))])}
    """
    return libj.separators(junction_tree(graph))
def sample_trajectory(n_samples, randomize, sd):
    """Metropolis-Hastings sampler over decomposable graphs via junction trees.

    Starts from the empty graph, then at each iteration proposes either a
    connect (r == 0) or a disconnect (r == 1) move on the junction tree,
    accepting with the usual MH ratio (target times proposal correction).
    The target of the chain is the graph log-likelihood under ``sd``
    penalized by the log number of junction trees, so the chain targets
    the graph posterior rather than the tree posterior.

    Args:
        n_samples (int): number of MCMC iterations.
        randomize (int): every ``randomize`` iterations the junction tree
            is re-randomized among the trees of the current graph.
        sd: sequential distribution; must expose ``p`` (node count) and
            ``log_likelihood(graph)``.

    Returns:
        mcmctraj.Trajectory: trajectory of sampled graphs; per-sample log
        probabilities are stored on ``gtraj.logl``.
    """
    # Initial state: empty graph on sd.p nodes and a junction tree of it.
    graph = nx.Graph()
    graph.add_nodes_from(range(sd.p))
    jt = dlib.junction_tree(graph)
    assert (jtlib.is_junction_tree(jt))
    jt_traj = [None] * n_samples
    graphs = [None] * n_samples
    jt_traj[0] = jt
    graphs[0] = jtlib.graph(jt)
    log_prob_traj = [None] * n_samples

    gtraj = mcmctraj.Trajectory()
    gtraj.set_sampling_method({
        "method": "mh",
        "params": {
            "samples": n_samples,
            "randomize_interval": randomize
        }
    })
    gtraj.set_sequential_distribution(sd)

    # Log target at the initial state (the 0.0 assignment is immediately
    # overwritten; kept as-is).
    log_prob_traj[0] = 0.0
    log_prob_traj[0] = sd.log_likelihood(jtlib.graph(jt_traj[0]))
    log_prob_traj[0] += -jtlib.log_n_junction_trees(
        jt_traj[0], jtlib.separators(jt_traj[0]))
    accept_traj = [0] * n_samples

    # NOTE(review): MAP_graph is tracked below but never returned or used
    # after the loop — possibly dead code; confirm.
    MAP_graph = (graphs[0], log_prob_traj[0])

    for i in tqdm(range(1, n_samples), desc="Metropolis-Hastings samples"):
        if log_prob_traj[i - 1] > MAP_graph[1]:
            MAP_graph = (graphs[i - 1], log_prob_traj[i - 1])

        if i % randomize == 0:
            # Re-draw a uniform junction tree for the current graph.
            # NOTE(review): graphs[i]/log_prob_traj[i] set here are
            # overwritten by the move below, and log_p1 still reads
            # log_prob_traj[i - 1] — confirm this is intended.
            jtlib.randomize(jt)
            graphs[i] = jtlib.graph(jt)  # TODO: Improve.
            log_prob_traj[i] = sd.log_likelihood(
                graphs[i]) - jtlib.log_n_junction_trees(
                    jt, jtlib.separators(jt))

        r = np.random.randint(2)  # Connect / disconnect move
        num_seps = jt.size()
        log_p1 = log_prob_traj[i - 1]
        if r == 0:
            # Connect move: the move mutates jt in place; on rejection the
            # matching disconnect_* undoes it.
            num_cliques = jt.order()
            conn = aglib.connect_move(
                jt)  # need to move to calculate posterior
            seps_prop = jtlib.separators(jt)
            log_p2 = sd.log_likelihood(
                jtlib.graph(jt)) - jtlib.log_n_junction_trees(jt, seps_prop)
            if not conn:
                # Proposal failed: keep the previous state.
                log_prob_traj[i] = log_prob_traj[i - 1]
                graphs[i] = graphs[i - 1]
                continue
            # Clique that would be disconnected on reversal.
            C_disconn = conn[2] | conn[3] | conn[4]
            if conn[0] == "a":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn, XSneig,
                 YSneig) = conn
                (NX_disconn, NY_disconn,
                 N_disconn) = aglib.disconnect_get_neighbors(
                     jt, C_disconn, X, Y)  # TODO: could this be done faster?
                # Reverse-move proposal probability.
                log_q21 = aglib.disconnect_logprob_a(num_cliques - 1, X, Y, S,
                                                     N_disconn)
                #print log_p2, log_q21, log_p1, log_q12
                # MH acceptance probability with proposal correction.
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                #print alpha
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    # print "Accept"
                    accept_traj[i] = 1
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    # Undo the connect move.
                    aglib.disconnect_a(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn, XSneig, YSneig)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                continue
            elif conn[0] == "b":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques, X, Y, S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    #print "Accept"
                    accept_traj[i] = 1
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    # Undo the connect move.
                    aglib.disconnect_b(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                continue
            elif conn[0] == "c":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques, X, Y, S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    accept_traj[i] = 1
                    #print "Accept"
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    # Undo the connect move.
                    aglib.disconnect_c(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                continue
            elif conn[0] == "d":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques + 1, X, Y,
                                                       S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    accept_traj[i] = 1
                    #print "Accept"
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    # Undo the connect move.
                    aglib.disconnect_d(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                continue
        elif r == 1:
            # Disconnect move: mutates jt in place; on rejection the
            # matching connect_* undoes it.
            disconnect = aglib.disconnect_move(
                jt)  # need to move to calculate posterior
            seps_prop = jtlib.separators(jt)
            log_p2 = sd.log_likelihood(
                jtlib.graph(jt)) - jtlib.log_n_junction_trees(jt, seps_prop)
            #assert(jtlib.is_junction_tree(jt))
            #print "disconnect"
            if disconnect is not False:
                if disconnect[0] == "a":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    # Reverse-move proposal probability.
                    log_q21 = aglib.connect_logprob(num_seps + 1, X, Y,
                                                    CX_conn, CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12),
                                1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        # Undo the disconnect move.
                        aglib.connect_a(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                    continue
                elif disconnect[0] == "b":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps, X, Y, CX_conn,
                                                    CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12),
                                1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        # Undo the disconnect move.
                        aglib.connect_b(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                    continue
                elif disconnect[0] == "c":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps, X, Y, CX_conn,
                                                    CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12),
                                1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        # Undo the disconnect move.
                        aglib.connect_c(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                    continue
                elif disconnect[0] == "d":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps - 1, X, Y,
                                                    CX_conn, CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12),
                                1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        #print "Accept"
                        accept_traj[i] = 1
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        # Undo the disconnect move.
                        aglib.connect_d(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                    continue
            else:
                # Proposal failed: keep the previous state.
                log_prob_traj[i] = log_prob_traj[i - 1]
                graphs[i] = graphs[i - 1]
                continue
        #print(np.mean(accept_traj[:i]))
    gtraj.set_trajectory(graphs)
    gtraj.logl = log_prob_traj
    return gtraj