Example #1
def log_post_ratio(from_tree, to_tree, seqdist):
    """ Log posterior ratio between to_tree and from_tree under seqdist, where
    each tree's log posterior is its graph log-likelihood minus the log number
    of junction trees representing that graph. """
    from_tree_seps = jtlib.separators(from_tree)
    log_post1 = seqdist.log_likelihood_partial(from_tree.nodes(), from_tree_seps)
    log_post1 -= jtlib.log_n_junction_trees(from_tree, from_tree_seps)

    to_tree_seps = jtlib.separators(to_tree)
    log_post2 = seqdist.log_likelihood_partial(to_tree.nodes(), to_tree_seps)
    log_post2 -= jtlib.log_n_junction_trees(to_tree, to_tree_seps)

    return log_post2 - log_post1
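This ratio is the posterior part of a Metropolis-Hastings acceptance probability: each junction tree's log posterior is the graph's log-likelihood minus the log number of junction trees representing that graph. A minimal, self-contained sketch of how such a ratio is typically combined with a log proposal ratio (the numbers below are hypothetical and not tied to trilearn):

import numpy as np

log_post_ratio = -0.7  # log_post2 - log_post1, e.g. as returned by log_post_ratio
log_q_ratio = 0.2      # hypothetical log q(to -> from) - log q(from -> to)

# Metropolis-Hastings acceptance probability, capped at 1.
alpha = min(1.0, np.exp(log_post_ratio + log_q_ratio))
accept = np.random.rand() < alpha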
Example #2
def test_logmu():
    """
    Check that log_n_junction_trees (log mu) reproduces the example in Thomas & Green (2009).
    """
    cliques = [frozenset([11, 12]), frozenset([9, 12, 17]), frozenset([3, 7, 17, 22]),
    frozenset([9, 10]), frozenset([6]), frozenset([4]), frozenset([8, 17]),
    frozenset([17, 21]), frozenset([3, 18, 19]), frozenset([2, 3, 18]),
    frozenset([2, 3, 16]), frozenset([3, 20]), frozenset([2, 3, 18]), frozenset([1, 2, 3]),
    frozenset([3, 5]), frozenset([13, 14, 15]), frozenset([13, 14, 23])]
    edges = [(frozenset([11, 12]), frozenset([9, 12, 17])),
    (frozenset([9, 12, 17]), frozenset([9, 10])),
    (frozenset([9, 12, 17]), frozenset([3, 7, 17, 22])),
    (frozenset([3, 7, 17, 22]), frozenset([6])),
    (frozenset([3, 7, 17, 22]), frozenset([8, 17])),
    (frozenset([3, 7, 17, 22]), frozenset([3, 18, 19])),
    (frozenset([6]), frozenset([4])),
    (frozenset([8, 17]), frozenset([17, 21])),
    (frozenset([3, 18, 19]), frozenset([2, 3, 18])),
    (frozenset([2, 3, 18]), frozenset([2, 3, 16])),
    (frozenset([2, 3, 18]), frozenset([3, 20])),
    (frozenset([2, 3, 18]), frozenset([1, 2, 3])),
    (frozenset([1, 2, 3]), frozenset([3, 5])),
    (frozenset([1, 2, 3]), frozenset([13, 14, 15])),
    (frozenset([13, 14, 15]), frozenset([13, 14, 23]))]

    g = nx.Graph()
    g.add_nodes_from(cliques)
    g.add_edges_from(edges)
    S = libj.separators(g)

    assert int(np.round(np.exp(libj.log_n_junction_trees(g, S)))) == 57802752
Example #3
    def predictive_pdf(self, x_new, x, mu, v, tau, alpha, graph_dist):
        """
        This is the predictive distribution of x_new.
        It is a multivatiate T-distributiona where the graph
        is marginalized out accordning to according to graph_dist.
        """
        pred_density = 0.0

        cache = {}
        for graph in graph_dist.domain:
            log_pred_density = 0.0
            tree = dlib.junction_tree(graph)
            cliques = tree.nodes()
            separators = jtlib.separators(tree)

            k = x_new.shape[0]  # items to classify
            n = x.shape[0]
            mu = np.matrix(mu).reshape(len(mu), 1)
            x_bar = np.mean(x, axis=0).T
            s = (x - mu.T).T * (x - mu.T)
            mu_star = (v * mu + n * x_bar) / (v + n)
            tau_star = tau + s + (n * v /
                                  (v + n)) * (mu - x_bar) * (mu - x_bar).T
            v_star = v + n
            #print "v_star: " + str(v_star)
            for c in cliques:
                if c not in cache:
                    node_list = list(c)
                    x_new_c = x_new[np.ix_(range(k), node_list)].T
                    t_d = len(c)  #TODO: Bug?  q in paper
                    t_mu = mu_star[node_list]
                    t_tau_star = (tau_star[np.ix_(node_list, node_list)] *
                                  (v_star + 1) / (v_star *
                                                  (v_star + 1 - t_d))).I
                    t_df = v_star - t_d + 1  # what is this?
                    cache[c] = tdist.log_pdf(x_new_c, t_mu, t_tau_star, t_df)
                log_pred_density += cache[c]

            for sep in separators:
                if len(sep) == 0:
                    continue
                nu = len(separators[sep])
                if sep not in cache:
                    node_list = list(sep)

                    x_new_s = x_new[np.ix_(range(k), node_list)].T
                    t_d = len(sep)
                    t_mu = mu_star[node_list]
                    t_tau_star = (tau_star[np.ix_(node_list, node_list)] *
                                  (v_star + 1) / (v_star *
                                                  (v_star + 1 - t_d))).I
                    t_df = v_star - t_d + 1
                    cache[sep] = tdist.log_pdf(x_new_s, t_mu, t_tau_star, t_df)
                log_pred_density -= nu * cache[sep]
            pred_density += np.exp(log_pred_density) * graph_dist.pdf(graph)
        return float(pred_density)
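Both the clique and separator terms above are evaluated with tdist.log_pdf. Below is a minimal, self-contained sketch of the standard multivariate t log-density that such a call presumably computes; the function name mvt_logpdf and the argument order (x, mu, sigma, df) are illustrative assumptions, not the library's API:

import numpy as np
from scipy.special import gammaln

def mvt_logpdf(x, mu, sigma, df):
    """Log-density of a d-dimensional Student-t with location mu,
    scale matrix sigma and df degrees of freedom, evaluated at x."""
    x = np.asarray(x, dtype=float).ravel()
    mu = np.asarray(mu, dtype=float).ravel()
    d = x.shape[0]
    dev = x - mu
    _, logdet = np.linalg.slogdet(sigma)
    quad = dev @ np.linalg.solve(sigma, dev)  # (x - mu)' sigma^{-1} (x - mu)
    return (gammaln(0.5 * (df + d)) - gammaln(0.5 * df)
            - 0.5 * d * np.log(df * np.pi) - 0.5 * logdet
            - 0.5 * (df + d) * np.log1p(quad / df))

# Example: standard bivariate t with 3 degrees of freedom.
print(mvt_logpdf([0.5, -0.2], [0.0, 0.0], np.eye(2), df=3))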
Example #4
def log_likelihood(graph, S, n, D, delta, cache={}):
    """ Log-likelihood of the data under a decomposable graph, computed over
    the cliques and separators of a junction tree of the graph.

    Args:
        graph (NetworkX graph): A decomposable graph.
        S (Numpy matrix): sum of squares matrix for the full distribution
        D (Numpy matrix): location matrix for the full distribution
        delta (float): scale parameter
        n (int): number of data samples on which S is built
        cache (dict): cache of previously computed clique/separator terms
    """
    tree = trilearn.graph.decomposable.junction_tree(graph)
    separators = jtlib.separators(tree)
    cliques = tree.nodes()
    return log_likelihood_partial(S, n, D, delta, cliques, separators, cache)
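For a decomposable graph the likelihood factorizes over a junction tree: clique contributions are summed and each separator contribution is subtracted once per junction-tree edge labelled by that separator. A toy illustration with made-up per-clique and per-separator log marginal likelihoods:

clique_logl = {frozenset([0, 1]): -3.2, frozenset([1, 2]): -2.9}  # hypothetical values
sep_logl = {frozenset([1]): -1.1}
nu = {frozenset([1]): 1}  # number of junction-tree edges labelled by each separator

log_likelihood_G = (sum(clique_logl.values())
                    - sum(nu[S] * sep_logl[S] for S in sep_logl))
print(log_likelihood_G)  # approximately -5.0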
Example #5
def cov_matrix(G, r, s2):
    """ Returns a covariance matrix cov such that zeros in cov.I is determined by G.

    Args:
        G (NetworkX graph): A decomposable graph.
        r (float): Correlation.
        s2 (float): Variance.

    Returns:
        Numpy matrix: A covariance matrix cov whose inverse has zeros determined by G.
    """
    p = G.order()
    T = trilearn.graph.decomposable.junction_tree(G)
    cliques = T.nodes()
    seps = jtlib.separators(T)
    omega = np.matrix(np.zeros((p, p)))
    cov = np.matrix(np.zeros((p, p)))
    for c in cliques:
        l = len(c)
        cov[np.ix_(list(c), list(c))] += np.identity(l) * s2
        cov[np.ix_(list(c), list(c))] += (np.zeros(
            (l, l)) + 1 - np.identity(l)) * s2 * r

    for s in seps:
        l = len(s)
        if l == 0:
            continue

        ls = len(seps[s])
        cov[np.ix_(list(s), list(s))] -= ls * np.identity(l) * s2
        cov[np.ix_(list(s), list(s))] -= ls * (np.zeros(
            (l, l)) + 1 - np.identity(l)) * s2 * r

    for c in cliques:
        l = len(c)
        omega[np.ix_(list(c), list(c))] += cov[np.ix_(list(c), list(c))].I

    for s in seps:
        l = len(s)
        if l == 0:
            continue
        ls = len(seps[s])
        omega[np.ix_(list(s), list(s))] -= ls * cov[np.ix_(list(s), list(s))].I

    return omega.I
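The construction can be sanity-checked without trilearn on a small case. A self-contained sketch for the chain graph 0 - 1 - 2 (cliques {0, 1} and {1, 2}, separator {1} appearing once), mirroring the clique/separator additions and subtractions above:

import numpy as np

p, r, s2 = 3, 0.5, 1.0
cliques = [[0, 1], [1, 2]]
seps = [[1]]

# Build cov exactly as above: s2 on the diagonal, s2 * r off-diagonal per clique,
# with the separator block subtracted once.
cov = np.zeros((p, p))
for c in cliques:
    l = len(c)
    cov[np.ix_(c, c)] += np.full((l, l), s2 * r) + np.eye(l) * (s2 - s2 * r)
for s in seps:
    l = len(s)
    cov[np.ix_(s, s)] -= np.full((l, l), s2 * r) + np.eye(l) * (s2 - s2 * r)

# Assemble the precision matrix from clique-block inverses minus separator-block inverses.
omega = np.zeros((p, p))
for c in cliques:
    omega[np.ix_(c, c)] += np.linalg.inv(cov[np.ix_(c, c)])
for s in seps:
    omega[np.ix_(s, s)] -= np.linalg.inv(cov[np.ix_(s, s)])

print(omega[0, 2])                        # 0.0 -- (0, 2) is not an edge of the chain
print(np.round(np.linalg.inv(omega), 3))  # the covariance that cov_matrix would return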
Example #6
def separators(graph):
    """ Returns the separators of graph.

    Args:
        graph (NetworkX graph): A decomposable graph.

    Returns:
        dict: A dict with separators as keys and their corresponding junction-tree edges as values.

    Example:
        >>> g = dlib.sample_random_AR_graph(5,3)
        >>> g.nodes
        NodeView((0, 1, 2, 3, 4))
        >>> g.edges
        EdgeView([(0, 1), (0, 2), (1, 2), (2, 3), (3, 4)])
        >>> dlib.separators(g)
        {frozenset([2]): set([(frozenset([2, 3]), frozenset([0, 1, 2]))]), frozenset([3]): set([(frozenset([2, 3]), frozenset([3, 4]))])}

    """
    tree = junction_tree(graph)
    return libj.separators(tree)
def sample_trajectory(n_samples, randomize, sd):
    """ Metropolis-Hastings sampler over junction trees of decomposable graphs.

    Runs n_samples iterations of connect/disconnect moves under the sequential
    distribution sd, re-randomizing the junction tree whenever the iteration
    index is a multiple of randomize, and returns the trajectory of sampled
    graphs and their log posteriors.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(sd.p))
    jt = dlib.junction_tree(graph)
    assert (jtlib.is_junction_tree(jt))
    jt_traj = [None] * n_samples
    graphs = [None] * n_samples
    jt_traj[0] = jt
    graphs[0] = jtlib.graph(jt)
    log_prob_traj = [None] * n_samples

    gtraj = mcmctraj.Trajectory()
    gtraj.set_sampling_method({
        "method": "mh",
        "params": {
            "samples": n_samples,
            "randomize_interval": randomize
        }
    })

    gtraj.set_sequential_distribution(sd)

    log_prob_traj[0] = 0.0
    log_prob_traj[0] = sd.log_likelihood(jtlib.graph(jt_traj[0]))
    log_prob_traj[0] += -jtlib.log_n_junction_trees(
        jt_traj[0], jtlib.separators(jt_traj[0]))

    accept_traj = [0] * n_samples

    MAP_graph = (graphs[0], log_prob_traj[0])

    for i in tqdm(range(1, n_samples), desc="Metropolis-Hastings samples"):
        if log_prob_traj[i - 1] > MAP_graph[1]:
            MAP_graph = (graphs[i - 1], log_prob_traj[i - 1])

        if i % randomize == 0:
            jtlib.randomize(jt)
            graphs[i] = jtlib.graph(jt)  # TODO: Improve.
            log_prob_traj[i] = sd.log_likelihood(
                graphs[i]) - jtlib.log_n_junction_trees(
                    jt, jtlib.separators(jt))

        r = np.random.randint(2)  # Connect / disconnect move
        num_seps = jt.size()
        log_p1 = log_prob_traj[i - 1]
        if r == 0:
            # Connect move
            num_cliques = jt.order()
            conn = aglib.connect_move(
                jt)  # need to move to calculate posterior
            seps_prop = jtlib.separators(jt)
            log_p2 = sd.log_likelihood(
                jtlib.graph(jt)) - jtlib.log_n_junction_trees(jt, seps_prop)

            if not conn:
                log_prob_traj[i] = log_prob_traj[i - 1]
                graphs[i] = graphs[i - 1]
                continue
            C_disconn = conn[2] | conn[3] | conn[4]
            if conn[0] == "a":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn, XSneig,
                 YSneig) = conn
                (NX_disconn, NY_disconn,
                 N_disconn) = aglib.disconnect_get_neighbors(
                     jt, C_disconn, X, Y)  # TODO: could this be done faster?
                log_q21 = aglib.disconnect_logprob_a(num_cliques - 1, X, Y, S,
                                                     N_disconn)
                #print log_p2, log_q21, log_p1, log_q12
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                #print alpha
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    # print "Accept"
                    accept_traj[i] = 1
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    aglib.disconnect_a(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn, XSneig, YSneig)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                    continue

            elif conn[0] == "b":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques, X, Y, S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    #print "Accept"
                    accept_traj[i] = 1
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    aglib.disconnect_b(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                    continue

            elif conn[0] == "c":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques, X, Y, S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    accept_traj[i] = 1
                    #print "Accept"
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    aglib.disconnect_c(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                    continue

            elif conn[0] == "d":
                (case, log_q12, X, Y, S, CX_disconn, CY_disconn) = conn
                log_q21 = aglib.disconnect_logprob_bcd(num_cliques + 1, X, Y,
                                                       S)
                alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                if samp == 1:
                    accept_traj[i] = 1
                    #print "Accept"
                    log_prob_traj[i] = log_p2
                    graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                else:
                    #print "Reject"
                    aglib.disconnect_d(jt, C_disconn, X, Y, CX_disconn,
                                       CY_disconn)
                    log_prob_traj[i] = log_prob_traj[i - 1]
                    graphs[i] = graphs[i - 1]
                    continue

        elif r == 1:
            # Disconnect move
            disconnect = aglib.disconnect_move(
                jt)  # need to move to calculate posterior
            seps_prop = jtlib.separators(jt)
            log_p2 = sd.log_likelihood(
                jtlib.graph(jt)) - jtlib.log_n_junction_trees(jt, seps_prop)

            #assert(jtlib.is_junction_tree(jt))
            #print "disconnect"
            if disconnect is not False:
                if disconnect[0] == "a":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps + 1, X, Y,
                                                    CX_conn, CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        aglib.connect_a(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                        continue

                elif disconnect[0] == "b":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps, X, Y, CX_conn,
                                                    CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        aglib.connect_b(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                        continue

                elif disconnect[0] == "c":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps, X, Y, CX_conn,
                                                    CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        accept_traj[i] = 1
                        #print "Accept"
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        aglib.connect_c(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                        continue

                elif disconnect[0] == "d":
                    (case, log_q12, X, Y, S, CX_conn, CY_conn) = disconnect
                    log_q21 = aglib.connect_logprob(num_seps - 1, X, Y,
                                                    CX_conn, CY_conn)
                    alpha = min(np.exp(log_p2 + log_q21 - log_p1 - log_q12), 1)
                    samp = np.random.choice(2, 1, p=[(1 - alpha), alpha])
                    if samp == 1:
                        #print "Accept"
                        accept_traj[i] = 1
                        log_prob_traj[i] = log_p2
                        graphs[i] = jtlib.graph(jt)  # TODO: Improve.
                    else:
                        #print "Reject"
                        aglib.connect_d(jt, S, X, Y, CX_conn, CY_conn)
                        log_prob_traj[i] = log_prob_traj[i - 1]
                        graphs[i] = graphs[i - 1]
                        continue
            else:
                log_prob_traj[i] = log_prob_traj[i - 1]
                graphs[i] = graphs[i - 1]
                continue
        #print(np.mean(accept_traj[:i]))
    gtraj.set_trajectory(graphs)
    gtraj.logl = log_prob_traj
    return gtraj
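The commented-out print inside the loop hints at monitoring the running acceptance rate. A small, purely illustrative example of the kind of summaries one might compute from the per-iteration lists the sampler maintains (the values below are made up):

import numpy as np

accept_traj = [0, 1, 0, 0, 1, 1, 0, 1]                             # hypothetical accept flags
log_prob_traj = [-10.2, -9.8, -9.8, -9.8, -9.1, -8.7, -8.7, -8.9]  # hypothetical log posteriors

print("acceptance rate:", np.mean(accept_traj[1:]))     # fraction of accepted moves
print("MAP iteration:", int(np.argmax(log_prob_traj)))  # index of the highest log posterior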