Exemple #1
0
def gen_ggm_trajectory(dataframe, n_samples, D=None, delta=1.0, cache=None, alpha=0.5, beta=0.5, **args):
    """Sample a Metropolis-Hastings trajectory for a Gaussian graphical model.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_samples (int): number of MH samples to draw.
        D (np.matrix): matrix parameter for the hyper inverse Wishart prior;
            defaults to the p x p identity.
        delta (float): degrees of freedom for the hyper inverse Wishart prior.
        cache (dict): optional cache for clique likelihoods; a fresh dict is
            created when omitted.
        alpha (float): sparsity parameter passed to the MH sampler.
        beta (float): sparsity parameter passed to the MH sampler.

    Returns:
        The result of `mh` — presumably the sampled trajectory (confirm
        against `mh`'s definition).
    """
    # BUG FIX: was `cache={}` — a mutable default dict shared across calls,
    # so likelihood entries from one call silently leaked into the next.
    if cache is None:
        cache = {}
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    sd = seqdist.GGMJTPosterior()
    sd.init_model(np.asmatrix(dataframe), D, delta, cache)
    return mh(alpha, beta, n_samples, sd)
Exemple #2
0
def sample_trajectories_ggm_parallel(dataframe, n_samples, randomize=None, D=None, delta=1.0,
                                     reps=1, output_directory=".", **args):
    """Run several GGM MH trajectory samplers in parallel processes.

    Spawns one `Process` per (rep, randomize, n_samples) combination, each
    posting its result to a shared queue, and collects all results.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_samples (iterable of int): sample counts, one run per entry.
        randomize (list of int): randomization periods; defaults to [1000].
        D (np.matrix): matrix parameter for the hyper inverse Wishart prior;
            defaults to the p x p identity.
        delta (float): degrees of freedom for the hyper inverse Wishart prior.
        reps (int): number of repetitions of the full parameter grid.
        output_directory (str): unused here — kept for interface compatibility.

    Returns:
        list: one result per spawned process, in queue-arrival order.
    """
    # BUG FIX: was `randomize=[1000]` — a mutable default list shared
    # across calls.
    if randomize is None:
        randomize = [1000]
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    queue = multiprocessing.Queue()
    processes = []
    rets = []

    for _ in range(reps):
        for r in randomize:
            for T in n_samples:
                sd = seqdist.GGMJTPosterior()
                sd.init_model(np.asmatrix(dataframe), D, delta, {})

                print("Starting: " + str((T, r, str(sd), True)))

                proc = Process(target=trajectory_to_queue,
                               args=(T, r,
                                     sd, queue, True))
                proc.start()
                processes.append(proc)
                # Stagger start-ups (presumably to decorrelate RNG seeding
                # — confirm whether the workers seed from wall time).
                time.sleep(2)

    for _ in processes:
        ret = queue.get()  # will block until a worker posts its result
        rets.append(ret)
    # Renamed loop variable (was `p`) to stop shadowing the dimension `p`.
    for proc in processes:
        proc.join()

    return rets
Exemple #3
0
def sample_trajectory_ggm(dataframe, n_samples, randomize=1000, D=None, delta=1.0, cache=None, **args):
    """Sample a single GGM trajectory via `sample_trajectory`.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_samples (int): number of samples to draw.
        randomize (int): randomization period for the sampler.
        D (np.matrix): matrix parameter for the hyper inverse Wishart prior;
            defaults to the p x p identity.
        delta (float): degrees of freedom for the hyper inverse Wishart prior.
        cache (dict): optional cache for clique likelihoods; a fresh dict is
            created when omitted.

    Returns:
        The result of `sample_trajectory`.
    """
    # BUG FIX: was `cache={}` — a mutable default dict shared across calls.
    if cache is None:
        cache = {}
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    sd = seqdist.GGMJTPosterior()
    sd.init_model(np.asmatrix(dataframe), D, delta, cache)
    return sample_trajectory(n_samples, randomize, sd)
Exemple #4
0
def smc_ggm_graphs(N, alpha, beta, radius, X, D, delta):
    """Approximate the GGM posterior over graphs with SMC.

    Runs `approximate` to obtain junction trees and log-weights, normalizes
    the final-step weights, and converts each tree to its underlying graph.

    Args:
        N: number of SMC particles.
        alpha, beta: sparsity parameters for the proposal.
        radius: neighborhood radius for node selection.
        X (np.matrix): data matrix.
        D (np.matrix): hyper inverse Wishart prior matrix parameter.
        delta (float): hyper inverse Wishart degrees of freedom.

    Returns:
        tuple: (list of graphs, np.array of normalized weights).
    """
    seq_dist = seqdist.GGMJTPosterior()
    seq_dist.init_model(X, D, delta, {})
    (trees, log_w) = approximate(N, alpha, beta, radius, seq_dist)
    # Take the log-weights of the final step and shift by their maximum
    # for numerical stability before exponentiating.
    final_log_w = np.array(log_w.T)[seq_dist.p - 1]
    shifted = final_log_w - max(final_log_w)
    unnorm_w = np.exp(shifted)
    norm_w = unnorm_w / sum(unnorm_w)
    graphs = [jtlib.graph(tree) for tree in trees]
    return (graphs, norm_w)
Exemple #5
0
    def from_json(self, mcmc_json):
        """Populate this object from its JSON (node-link) representation.

        Args:
            mcmc_json (dict): serialized trajectory with keys "trajectory",
                "run_time", "optional", "sampling_method" and "model".

        Raises:
            ValueError: if the model name in the JSON is not recognized.
        """
        graphs = [json_graph.node_link_graph(js_graph)
                  for js_graph in mcmc_json["trajectory"]]

        self.set_trajectory(graphs)
        self.set_time(mcmc_json["run_time"])
        self.optional = mcmc_json["optional"]
        self.sampling_method = mcmc_json["sampling_method"]
        model_name = mcmc_json["model"]["name"]
        if model_name == "ggm_jt_post":
            self.seqdist = sd.GGMJTPosterior()
        elif model_name == "loglin_jt_post":
            self.seqdist = sd.LogLinearJTPosterior()
        else:
            # BUG FIX: previously an unknown model name fell through
            # silently, leaving self.seqdist unset (or stale) and failing
            # later with an opaque AttributeError. Fail fast instead.
            raise ValueError("Unknown model name: " + str(model_name))

        self.seqdist.init_model_from_json(mcmc_json["model"])
Exemple #6
0
def sample_trajectories_ggm_to_file(dataframe, n_samples, randomize=None, D=None, delta=1.0,
                                    reps=1, output_directory=".", **args):
    """Sample GGM MH trajectories over a parameter grid, writing each to file.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_samples (iterable of int): sample counts, one run per entry.
        randomize (list of int): randomization periods; defaults to [1000].
        D (np.matrix): matrix parameter for the hyper inverse Wishart prior;
            defaults to the p x p identity.
        delta (float): degrees of freedom for the hyper inverse Wishart prior.
        reps (int): number of repetitions of the full grid.
        output_directory (str): directory passed to `trajectory_to_file`.

    Returns:
        list: the graph trajectory returned by each `trajectory_to_file` call.
    """
    # BUG FIX: was `randomize=[1000]` — a mutable default list shared
    # across calls.
    if randomize is None:
        randomize = [1000]
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)

    graph_trajectories = []
    for _ in range(reps):
        for r in randomize:
            for T in n_samples:
                sd = seqdist.GGMJTPosterior()
                sd.init_model(np.asmatrix(dataframe), D, delta, {})
                graph_trajectory = trajectory_to_file(T, r, sd, dir=output_directory)
                graph_trajectories.append(graph_trajectory)
    return graph_trajectories
Exemple #7
0
def sample_trajectories_ggm_parallel(dataframe,
                                     n_particles,
                                     n_samples,
                                     D=None,
                                     delta=1.0,
                                     alphas=None,
                                     betas=None,
                                     radii=None,
                                     reset_cache=True,
                                     reps=1,
                                     **args):
    """Run particle-Gibbs GGM samplers in parallel over a parameter grid.

    Spawns one `Process` per (rep, N, T, radius, alpha, beta) combination,
    each posting its result to a shared queue, and collects all results.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_particles (iterable of int): SMC particle counts.
        n_samples (iterable of int): Gibbs iteration counts.
        D (np.matrix): hyper inverse Wishart prior matrix parameter;
            defaults to the p x p identity.
        delta (float): hyper inverse Wishart degrees of freedom.
        alphas (list of float): sparsity parameters; defaults to [0.5].
        betas (list of float): sparsity parameters; defaults to [0.5].
        radii (list): node-selection radii; defaults to [p].
        reset_cache (bool): forwarded to the worker.
        reps (int): repetitions of the full grid.

    Returns:
        list: one result per spawned process, in queue-arrival order.
    """
    # BUG FIX: alphas/betas/radii were mutable default lists shared
    # across calls.
    if alphas is None:
        alphas = [0.5]
    if betas is None:
        betas = [0.5]
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    # Accept both the new `None` default and the legacy explicit `[None]`.
    if radii is None or radii == [None]:
        radii = [p]
    queue = multiprocessing.Queue()
    processes = []
    rets = []
    for _ in range(reps):
        for N in n_particles:
            for T in n_samples:
                for rad in radii:
                    for alpha in alphas:
                        for beta in betas:
                            sd = seqdist.GGMJTPosterior()
                            sd.init_model(np.asmatrix(dataframe), D, delta, {})

                            print("Starting: " +
                                  str((N, T, alpha, beta, rad, str(sd),
                                       reset_cache, True)))

                            proc = Process(target=trajectory_to_queue,
                                           args=(N, T, alpha, beta, rad, sd,
                                                 queue, reset_cache, True))
                            processes.append(proc)
                            proc.start()
                            # Stagger start-ups (presumably to decorrelate
                            # RNG seeding — confirm the workers' seeding).
                            time.sleep(2)

    for _ in processes:
        ret = queue.get()  # will block until a worker posts its result
        rets.append(ret)
    # Renamed loop variable (was `p`) to stop shadowing the dimension `p`.
    for proc in processes:
        proc.join()

    return rets
Exemple #8
0
def sample_trajectory_ggm(dataframe,
                          n_particles,
                          n_samples,
                          D=None,
                          delta=1.0,
                          alpha=0.5,
                          beta=0.5,
                          radius=None,
                          reset_cache=True,
                          **args):
    """ Particle Gibbs for approximating distributions over
    Gaussian graphical models.

    Args:
        dataframe (np.matrix): row matrix of data
        n_particles (int): Number of particles in SMC in each Gibbs iteration
        n_samples (int): Number of Gibbs iterations (samples)
        D (np.matrix): matrix parameter for the hyper inverse wishart prior;
            defaults to the p x p identity
        delta (float): degrees of freedom for the hyper inverse wishart prior
        alpha (float): sparsity parameter for the Christmas tree algorithm
        beta (float): sparsity parameter for the Christmas tree algorithm
        radius (float): defines the radius within which new nodes are
            selected; defaults to the number of variables p
        reset_cache (bool): whether the clique-likelihood cache is reset
            between Gibbs iterations

    Returns:
        Trajectory: Markov chain of the underlying graphs of the junction
        trees sampled by pgibbs.
    """
    n_dims = dataframe.shape[1]
    D_prior = np.identity(n_dims) if D is None else D
    search_radius = n_dims if radius is None else radius

    posterior = seqdist.GGMJTPosterior()
    posterior.init_model(np.asmatrix(dataframe), D_prior, delta, {})

    return sample_trajectory(n_particles,
                             alpha,
                             beta,
                             search_radius,
                             n_samples,
                             posterior,
                             reset_cache=reset_cache)
Exemple #9
0
def sample_trajectories_ggm_to_file(dataframe,
                                    n_particles,
                                    n_samples,
                                    D=None,
                                    delta=1.0,
                                    alphas=None,
                                    betas=None,
                                    radii=None,
                                    reset_cache=True,
                                    reps=1,
                                    output_directory=".",
                                    output_filename="trajectory.json",
                                    **args):
    """Sample particle-Gibbs GGM trajectories over a grid, writing each to file.

    Args:
        dataframe (np.matrix): row matrix of data.
        n_particles (iterable of int): SMC particle counts.
        n_samples (iterable of int): Gibbs iteration counts.
        D (np.matrix): hyper inverse Wishart prior matrix parameter;
            defaults to the p x p identity.
        delta (float): hyper inverse Wishart degrees of freedom.
        alphas (list of float): sparsity parameters; defaults to [0.5].
        betas (list of float): sparsity parameters; defaults to [0.5].
        radii (list): node-selection radii; defaults to [p].
        reset_cache (bool): forwarded to `trajectory_to_file`.
        reps (int): repetitions of the full grid.
        output_directory (str): directory passed to `trajectory_to_file`.
        output_filename (str): filename passed to `trajectory_to_file`.

    Returns:
        list: the graph trajectory returned by each `trajectory_to_file` call.
    """
    # BUG FIX: alphas/betas/radii were mutable default lists shared
    # across calls.
    if alphas is None:
        alphas = [0.5]
    if betas is None:
        betas = [0.5]
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    # Accept both the new `None` default and the legacy explicit `[None]`.
    if radii is None or radii == [None]:
        radii = [p]

    graph_trajectories = []
    for _ in range(reps):
        for N in n_particles:
            for T in n_samples:
                for rad in radii:
                    for alpha in alphas:
                        for beta in betas:
                            sd = seqdist.GGMJTPosterior()
                            sd.init_model(np.asmatrix(dataframe), D, delta, {})

                            graph_trajectory = trajectory_to_file(
                                N,
                                T,
                                alpha,
                                beta,
                                rad,
                                sd,
                                reset_cache=reset_cache,
                                output_filename=output_filename,
                                dir=output_directory)
                            graph_trajectories.append(graph_trajectory)
    return graph_trajectories
Exemple #10
0
            pool = Pool(processes=len(self.classes))

        async_results = [None for _ in self.same_graph_groups]
        for g, group in enumerate(self.same_graph_groups):
            x_centered = np.array([])
            # concatenate the data, centered by mean in each class
            for c in group:
                c_inds = (np.array(self.y).ravel() == c)
                xc = self.x[np.ix_(c_inds, range(self.p))]
                xc_centered = xc - np.mean(xc, axis=0)
                if len(x_centered) == 0:
                    x_centered = xc_centered
                else:
                    x_centered = np.concatenate((x_centered, xc_centered),
                                                axis=0)
            seq_dist = seqjtdist.GGMJTPosterior()
            cache = {}
            seq_dist.init_model(x_centered, self.hyper_tau[g],
                                self.hyper_alpha[g], cache)
            if async is True:
                async_results[g] = pool.apply_async(
                    trilearn.pgibbs.sample_trajectory,
                    (n_particles, cta_alpha, cta_beta, self.smc_radius,
                     n_pgibbs_samples, seq_dist))
            else:
                self.ggm_trajs[g] = trilearn.pgibbs.sample_trajectory(
                    n_particles, cta_alpha, cta_beta, self.smc_radius,
                    n_pgibbs_samples, seq_dist)
        if async is True:
            for g in range(len(self.same_graph_groups)):
                self.ggm_trajs[g] = async_results[g].get()