def gen_ggm_trajectory(dataframe, n_samples, D=None, delta=1.0, cache=None, alpha=0.5, beta=0.5, **args):
    """Generate a Metropolis-Hastings trajectory for a GGM junction-tree posterior.

    Args:
        dataframe: row matrix of data (anything `np.asmatrix` accepts).
        n_samples (int): number of MH samples to draw.
        D (np.matrix): scale-matrix parameter of the hyper inverse Wishart
            prior; defaults to the p x p identity.
        delta (float): degrees of freedom of the hyper inverse Wishart prior.
        cache (dict): cache for clique likelihoods; a fresh dict is created
            per call when omitted.
        alpha (float): sparsity parameter for the Christmas tree algorithm.
        beta (float): sparsity parameter for the Christmas tree algorithm.

    Returns:
        The result of `mh(alpha, beta, n_samples, sd)` — the sampled trajectory.
    """
    # NOTE(fix): the original signature used the mutable default `cache={}`,
    # which is shared across calls; use None as sentinel instead.
    if cache is None:
        cache = {}
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    sd = seqdist.GGMJTPosterior()
    sd.init_model(np.asmatrix(dataframe), D, delta, cache)
    return mh(alpha, beta, n_samples, sd)
def sample_trajectories_ggm_parallel(dataframe, n_samples, randomize=[1000], D=None, delta=1.0, reps=1, output_directory=".", **args):
    """Run MH trajectory sampling in parallel for every (rep, randomize, n_samples) combination.

    One `Process` is spawned per combination; each worker pushes its result
    onto a shared queue, which is drained after all workers are launched.

    Args:
        dataframe: row matrix of data.
        n_samples (iterable of int): trajectory lengths to run.
        randomize (iterable of int): randomization intervals to run.
        D (np.matrix): hyper inverse Wishart scale matrix; identity if None.
        delta (float): hyper inverse Wishart degrees of freedom.
        reps (int): number of repetitions per combination.
        output_directory (str): unused here; kept for interface compatibility.

    Returns:
        list: one result per launched process (order follows queue arrival).
    """
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    queue = multiprocessing.Queue()
    processes = []
    rets = []
    for _ in range(reps):
        for r in randomize:
            for T in n_samples:
                sd = seqdist.GGMJTPosterior()
                sd.init_model(np.asmatrix(dataframe), D, delta, {})
                print("Starting: " + str((T, r, str(sd), True)))
                proc = Process(target=trajectory_to_queue,
                               args=(T, r, sd, queue, True))
                proc.start()
                processes.append(proc)
                time.sleep(2)
    for _ in processes:
        ret = queue.get()  # will block
        rets.append(ret)
    # NOTE(fix): the original wrote `for p in processes: p.join()`, reusing
    # `p` (the data dimension computed above) as the loop variable.
    for proc in processes:
        proc.join()
    return rets
def sample_trajectory_ggm(dataframe, n_samples, randomize=1000, D=None, delta=1.0, cache=None, **args):
    """Sample a single MH trajectory for a GGM junction-tree posterior.

    Args:
        dataframe: row matrix of data.
        n_samples (int): number of MH samples.
        randomize (int): randomization interval for the sampler.
        D (np.matrix): hyper inverse Wishart scale matrix; identity if None.
        delta (float): hyper inverse Wishart degrees of freedom.
        cache (dict): cache for clique likelihoods; a fresh dict is created
            per call when omitted.

    Returns:
        The result of `sample_trajectory(n_samples, randomize, sd)`.
    """
    # NOTE(fix): the original used the mutable default `cache={}`, shared
    # across calls; use None as sentinel instead.
    if cache is None:
        cache = {}
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    sd = seqdist.GGMJTPosterior()
    sd.init_model(np.asmatrix(dataframe), D, delta, cache)
    return sample_trajectory(n_samples, randomize, sd)
def smc_ggm_graphs(N, alpha, beta, radius, X, D, delta):
    """Approximate the GGM graph posterior with SMC and return weighted graphs.

    Args:
        N (int): number of SMC particles.
        alpha (float): sparsity parameter for the Christmas tree algorithm.
        beta (float): sparsity parameter for the Christmas tree algorithm.
        radius (float): neighborhood radius used by the SMC proposal.
        X (np.matrix): row matrix of data.
        D (np.matrix): hyper inverse Wishart scale matrix.
        delta (float): hyper inverse Wishart degrees of freedom.

    Returns:
        tuple: (graphs, norm_w) — the underlying graphs of the sampled
        junction trees and their normalized importance weights.
    """
    cache = {}
    seq_dist = seqdist.GGMJTPosterior()
    seq_dist.init_model(X, D, delta, cache)
    (trees, log_w) = approximate(N, alpha, beta, radius, seq_dist)
    # Take the log-weights of the final (p-th) SMC step and shift by the max
    # for numerical stability before exponentiating.
    last_log_w = np.array(log_w.T)[seq_dist.p - 1]
    log_w_rescaled = last_log_w - max(last_log_w)
    # NOTE(fix): the original exponentiated log_w_rescaled twice (once in the
    # numerator, once inside sum()); compute it once.
    w = np.exp(log_w_rescaled)
    norm_w = w / sum(w)
    graphs = [jtlib.graph(tree) for tree in trees]
    return (graphs, norm_w)
def from_json(self, mcmc_json):
    """Populate this object from its JSON (dict) representation.

    Rebuilds the graph trajectory, run time, optional metadata, sampling
    method, and the sequential distribution (model) object.

    Args:
        mcmc_json (dict): must contain keys "trajectory", "run_time",
            "optional", "sampling_method", and "model" (with a "name" of
            either "ggm_jt_post" or "loglin_jt_post").

    Raises:
        ValueError: if the model name is not recognized.
    """
    graphs = [json_graph.node_link_graph(js_graph)
              for js_graph in mcmc_json["trajectory"]]
    self.set_trajectory(graphs)
    self.set_time(mcmc_json["run_time"])
    self.optional = mcmc_json["optional"]
    self.sampling_method = mcmc_json["sampling_method"]
    model_name = mcmc_json["model"]["name"]
    if model_name == "ggm_jt_post":
        self.seqdist = sd.GGMJTPosterior()
    elif model_name == "loglin_jt_post":
        self.seqdist = sd.LogLinearJTPosterior()
    else:
        # NOTE(fix): the original fell through silently on an unknown model
        # name, leaving self.seqdist unset/stale and failing later with a
        # confusing error; fail fast instead.
        raise ValueError("Unknown model name: " + str(model_name))
    self.seqdist.init_model_from_json(mcmc_json["model"])
def sample_trajectories_ggm_to_file(dataframe, n_samples, randomize=[1000], D=None, delta=1.0, reps=1, output_directory=".", **args):
    """Sample one MH trajectory per (rep, randomize, n_samples) combination and write each to disk.

    A fresh GGM junction-tree posterior (with its own empty cache) is built
    for every combination; `trajectory_to_file` performs the sampling and
    the write.

    Returns:
        list: the graph trajectories, one per combination.
    """
    n_dim = dataframe.shape[1]
    if D is None:
        D = np.identity(n_dim)
    # Enumerate all (randomize, length) combinations, repeated `reps` times.
    combos = ((r, T) for _ in range(reps) for r in randomize for T in n_samples)
    graph_trajectories = []
    for r, T in combos:
        sd = seqdist.GGMJTPosterior()
        sd.init_model(np.asmatrix(dataframe), D, delta, {})
        graph_trajectories.append(
            trajectory_to_file(T, r, sd, dir=output_directory))
    return graph_trajectories
def sample_trajectories_ggm_parallel(dataframe, n_particles, n_samples, D=None, delta=1.0, alphas=[0.5], betas=[0.5], radii=[None], reset_cache=True, reps=1, **args):
    """Run particle-Gibbs trajectory sampling in parallel over a parameter grid.

    NOTE(review): this module defines `sample_trajectories_ggm_parallel`
    twice (an MH variant and this PGibbs variant); the later definition wins
    at import time — consider renaming one of them.

    One `Process` is spawned per (rep, N, T, radius, alpha, beta)
    combination; workers push results onto a shared queue.

    Args:
        dataframe: row matrix of data.
        n_particles (iterable of int): SMC particle counts.
        n_samples (iterable of int): Gibbs iteration counts.
        D (np.matrix): hyper inverse Wishart scale matrix; identity if None.
        delta (float): hyper inverse Wishart degrees of freedom.
        alphas, betas (iterables of float): Christmas tree sparsity params.
        radii (iterable): proposal radii; [None] means use p (the dimension).
        reset_cache (bool): passed through to the worker.
        reps (int): repetitions per combination.

    Returns:
        list: one result per launched process (order follows queue arrival).
    """
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    if radii == [None]:
        radii = [p]
    queue = multiprocessing.Queue()
    processes = []
    rets = []
    for _ in range(reps):
        for N in n_particles:
            for T in n_samples:
                for rad in radii:
                    for alpha in alphas:
                        for beta in betas:
                            sd = seqdist.GGMJTPosterior()
                            sd.init_model(np.asmatrix(dataframe), D, delta, {})
                            print("Starting: " + str((N, T, alpha, beta, rad,
                                                      str(sd), reset_cache, True)))
                            proc = Process(
                                target=trajectory_to_queue,
                                args=(N, T, alpha, beta, rad, sd, queue,
                                      reset_cache, True))
                            processes.append(proc)
                            proc.start()
                            time.sleep(2)
    for _ in processes:
        ret = queue.get()  # will block
        rets.append(ret)
    # NOTE(fix): the original wrote `for p in processes: p.join()`, reusing
    # `p` (the data dimension computed above) as the loop variable.
    for proc in processes:
        proc.join()
    return rets
def sample_trajectory_ggm(dataframe, n_particles, n_samples, D=None, delta=1.0, alpha=0.5, beta=0.5, radius=None, reset_cache=True, **args):
    """Particle Gibbs for approximating distributions over Gaussian graphical models.

    NOTE(review): this module defines `sample_trajectory_ggm` twice (an MH
    variant and this PGibbs variant); the later definition wins at import
    time — consider renaming one of them.

    Args:
        dataframe (np.matrix): row matrix of data
        n_particles (int): Number of particles in SMC in each Gibbs iteration
        n_samples (int): Number of Gibbs iterations (samples)
        D (np.matrix): matrix parameter for the hyper inverse wishart prior
        delta (float): degrees of freedom for the hyper inverse wishart prior
        alpha (float): sparsity parameter for the Christmas tree algorithm
        beta (float): sparsity parameter for the Christmas tree algorithm
        radius (float): defines the radius within which new nodes are selected
        reset_cache (bool): whether to reset the likelihood cache each iteration

    Returns:
        Trajectory: Markov chain of the underlying graphs of the junction
        trees sampled by pgibbs.
    """
    # NOTE(fix vs original docstring): fixed the "ned nodes" typo and removed
    # the documented-but-nonexistent `cache` parameter; a fresh cache dict is
    # always created below.
    p = dataframe.shape[1]
    if D is None:
        D = np.identity(p)
    if radius is None:
        radius = p
    sd = seqdist.GGMJTPosterior()
    sd.init_model(np.asmatrix(dataframe), D, delta, {})
    return sample_trajectory(n_particles, alpha, beta, radius, n_samples, sd,
                             reset_cache=reset_cache)
def sample_trajectories_ggm_to_file(dataframe, n_particles, n_samples, D=None, delta=1.0, alphas=[0.5], betas=[0.5], radii=[None], reset_cache=True, reps=1, output_directory=".", output_filename="trajectory.json", **args):
    """Run particle-Gibbs sampling over a parameter grid, writing each trajectory to a file.

    A fresh GGM junction-tree posterior (with its own empty cache) is built
    per combination; `trajectory_to_file` performs the sampling and the write.

    Returns:
        list: the graph trajectories, one per
        (rep, N, T, radius, alpha, beta) combination.
    """
    n_dim = dataframe.shape[1]
    if D is None:
        D = np.identity(n_dim)
    if radii == [None]:
        radii = [n_dim]
    # Enumerate the full parameter grid, repeated `reps` times, preserving
    # the nesting order: reps, particles, samples, radius, alpha, beta.
    grid = ((N, T, rad, alpha, beta)
            for _ in range(reps)
            for N in n_particles
            for T in n_samples
            for rad in radii
            for alpha in alphas
            for beta in betas)
    graph_trajectories = []
    for N, T, rad, alpha, beta in grid:
        sd = seqdist.GGMJTPosterior()
        sd.init_model(np.asmatrix(dataframe), D, delta, {})
        traj = trajectory_to_file(N, T, alpha, beta, rad, sd,
                                  reset_cache=reset_cache,
                                  output_filename=output_filename,
                                  dir=output_directory)
        graph_trajectories.append(traj)
    return graph_trajectories
pool = Pool(processes=len(self.classes)) async_results = [None for _ in self.same_graph_groups] for g, group in enumerate(self.same_graph_groups): x_centered = np.array([]) # concatenate the data, centered by mean in each class for c in group: c_inds = (np.array(self.y).ravel() == c) xc = self.x[np.ix_(c_inds, range(self.p))] xc_centered = xc - np.mean(xc, axis=0) if len(x_centered) == 0: x_centered = xc_centered else: x_centered = np.concatenate((x_centered, xc_centered), axis=0) seq_dist = seqjtdist.GGMJTPosterior() cache = {} seq_dist.init_model(x_centered, self.hyper_tau[g], self.hyper_alpha[g], cache) if async is True: async_results[g] = pool.apply_async( trilearn.pgibbs.sample_trajectory, (n_particles, cta_alpha, cta_beta, self.smc_radius, n_pgibbs_samples, seq_dist)) else: self.ggm_trajs[g] = trilearn.pgibbs.sample_trajectory( n_particles, cta_alpha, cta_beta, self.smc_radius, n_pgibbs_samples, seq_dist) if async is True: for g in range(len(self.same_graph_groups)): self.ggm_trajs[g] = async_results[g].get()