def simulate(self):
    """Generate synthetic single-cell-like datasets and fit a model to each.

    For every parameter combination in ``self.simulation_params``, one
    case/control dataset is generated, the parameter set is recorded in
    ``self.parameters``, and the prepared HMC sampling summary is stored
    in ``self.mcmc_results``.

    Returns
    -------
    None
        Results are written to ``self.parameters`` and ``self.mcmc_results``.
    """
    # Iterate over all parameter combinations; enumerate replaces the
    # original manual i = 0 / i += 1 counter.
    for i, (c, k, nt, ns, b, w, nr) in enumerate(self.simulation_params):
        # Generate one synthetic dataset for this parameter combination.
        temp_data = gen.generate_case_control(
            cases=c, K=k, n_total=nt, n_samples=ns, b_true=b, w_true=w)

        # Save parameter set
        s = [c, k, nt, ns, b, w, nr]
        print('Simulating:', s)
        self.parameters.loc[i] = s

        # Fit the compositional model; the baseline index is taken from
        # self.baseline_index (None means no baseline).
        ana = ca.CompositionalAnalysis(
            temp_data, self.formula, baseline_index=self.baseline_index)
        result_temp = ana.sample_hmc(
            num_results=int(nr),
            n_burnin=self.n_burnin,
            step_size=self.step_size,
            num_leapfrog_steps=self.num_leapfrog_steps)
        self.mcmc_results[i] = result_temp.summary_prepare()

    return None
def test_baseline(self):
    """Check that the baseline model approximately recovers ground truth."""
    np.random.seed(1234)
    tf.random.set_seed(5678)

    model_salm = mod.CompositionalAnalysis(
        self.data, formula="Condition", baseline_index=5)

    # Posterior sampling via HMC
    sim_results = model_salm.sample_hmc(num_results=20000, n_burnin=5000)
    alpha_df, beta_df = sim_results.summary_prepare()

    # Ground truth: mean cell counts of the first four (control) and
    # remaining (case) samples
    alphas_true = np.round(np.mean(self.data.X[:4], 0), 0)
    betas_true = np.round(np.mean(self.data.X[4:], 0), 0)

    # Model estimates of the expected per-sample counts
    final_alphas = np.round(alpha_df.loc[:, "expected_sample"].tolist(), 0)
    final_betas = np.round(beta_df.loc[:, "expected_sample"].tolist(), 0)

    # Accept when every estimate lies within 30 cells of the truth
    alphas_ok = all(np.abs(alphas_true - final_alphas) <= 30)
    betas_ok = all(np.abs(betas_true - final_betas) <= 30)
    self.assertTrue(alphas_ok and betas_ok)
# Inspect the generated dataset: ground-truth parameters, count matrix
# and sample/cell-type metadata.
print(data.uns["w_true"])
print(data.uns["b_true"])
print(data.X)
print(data.obs)
print(data.var)

#%%
importlib.reload(mod)
importlib.reload(res)

#data.obs["x_0"] = ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"]
#data.obs["x_1"] = ["A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B"]

# Build the model without a baseline cell type, covariate "x_0".
ana = mod.CompositionalAnalysis(data, "x_0", baseline_index=None)
print(ana.x)
print(ana.covariate_names)

#%%
# HMC sampling: 1000 posterior draws after 500 burn-in steps.
params_mcmc = ana.sample_hmc(num_results=int(1000), n_burnin=500)
print(params_mcmc)

#%%
params_mcmc.summary()

#%%
params_mcmc.summary_extended(credible_interval=0.9)

#%%
# Trace plots for a visual convergence check.
az.plot_trace(params_mcmc)
plt.show()
print(data.obs) #%% importlib.reload(viz) sns.set(style="ticks", font_scale=2) args_swarmplot = {"hue": "subject", "size": 10, "palette": "Reds"} viz.boxplot_facets(data, feature="site") plt.show() #%% # Model that differentiates both palms model_palms = mod.CompositionalAnalysis(data[data.obs["site"].isin( ["left palm", "right palm"])], "site", baseline_index=None) result_palms = model_palms.sample_hmc(num_results=int(20000), n_burnin=5000) result_palms.summary_extended(hdi_prob=0.95) #%% with az.rc_context(rc={'plot.max_subplots': None}): az.plot_trace(result_palms, compact=True) plt.show() #%% # less samples, less burnin
# Use cluster ids 1..60 as the cell-type index.
cluster_names = np.arange(60) + 1
print(cluster_names)
cell_types = pd.DataFrame(index=cluster_names)
print(cell_types)

#%%
# Put all together
sle_freq_data = ad.AnnData(X=cell_counts, var=cell_types, obs=group_df)
print(sle_freq_data.obs)

#%%
# Modeling without baseline
ana = mod.CompositionalAnalysis(sle_freq_data, "Group", baseline_index=None)

#%%
ca_result = ana.sample_hmc(num_results=int(20000), n_burnin=5000)
ca_result.summary(hdi_prob=0.95)

#%%
az.plot_trace(ca_result)
plt.show()

#%%
# Modeling with baseline
# NOTE(review): the comment says "with baseline" but baseline_index is None,
# identical to the model above — presumably an integer baseline index was
# intended here; confirm.
ana_2 = mod.CompositionalAnalysis(sle_freq_data, "Group", baseline_index=None)
importlib.reload(res)
import patsy as pt

# Fit the no-baseline model directly from precomputed design/data matrices.
formula = "x_0"
model = mod.NoBaselineModelNoEdward(
    covariate_matrix=np.array(covariate_matrix),
    data_matrix=data_matrix,
    cell_types=cell_types,
    covariate_names=covariate_names,
    formula=formula)

#print(model.target_log_prob_fn(*(params.values())))

#%%
result = model.sample_hmc(num_results=int(1000), n_burnin=500)
result.summary()

#%%
# Same data through the high-level API; evaluate the joint log-probability
# at the model's initial parameter values.
model_2 = ca.CompositionalAnalysis(data, "x_0", baseline_index=None)
print(model_2.target_log_prob_fn(
    model_2.params[0], model_2.params[1], model_2.params[2],
    model_2.params[3], model_2.params[4]))

#%%
res_2 = model_2.sample_hmc(num_results=int(20000), n_burnin=5000)
res_2.summary()

#%%
# Time covariate and random draws phi, replicated across the N samples.
# NOTE(review): D, K, N are defined elsewhere in this file — confirm shapes.
time = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype="float32")
phi = np.random.normal(0, 1, size=(D, K))
phi_ = np.repeat(phi[np.newaxis, :], N, axis=0)
# Simulation setup: 5 cell types, two groups of n samples each,
# 1000 cells per sample.
K = 5
n_samples = [n, n]
n_total = np.full(shape=[2 * n], fill_value=1000)

# Effect only on the first cell type (w_true); uniform base composition
# (log 0.2 for each of the 5 types).
data = gen.generate_case_control(
    cases, K, n_total[0], n_samples,
    w_true=np.array([[1, 0, 0, 0, 0]]),
    b_true=np.log(np.repeat(0.2, K)).tolist())

print(data.uns["w_true"])
print(data.uns["b_true"])
print(data.X)
print(data.obs)

#%%
importlib.reload(mod)

# Model without a baseline cell type.
ana = mod.CompositionalAnalysis(data, "x_0", baseline_index=None)
print(ana.x)
print(ana.y)
print(ana.covariate_names)

params_mcmc = ana.sample_hmc(num_results=int(1000), n_burnin=500)
print(params_mcmc)

#%%
params_mcmc.summary(hdi_prob=0.9)
# pseudo-covariate of 1 on all samples data.obs["c"] = 1 print(data.X) #%% viz.plot_feature_stackbars(data, ["day"]) #%% importlib.reload(ca) importlib.reload(mod) importlib.reload(tm) model = ca.CompositionalAnalysis(data, formula="c", baseline_index=None, time_column="day") result = model.sample_hmc(num_results=int(20000), n_burnin=0) result.summary() #%% print(result.posterior["phi"][-1]) #%% az.plot_trace(result, var_names=["beta", "phi"], compact=True) plt.show()
# NOTE(review): the opening of this constructor call is outside this chunk;
# the following are its trailing keyword arguments.
data_matrix=data_matrix, cell_types=cell_types,
                                     covariate_names=covariate_names,
                                     formula=formula)

# Evaluate the joint log-probability at the named initial parameters.
print(
    model.target_log_prob_fn(model.params["mu_b"], model.params["sigma_b"],
                             model.params["b_offset"],
                             model.params["ind_raw"], model.params["alpha"]))

#%%
result = model.sample_hmc(num_results=int(1000), n_burnin=500)
result.summary()

#%%
# Same data through the high-level API; positional initial parameters.
model_2 = ca.CompositionalAnalysis(data, "x_0", baseline_index=None)
print(
    model_2.target_log_prob_fn(model_2.params[0], model_2.params[1],
                               model_2.params[2], model_2.params[3],
                               model_2.params[4]))

#%%
res_2 = model_2.sample_hmc(num_results=int(1000), n_burnin=500)
res_2.summary()

#%%
# Model dimensions: D covariates, K cell types, N samples.
D = x.shape[1]
K = y.shape[1]
N = y.shape[0]
dtype = tf.float32
beta_size = [D, K]
def simulate(self):
    """Generate synthetic datasets and evaluate every configured model on each.

    For every parameter combination in ``self.l``, one case/control dataset
    is generated, then each model named in ``self.models`` is fitted to it.

    Returns
    -------
    None
        Fills up self.data, self.parameters and self.mcmc_results
    """
    # One result dict per configured model.
    for j in range(len(self.models)):
        self.results[j] = {}

    i = 0
    # For each parameter combination:
    for c, k, nt, ns, b, w, nr in self.l:
        # Generate dataset (sigma = 0.01 * I keeps per-sample noise small).
        temp_data = gen.generate_case_control(cases=c, K=k, n_total=nt,
                                              n_samples=ns, b_true=b,
                                              w_true=w,
                                              sigma=np.identity(k) * 0.01)
        self.data[i] = temp_data
        x_temp = temp_data.obs.values
        y_temp = temp_data.X

        # Write parameter combination
        s = [c, k, nt, ns, b, w, nr]
        print('Simulating:', s)
        self.parameters.loc[i] = s

        j = 0
        # For each model:
        for model in self.models:
            # If Poisson model: Simulate, eval Poisson
            if model == "Poisson":
                print("Model: Poisson")
                # Catch edge case of perfect separation: hard-code the
                # confusion-matrix counts (tp, tn, fp, fn).
                if ns == [1, 1]:
                    self.results[j][i] = (1, 4, 0, 0)
                else:
                    model_temp = om.PoissonModel(covariate_matrix=x_temp,
                                                 data_matrix=y_temp)
                    model_temp.fit_model()
                    tp, tn, fp, fn = model_temp.eval_model()
                    self.results[j][i] = (tp, tn, fp, fn)
            # If simple model: Simulate, set "final_parameter" to 0 if 95%
            # credible interval includes 0
            elif model == "Simple":
                print("Model: Simple")
                ana = ca.CompositionalAnalysis(temp_data, self.formula,
                                               baseline_index="simple")
                result_temp = ana.sample_hmc(
                    num_results=int(nr), n_burnin=self.n_burnin,
                    step_size=self.step_size,
                    num_leapfrog_steps=self.num_leapfrog_steps)
                alphas_df, betas_df = result_temp.summary_prepare(
                    credible_interval=0.95)
                # Zero out effects whose 95% HPD interval spans 0.
                betas_df.loc[:, "final_parameter"] = np.where(
                    (betas_df.loc[:, "hpd_2.5%"] < 0) &
                    (betas_df.loc[:, "hpd_97.5%"] > 0),
                    0, betas_df.loc[:, "final_parameter"])
                self.results[j][i] = (alphas_df, betas_df)
            # If baseline model: Simulate with baseline, else: without.
            # The baseline index is always the last one (k - 1).
            elif model == "Baseline":
                print("Model: Baseline")
                ana = ca.CompositionalAnalysis(temp_data, self.formula,
                                               baseline_index=k - 1)
                result_temp = ana.sample_hmc(
                    num_results=int(nr), n_burnin=self.n_burnin,
                    step_size=self.step_size,
                    num_leapfrog_steps=self.num_leapfrog_steps)
                self.results[j][i] = result_temp.summary_prepare()
            elif model == "NoBaseline":
                print("Model: No Baseline")
                ana = ca.CompositionalAnalysis(temp_data, self.formula,
                                               baseline_index=None)
                result_temp = ana.sample_hmc(
                    num_results=int(nr), n_burnin=self.n_burnin,
                    step_size=self.step_size,
                    num_leapfrog_steps=self.num_leapfrog_steps)
                self.results[j][i] = result_temp.summary_prepare()
            # If SCDC model: Export data, run R script
            elif model == "SCDC":
                print("model: SCDC")
                # NOTE(review): this rebinds the loop variable `model`;
                # harmless here because it is reassigned next iteration,
                # but a distinct name would be clearer.
                model = om.scdney_model(data=temp_data, ns=ns)
                r = model.analyze()
                self.results[j][i] = r
            else:
                print("Not a valid model specified")
            # HMC sampling, save results
            j += 1
        i += 1

    return None
# NOTE(review): the opening of this call (dropping rare taxa, columns where
# all counts are < 10) is outside this chunk; this is its trailing part.
    [x for x in biom_data.columns[:-4] if all(biom_data[x] < 10)], 1)

data_nonrare = dat.from_pandas(biom_data_nonrare, metadata_columns)

# Shorter covariate names.
data_nonrare.obs = data_nonrare.obs.rename(
    columns={
        "reported-antibiotic-usage": "antibiotic",
        "body-site": "site",
        "days-since-experiment-start": "days_since_start"
    })
print(data_nonrare.X)
print(data_nonrare.obs)

#%%
# No significances
# Model with subject as covariate
model_subject = mod.CompositionalAnalysis(data, "subject",
                                          baseline_index=None)
result_subject = model_subject.sample_hmc(num_results=int(20000),
                                          n_burnin=5000)
result_subject.summary_extended(hdi_prob=0.95)

#%%
az.plot_trace(result_subject, var_names=["beta"])
plt.show()

#%%
# Model with antibiotic use as covariate
model_anti = mod.CompositionalAnalysis(data, "antibiotic",
                                       baseline_index=None)
result_anti = model_anti.sample_hmc(num_results=int(20000), n_burnin=5000)
print(data.obs)
print(data.var)

#%%
# Count matrix: drop the sample-id column, cast to int.
cells = cell_counts.iloc[:, 1:].to_numpy().astype("int")
print(cells)

# Condition label = sample id with trailing digits stripped.
# NOTE(review): newer pandas treats str.replace patterns literally unless
# regex=True is passed — confirm the pandas version this runs against.
obs = pd.DataFrame(cell_counts["sample_id"])
obs["Condition"] = obs["sample_id"].str.replace(r"[0-9]", "")
print(obs)

# Cell-type index from the count columns (outer level dropped).
var = pd.DataFrame(index=cell_counts.iloc[:, 1:].columns.droplevel(0))
print(var)

data = ad.AnnData(X=cells.astype("int32"), obs=obs, var=var)

#%%
importlib.reload(mod)

# Baseline model with cell type index 3 as the reference.
model = mod.CompositionalAnalysis(data=data, formula="Condition",
                                  baseline_index=3)
result = model.sample_hmc()

#%%
result.summary()

#%%
data_scdcdm = dat.from_pandas(data_bal_expr, col)
print(data_scdcdm.X.shape)

#%%
# Free up some memory
del ([counts_bal, data, data_bal, metadata, meta_rel, file, otus, split])

#%%
importlib.reload(mod)

# Model with "mbs_consolidated" as covariate; no baseline, no burn-in.
model_mbs = mod.CompositionalAnalysis(data_scdcdm, "mbs_consolidated",
                                      baseline_index=None)
result_mbs = model_mbs.sample_hmc(num_results=int(10000), n_burnin=0)
result_mbs.summary_extended(hdi_prob=0.95)

#%%
# Taxa whose inclusion probability is strictly between 0 and 1.
tax_interesting = result_mbs.effect_df[
    ~result_mbs.effect_df["Inclusion probability"].isin([0, 1])]
print([x[1] for x in tax_interesting.index])

#%%