Esempio n. 1
0
    def simulate(self):
        """
        Generate and model one single-cell-like data set per parameter combination.

        Every entry of ``self.simulation_params`` is written as a row of
        ``self.parameters``; the prepared summary of the corresponding HMC run
        is stored in ``self.mcmc_results`` under the same index.

        Returns
        -------
        None
        """

        for idx, (c, k, nt, ns, b, w, nr) in enumerate(self.simulation_params):
            # Draw a synthetic case-control data set for this combination
            temp_data = gen.generate_case_control(
                cases=c, K=k, n_total=nt, n_samples=ns, b_true=b, w_true=w)

            # Record the parameter combination
            param_row = [c, k, nt, ns, b, w, nr]
            print('Simulating:', param_row)
            self.parameters.loc[idx] = param_row

            # The baseline setting is taken from the instance configuration
            analysis = ca.CompositionalAnalysis(
                temp_data, self.formula, baseline_index=self.baseline_index)

            # nr is the number of HMC results to draw for this combination
            hmc_result = analysis.sample_hmc(
                num_results=int(nr),
                n_burnin=self.n_burnin,
                step_size=self.step_size,
                num_leapfrog_steps=self.num_leapfrog_steps)

            self.mcmc_results[idx] = hmc_result.summary_prepare()

        return None
Esempio n. 2
0
    def test_baseline(self):
        """Check that the model with baseline index 5 roughly reproduces the group-wise mean counts."""
        # Fixed seeds for NumPy and TensorFlow
        np.random.seed(1234)
        tf.random.set_seed(5678)

        model_salm = mod.CompositionalAnalysis(
            self.data, formula="Condition", baseline_index=5)

        # Run MCMC and fetch the two summary tables
        sim_results = model_salm.sample_hmc(num_results=20000, n_burnin=5000)
        alpha_df, beta_df = sim_results.summary_prepare()

        # Observed mean cell counts: rows 0-3 versus the remaining rows
        expected_alphas = np.round(np.mean(self.data.X[:4], axis=0), decimals=0)
        expected_betas = np.round(np.mean(self.data.X[4:], axis=0), decimals=0)

        # Mean cell counts predicted by the model
        predicted_alphas = np.round(
            alpha_df.loc[:, "expected_sample"].tolist(), decimals=0)
        predicted_betas = np.round(
            beta_df.loc[:, "expected_sample"].tolist(), decimals=0)

        # A deviation of more than 30 cells in any entry counts as a miss
        alphas_off = any(np.abs(expected_alphas - predicted_alphas) > 30)
        betas_off = any(np.abs(expected_betas - predicted_betas) > 30)

        self.assertTrue(not (alphas_off or betas_off))
Esempio n. 3
0
# Ground-truth parameters stored alongside the data
print(data.uns["w_true"])
print(data.uns["b_true"])

# Count matrix with its sample and feature annotation
print(data.X)
print(data.obs)
print(data.var)

#%%
# Pick up edits to the model and result modules
importlib.reload(mod)
importlib.reload(res)

#data.obs["x_0"] = ["A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", "B"]
#data.obs["x_1"] = ["A", "A", "A", "B", "B", "B", "A", "A", "A", "B", "B", "B"]

ana = mod.CompositionalAnalysis(data, formula="x_0", baseline_index=None)
print(ana.x)
print(ana.covariate_names)

#%%
params_mcmc = ana.sample_hmc(num_results=1000, n_burnin=500)
print(params_mcmc)

#%%
params_mcmc.summary()

#%%
# NOTE(review): this call passes credible_interval while the palm model below
# passes hdi_prob - confirm which keyword the installed version expects.
params_mcmc.summary_extended(credible_interval=0.9)
#%%
az.plot_trace(params_mcmc)
plt.show()
print(data.obs)

#%%
importlib.reload(viz)

sns.set(style="ticks", font_scale=2)
args_swarmplot = dict(hue="subject", size=10, palette="Reds")
viz.boxplot_facets(data, feature="site")
plt.show()

#%%

# Model restricted to the two palm sites
model_palms = mod.CompositionalAnalysis(
    data[data.obs["site"].isin(["left palm", "right palm"])],
    formula="site",
    baseline_index=None)

result_palms = model_palms.sample_hmc(num_results=20000, n_burnin=5000)

result_palms.summary_extended(hdi_prob=0.95)

#%%

# Lift the subplot cap so that every trace is drawn
with az.rc_context(rc={'plot.max_subplots': None}):
    az.plot_trace(result_palms, compact=True)
    plt.show()

#%%

# less samples, less burnin
Esempio n. 5
0
# Cluster labels 1..60
cluster_names = np.arange(1, 61)
print(cluster_names)

# Feature annotation: a column-less frame indexed by cluster label
cell_types = pd.DataFrame(index=cluster_names)
print(cell_types)

#%%
# Assemble counts, sample annotation and feature annotation

sle_freq_data = ad.AnnData(X=cell_counts, obs=group_df, var=cell_types)
print(sle_freq_data.obs)

#%%
# Modeling without baseline

ana = mod.CompositionalAnalysis(sle_freq_data,
                                formula="Group",
                                baseline_index=None)

#%%
ca_result = ana.sample_hmc(num_results=20000, n_burnin=5000)

ca_result.summary(hdi_prob=0.95)

#%%
az.plot_trace(ca_result)
plt.show()

#%%

# Modeling with baseline
# NOTE(review): the heading says "with baseline" but baseline_index=None is
# passed, exactly as in the model above - confirm the intended index.
ana_2 = mod.CompositionalAnalysis(sle_freq_data,
                                  formula="Group",
                                  baseline_index=None)
Esempio n. 6
0
# Pick up edits to the result module
importlib.reload(res)
import patsy as pt

formula = "x_0"

model = mod.NoBaselineModelNoEdward(
    covariate_matrix=np.array(covariate_matrix),
    data_matrix=data_matrix,
    cell_types=cell_types,
    covariate_names=covariate_names,
    formula=formula)
#print(model.target_log_prob_fn(*(params.values())))

#%%
result = model.sample_hmc(num_results=1000, n_burnin=500)

result.summary()

#%%
model_2 = ca.CompositionalAnalysis(data, formula="x_0", baseline_index=None)
# Log-probability at the first five stored parameters, passed in index order
print(model_2.target_log_prob_fn(*(model_2.params[pos] for pos in range(5))))

#%%
res_2 = model_2.sample_hmc(num_results=20000, n_burnin=5000)
res_2.summary()



#%%

# Time points 0..4, repeated twice
time = np.tile(np.arange(5, dtype="float32"), 2)

# Standard-normal draws of shape (D, K)
phi = np.random.normal(loc=0, scale=1, size=(D, K))

# Stack N copies of phi along a new leading axis
phi_ = np.repeat(phi[np.newaxis, ...], N, axis=0)
Esempio n. 7
0
K = 5
n_samples = [n, n]
# One total count of 1000 per sample (2 * n samples)
n_total = np.full([2 * n], 1000)

# Effect on the first of the five components only; every base parameter is log(0.2)
data = gen.generate_case_control(
    cases,
    K,
    n_total[0],
    n_samples,
    w_true=np.array([[1, 0, 0, 0, 0]]),
    b_true=np.log(np.full(K, 0.2)).tolist())

print(data.uns["w_true"])
print(data.uns["b_true"])

print(data.X)
print(data.obs)

#%%
importlib.reload(mod)

ana = mod.CompositionalAnalysis(data, formula="x_0", baseline_index=None)
print(ana.x)
print(ana.y)
print(ana.covariate_names)

params_mcmc = ana.sample_hmc(num_results=1000, n_burnin=500)
print(params_mcmc)

#%%

params_mcmc.summary(hdi_prob=0.9)
Esempio n. 8
0
# Constant pseudo-covariate: every sample gets the value 1
data.obs["c"] = 1

print(data.X)

#%%

viz.plot_feature_stackbars(data, ["day"])

#%%
# Pick up edits to the analysis, model and tm modules
importlib.reload(ca)
importlib.reload(mod)
importlib.reload(tm)

model = ca.CompositionalAnalysis(
    data, formula="c", baseline_index=None, time_column="day")

# Sampling is run without burn-in here
result = model.sample_hmc(num_results=20000, n_burnin=0)

result.summary()

#%%

# Last entry of the "phi" posterior
print(result.posterior["phi"][-1])

#%%

az.plot_trace(result, var_names=["beta", "phi"], compact=True)
plt.show()
Esempio n. 9
0
    data_matrix=data_matrix,
    cell_types=cell_types,
    covariate_names=covariate_names,
    formula=formula)
# Log-probability of the model at its stored parameter values
print(
    model.target_log_prob_fn(*(
        model.params[key]
        for key in ("mu_b", "sigma_b", "b_offset", "ind_raw", "alpha"))))

#%%
result = model.sample_hmc(num_results=1000, n_burnin=500)

result.summary()

#%%
model_2 = ca.CompositionalAnalysis(data, formula="x_0", baseline_index=None)
# Same check for the second model, whose parameters are addressed by position
print(model_2.target_log_prob_fn(*(model_2.params[pos] for pos in range(5))))

#%%
res_2 = model_2.sample_hmc(num_results=1000, n_burnin=500)
res_2.summary()

#%%
# Dimensions: D from the second axis of x, N and K from the axes of y
D = x.shape[1]
N, K = y.shape[0], y.shape[1]
dtype = tf.float32
beta_size = [D, K]
Esempio n. 10
0
    def simulate(self):
        """
        Generation and modeling of single-cell-like data

        For every parameter combination in ``self.l`` one case-control data set
        is generated, and every model named in ``self.models`` is applied to it.

        Returns
        -------
        None
            Fills up ``self.data``, ``self.parameters`` and ``self.results``.
            ``self.results[j][i]`` holds the outcome of the j-th model on the
            i-th parameter combination. An unknown model name is reported on
            stdout and leaves no entry in ``self.results``.
        """

        def run_hmc(data, baseline_index, num_results):
            # HMC sampling shared by all CompositionalAnalysis-based models
            ana = ca.CompositionalAnalysis(data,
                                           self.formula,
                                           baseline_index=baseline_index)
            return ana.sample_hmc(
                num_results=int(num_results),
                n_burnin=self.n_burnin,
                step_size=self.step_size,
                num_leapfrog_steps=self.num_leapfrog_steps)

        # One result dictionary per model, keyed later by parameter index
        for j, _ in enumerate(self.models):
            self.results[j] = {}

        # For each parameter combination:
        for i, (c, k, nt, ns, b, w, nr) in enumerate(self.l):
            # Generate dataset
            temp_data = gen.generate_case_control(cases=c,
                                                  K=k,
                                                  n_total=nt,
                                                  n_samples=ns,
                                                  b_true=b,
                                                  w_true=w,
                                                  sigma=np.identity(k) * 0.01)

            self.data[i] = temp_data

            x_temp = temp_data.obs.values
            y_temp = temp_data.X

            # Write parameter combination
            s = [c, k, nt, ns, b, w, nr]
            print('Simulating:', s)
            self.parameters.loc[i] = s

            # For each model:
            for j, model in enumerate(self.models):
                # If Poisson model: Simulate, eval Poisson
                if model == "Poisson":
                    print("Model: Poisson")
                    # Catch edge case of perfect separation: the model is not
                    # fitted and a fixed (tp, tn, fp, fn) tuple is stored
                    if ns == [1, 1]:
                        self.results[j][i] = (1, 4, 0, 0)
                    else:
                        model_temp = om.PoissonModel(covariate_matrix=x_temp,
                                                     data_matrix=y_temp)
                        model_temp.fit_model()
                        tp, tn, fp, fn = model_temp.eval_model()
                        self.results[j][i] = (tp, tn, fp, fn)

                # If simple model: Simulate, set "final_parameter" to 0 if 95% confint includes 0
                elif model == "Simple":
                    print("Model: Simple")
                    result_temp = run_hmc(temp_data, "simple", nr)
                    alphas_df, betas_df = result_temp.summary_prepare(
                        credible_interval=0.95)

                    covers_zero = ((betas_df.loc[:, "hpd_2.5%"] < 0) &
                                   (betas_df.loc[:, "hpd_97.5%"] > 0))
                    betas_df.loc[:, "final_parameter"] = np.where(
                        covers_zero, 0, betas_df.loc[:, "final_parameter"])

                    self.results[j][i] = (alphas_df, betas_df)

                # Baseline model: the baseline index is always the last one
                elif model == "Baseline":
                    print("Model: Baseline")
                    result_temp = run_hmc(temp_data, k - 1, nr)
                    self.results[j][i] = result_temp.summary_prepare()

                # Same model without a baseline
                elif model == "NoBaseline":
                    print("Model: No Baseline")
                    result_temp = run_hmc(temp_data, None, nr)
                    self.results[j][i] = result_temp.summary_prepare()

                # If SCDC model: Export data, run R script
                elif model == "SCDC":
                    print("model: SCDC")
                    # Separate name so the loop variable is not rebound
                    scdc_model = om.scdney_model(data=temp_data, ns=ns)
                    self.results[j][i] = scdc_model.analyze()

                else:
                    print("Not a valid model specified")

        return None
    [x for x in biom_data.columns[:-4] if all(biom_data[x] < 10)], 1)

# Build the data object from the filtered table and shorten the metadata column names
data_nonrare = dat.from_pandas(biom_data_nonrare, metadata_columns)
data_nonrare.obs = data_nonrare.obs.rename(columns={
    "reported-antibiotic-usage": "antibiotic",
    "body-site": "site",
    "days-since-experiment-start": "days_since_start",
})
print(data_nonrare.X)
print(data_nonrare.obs)

#%%
# No significances
# Model with subject as covariate
# NOTE(review): the models below are fitted on `data`, not on the
# `data_nonrare` object built above - confirm this is intended.
model_subject = mod.CompositionalAnalysis(data,
                                          formula="subject",
                                          baseline_index=None)

result_subject = model_subject.sample_hmc(num_results=20000, n_burnin=5000)

result_subject.summary_extended(hdi_prob=0.95)

#%%
az.plot_trace(result_subject, var_names=["beta"])
plt.show()

#%%
# Model with antibiotic use as covariate
model_anti = mod.CompositionalAnalysis(data,
                                       formula="antibiotic",
                                       baseline_index=None)

result_anti = model_anti.sample_hmc(num_results=20000, n_burnin=5000)
Esempio n. 12
0
print(data.obs)
print(data.var)

#%%

# Count matrix: every column after the first, as integers
cells = cell_counts.iloc[:, 1:].to_numpy().astype("int")
print(cells)

# Sample annotation: the condition label is the sample id with all digits removed.
# regex=True is passed explicitly because the pattern is a regular expression;
# with regex=False (the pandas >= 2.0 default) it would be matched literally
# and no digit would be stripped.
obs = pd.DataFrame(cell_counts["sample_id"])
obs["Condition"] = obs["sample_id"].str.replace(r"[0-9]", "", regex=True)
print(obs)

# Feature annotation: column labels without their first index level
var = pd.DataFrame(index=cell_counts.iloc[:, 1:].columns.droplevel(0))
print(var)

data = ad.AnnData(X=cells.astype("int32"), obs=obs, var=var)

#%%
importlib.reload(mod)

model = mod.CompositionalAnalysis(data=data,
                                  formula="Condition",
                                  baseline_index=3)
# Sampling with the default settings of sample_hmc
result = model.sample_hmc()

#%%

result.summary()

#%%
Esempio n. 13
0
data_scdcdm = dat.from_pandas(data_bal_expr, col)

print(data_scdcdm.X.shape)

#%%

# Free up some memory by dropping names that are no longer needed

del counts_bal, data, data_bal, metadata, meta_rel, file, otus, split

#%%
importlib.reload(mod)

model_mbs = mod.CompositionalAnalysis(
    data_scdcdm, formula="mbs_consolidated", baseline_index=None)

# Sampling is run without burn-in here
result_mbs = model_mbs.sample_hmc(num_results=10000, n_burnin=0)

result_mbs.summary_extended(hdi_prob=0.95)

#%%

# Keep the effects whose inclusion probability is neither exactly 0 nor exactly 1
tax_interesting = result_mbs.effect_df.loc[
    ~result_mbs.effect_df["Inclusion probability"].isin([0, 1])]

# Second element of each index entry
print([entry[1] for entry in tax_interesting.index])

#%%