def test_case_control_gen(self):
    """
    Tests data generation for case/control scenarios

    A dataset is generated with a fixed random seed and its obs, X and uns
    entries are compared against hard-coded reference values. A message is
    printed for every comparison that fails.

    Returns
    -------
    None
        The combined outcome of all comparisons is passed to self.assertTrue
    """
    np.random.seed(1234)

    # Fixed generator arguments
    cases = 1
    K = 2
    n_total = 1000
    n_samples = [2, 2]
    noise_std_true = 0
    sigma = None
    b_true = None
    w_true = None

    data = gen.generate_case_control(
        cases, K, n_total, n_samples, noise_std_true, sigma, b_true, w_true)

    # Covariate column: compared with a small absolute tolerance
    obs_ok = not any(np.abs(data.obs["x_0"] - [0, 0, 1, 1]) > 1e-5)
    if not obs_ok:
        print("obs is not correct!")

    # Count matrix: compared exactly
    expected_counts = np.array([[74., 926.], [58., 942.], [32., 968.], [53., 947.]])
    x_ok = np.array_equal(data.X, expected_counts)
    if not x_ok:
        print("X is not correct!")

    # Ground-truth parameters: both comparisons are always evaluated
    b_ok = np.array_equal(
        data.uns["b_true"],
        np.array([-1.8508832, 0.7326526], dtype=np.float32))
    w_ok = np.array_equal(data.uns["w_true"], np.array([[0., 0.]]))
    uns_ok = b_ok and w_ok
    if not uns_ok:
        print("uns is not correct!")

    self.assertTrue(obs_ok and x_ok and uns_ok)
def simulate(self):
    """
    Generate single-cell-like datasets and run HMC sampling on each of them

    Returns
    -------
    None
        For every parameter combination in self.simulation_params, the
        combination is written to self.parameters and the prepared result
        summary is stored in self.mcmc_results under the same running index
    """
    # enumerate supplies the running index shared by both result containers
    for idx, (n_cases, n_types, total, samples, b_true, w_true, num_results) in enumerate(
            self.simulation_params):
        # Generate the dataset for this parameter combination
        dataset = gen.generate_case_control(
            cases=n_cases,
            K=n_types,
            n_total=total,
            n_samples=samples,
            b_true=b_true,
            w_true=w_true)

        # Record the parameter combination
        param_row = [n_cases, n_types, total, samples, b_true, w_true, num_results]
        print('Simulating:', param_row)
        self.parameters.loc[idx] = param_row

        # Sample with the configured baseline index and keep the summary
        analysis = ca.CompositionalAnalysis(
            dataset, self.formula, baseline_index=self.baseline_index)
        sampled = analysis.sample_hmc(
            num_results=int(num_results),
            n_burnin=self.n_burnin,
            step_size=self.step_size,
            num_leapfrog_steps=self.num_leapfrog_steps)
        self.mcmc_results[idx] = sampled.summary_prepare()

    return None
# Show every column when printing data frames
pd.set_option('display.max_columns', None)

#%%
# Artificial data: one generated case/control dataset, printed for inspection

np.random.seed(1234)

n = 3
cases = 1
K = 5
n_samples = [n] * 2
n_total = np.full(2 * n, 1000)

# Only the first entry of n_total is handed to the generator
data = gen.generate_case_control(
    cases,
    K,
    n_total[0],
    n_samples,
    b_true=np.log(np.full(K, 0.2)).tolist(),
    w_true=np.array([[1, 0, 0, 0, 0]]),
)

print(data.uns["w_true"])
print(data.uns["b_true"])
print(data.X)
print(data.obs)

#%%
n = 3
cases = 2
K = 5
def simulate(self):
    """
    Generation and modeling of single-cell-like data

    For every parameter combination in self.l one dataset is generated and
    every model listed in self.models is applied to it.

    Returns
    -------
    None
        Fills up self.data, self.parameters and self.results, where
        self.results[j][i] is the outcome of the j-th model on the i-th
        parameter combination
    """

    def run_hmc(dataset, baseline_index, num_results):
        # All HMC-based models share this call and differ only in baseline_index
        analysis = ca.CompositionalAnalysis(
            dataset, self.formula, baseline_index=baseline_index)
        return analysis.sample_hmc(
            num_results=int(num_results),
            n_burnin=self.n_burnin,
            step_size=self.step_size,
            num_leapfrog_steps=self.num_leapfrog_steps)

    # One result container per model
    for j, _ in enumerate(self.models):
        self.results[j] = {}

    # For each parameter combination:
    for i, (c, k, nt, ns, b, w, nr) in enumerate(self.l):
        # Generate dataset
        temp_data = gen.generate_case_control(
            cases=c, K=k, n_total=nt, n_samples=ns, b_true=b, w_true=w,
            sigma=np.identity(k) * 0.01)
        self.data[i] = temp_data
        x_temp = temp_data.obs.values
        y_temp = temp_data.X

        # Write parameter combination
        s = [c, k, nt, ns, b, w, nr]
        print('Simulating:', s)
        self.parameters.loc[i] = s

        # For each model:
        for j, model in enumerate(self.models):
            # If Poisson model: Simulate, eval Poisson
            if model == "Poisson":
                print("Model: Poisson")
                # Catch edge case of perfect separation. array_equal makes the
                # check work for lists, tuples and arrays alike. The stored
                # tuple follows the (tp, tn, fp, fn) order used below.
                if np.array_equal(ns, [1, 1]):
                    self.results[j][i] = (1, 4, 0, 0)
                else:
                    model_temp = om.PoissonModel(
                        covariate_matrix=x_temp, data_matrix=y_temp)
                    model_temp.fit_model()
                    tp, tn, fp, fn = model_temp.eval_model()
                    self.results[j][i] = (tp, tn, fp, fn)

            # If simple model: Simulate, set "final_parameter" to 0
            # if 95% confint includes 0
            elif model == "Simple":
                print("Model: Simple")
                result_temp = run_hmc(temp_data, "simple", nr)
                alphas_df, betas_df = result_temp.summary_prepare(
                    credible_interval=0.95)
                interval_has_zero = (
                    (betas_df.loc[:, "hpd_2.5%"] < 0)
                    & (betas_df.loc[:, "hpd_97.5%"] > 0))
                betas_df.loc[:, "final_parameter"] = np.where(
                    interval_has_zero, 0, betas_df.loc[:, "final_parameter"])
                self.results[j][i] = (alphas_df, betas_df)

            # If baseline model: the baseline index is always the last one
            elif model == "Baseline":
                print("Model: Baseline")
                self.results[j][i] = run_hmc(temp_data, k - 1, nr).summary_prepare()

            # If no-baseline model: sample without a baseline index
            elif model == "NoBaseline":
                print("Model: No Baseline")
                self.results[j][i] = run_hmc(temp_data, None, nr).summary_prepare()

            # If SCDC model: Export data, run R script
            elif model == "SCDC":
                print("model: SCDC")
                # Separate name so the loop variable is not overwritten
                scdc_model = om.scdney_model(data=temp_data, ns=ns)
                self.results[j][i] = scdc_model.analyze()

            else:
                print("Not a valid model specified")

    return None