def __init__(
    self,
    table: biom.table.Table,
    metadata: pd.DataFrame,
    model_path: str,
    num_iter: int = 500,
    num_warmup: int = None,
    chains: int = 4,
    seed: float = 42,
    parallelize_across: str = "chains"
):
    """Record the feature table, sampler settings and base model data.

    :param table: Feature table. Its observation IDs, sample IDs, shape
        and sparse ``matrix_data`` are read here.
    :param metadata: Sample metadata. Not read by this initializer.
    :param model_path: Model file location, stored unchanged.
    :param num_iter: Iteration count, stored as ``self.num_iter``.
    :param num_warmup: Warmup count; when ``None`` the value of
        ``num_iter`` is used instead.
    :param chains: Chain count, stored as ``self.chains``.
    :param seed: Seed value, stored as ``self.seed``.
    :param parallelize_across: Parallelization mode, stored unchanged.
    """
    # Inputs and sampler configuration.
    self.table = table
    self.model_path = model_path
    self.parallelize_across = parallelize_across
    self.num_iter = num_iter
    self.num_warmup = num_iter if num_warmup is None else num_warmup
    self.chains = chains
    self.seed = seed

    # Axis labels taken from the table.
    self.feature_names = table.ids(axis="observation")
    self.sample_names = table.ids(axis="sample")

    # Placeholders; nothing is assigned to them in this initializer.
    self.sm = None
    self.fit = None

    # Dense, transposed, integer-cast copy of the table's sparse matrix.
    counts = table.matrix_data.todense().T.astype(int)
    self.dat = {
        "y": counts,
        "D": table.shape[0],  # number of features
        "N": table.shape[1],  # number of samples
    }
    self.specifications = {}
def __init__(
    self,
    table: biom.table.Table,
    model_path: str,
    num_iter: int = 500,
    num_warmup: int = None,
    chains: int = 4,
    seed: float = 42,
):
    """Record sampler settings and the table dimensions.

    Only the sample IDs and the shape of ``table`` are read; the table
    itself is not stored on the instance.

    :param table: Feature table providing sample IDs and shape.
    :param model_path: Model file location, stored unchanged.
    :param num_iter: Iteration count, stored as ``self.num_iter``.
    :param num_warmup: Warmup count; when ``None`` the value of
        ``num_iter`` is used instead.
    :param chains: Chain count, stored as ``self.chains``.
    :param seed: Seed value, stored as ``self.seed``.
    """
    # Sampler configuration.
    self.model_path = model_path
    self.num_iter = num_iter
    self.num_warmup = num_iter if num_warmup is None else num_warmup
    self.chains = chains
    self.seed = seed

    self.sample_names = table.ids(axis="sample")

    # Placeholders; nothing is assigned to them in this initializer.
    self.sm = None
    self.fit = None

    self.dat = {
        "D": table.shape[0],  # number of features
        "N": table.shape[1],  # number of samples
    }
    # Starts out False; this initializer never sets it to True.
    self.specified = False
def __init__(
    self,
    table: biom.table.Table,
    formula: str,
    group_var: str,
    metadata: pd.DataFrame,
    num_iter: int = 500,
    num_warmup: int = None,
    chains: int = 4,
    seed: float = 42,
    beta_prior: float = 5.0,
    cauchy_scale: float = 5.0,
    group_var_prior: float = 1.0
):
    """Set up the negative binomial model with a per-group intercept.

    The model file is ``DEFAULT_MODEL_DICT["negative_binomial"]["lme"]``
    and the parent initializer is always called with
    ``parallelize_across="chains"``.

    :param table: Feature table; per-sample sums are log-transformed and
        passed to the model as ``depth``.
    :param formula: Design formula, forwarded to the parent initializer.
    :param group_var: Column of ``metadata`` holding the group label of
        each sample.
    :param metadata: Sample metadata indexed by sample ID.
    :param num_iter: Forwarded to the parent initializer.
    :param num_warmup: Forwarded to the parent initializer.
    :param chains: Forwarded to the parent initializer.
    :param seed: Forwarded to the parent initializer.
    :param beta_prior: Passed to the model as ``B_p``.
    :param cauchy_scale: Passed to the model as ``phi_s``.
    :param group_var_prior: Passed to the model as ``u_p``.
    :raises ValueError: If ``group_var`` is missing for any sample in the
        table.
    """
    filepath = DEFAULT_MODEL_DICT["negative_binomial"]["lme"]
    super().__init__(
        table=table,
        formula=formula,
        metadata=metadata,
        model_path=filepath,
        num_iter=num_iter,
        num_warmup=num_warmup,
        chains=chains,
        seed=seed,
        parallelize_across="chains",
    )

    # Group labels in the same order as the table's samples.
    group_var_series = metadata[group_var].loc[self.sample_names]
    if group_var_series.isna().any():
        # A missing label would be encoded as index 0, which is not a
        # valid 1-based group index, and would also inflate the count.
        raise ValueError(
            f"Column '{group_var}' has missing values for samples in the "
            "table; every sample needs a group."
        )

    # Sorted unique labels are the single source of truth for the group
    # coordinate, the group count and the integer encoding. Encoding
    # against them explicitly keeps all three consistent even when the
    # column is already categorical with a custom category order or with
    # categories that do not occur among these samples.
    self.groups = np.sort(np.asarray(group_var_series.unique()))
    group_codes = pd.Categorical(
        group_var_series, categories=self.groups
    ).codes
    # Encode group IDs starting at 1 because Stan 1-indexes arrays
    samp_subj_map = group_codes.astype(int) + 1

    param_dict = {
        "depth": np.log(table.sum(axis="sample")),  # sampling depths
        "B_p": beta_prior,
        "phi_s": cauchy_scale,
        "S": len(self.groups),
        "subj_ids": samp_subj_map,
        "u_p": group_var_prior,
    }
    self.add_parameters(param_dict)

    self.specify_model(
        params=["beta", "phi", "subj_int"],
        dims={
            "beta": ["covariate", "feature"],
            "phi": ["feature"],
            "subj_int": ["group", "feature"],
            "log_lhood": ["tbl_sample", "feature"],
            "y_predict": ["tbl_sample", "feature"],
        },
        coords={
            "covariate": self.colnames,
            "feature": self.feature_names,
            "tbl_sample": self.sample_names,
            "group": self.groups,
        },
        include_observed_data=True,
        posterior_predictive="y_predict",
        log_likelihood="log_lhood",
    )

    if self.parallelize_across == "chains":
        self.specifications["alr_params"] = ["beta", "subj_int"]
def __init__(
    self,
    table: biom.table.Table,
    formula: str,
    metadata: pd.DataFrame,
    num_iter: int = 500,
    num_warmup: int = None,
    chains: int = 4,
    seed: float = 42,
    beta_prior: float = 5.0,
    cauchy_scale: float = 5.0,
    parallelize_across: str = "chains",
):
    """Set up the negative binomial model.

    The model file is looked up in
    ``DEFAULT_MODEL_DICT["negative_binomial"]`` under the key given by
    ``parallelize_across``.

    :param table: Feature table; per-sample sums are log-transformed and
        passed to the model as ``depth``.
    :param formula: Design formula, forwarded to the parent initializer.
    :param metadata: Sample metadata, forwarded to the parent initializer.
    :param num_iter: Forwarded to the parent initializer.
    :param num_warmup: Forwarded to the parent initializer.
    :param chains: Forwarded to the parent initializer.
    :param seed: Forwarded to the parent initializer.
    :param beta_prior: Passed to the model as ``B_p``.
    :param cauchy_scale: Passed to the model as ``phi_s``.
    :param parallelize_across: Selects the model file and is forwarded to
        the parent initializer.
    """
    nb_model_paths = DEFAULT_MODEL_DICT["negative_binomial"]
    stan_file = nb_model_paths[parallelize_across]

    super().__init__(
        table=table,
        formula=formula,
        metadata=metadata,
        model_path=stan_file,
        num_iter=num_iter,
        num_warmup=num_warmup,
        chains=chains,
        seed=seed,
        parallelize_across=parallelize_across,
    )

    log_depths = np.log(table.sum(axis="sample"))  # sampling depths
    self.add_parameters(
        {"depth": log_depths, "B_p": beta_prior, "phi_s": cauchy_scale}
    )

    # Dimension names for each model variable, and the labels along each
    # named dimension.
    per_sample_feature = ["tbl_sample", "feature"]
    dim_names = {
        "beta": ["covariate", "feature"],
        "phi": ["feature"],
        "log_lhood": list(per_sample_feature),
        "y_predict": list(per_sample_feature),
    }
    coord_labels = {
        "covariate": self.colnames,
        "feature": self.feature_names,
        "tbl_sample": self.sample_names,
    }
    self.specify_model(
        params=["beta", "phi"],
        dims=dim_names,
        coords=coord_labels,
        include_observed_data=True,
        posterior_predictive="y_predict",
        log_likelihood="log_lhood",
    )

    if self.parallelize_across == "chains":
        self.specifications["alr_params"] = ["beta"]
def __init__(
    self,
    table: biom.table.Table,
    formula: str,
    metadata: pd.DataFrame,
    num_iter: int = 500,
    num_warmup: int = None,
    chains: int = 4,
    seed: float = 42,
    beta_prior: float = 5.0,
):
    """Set up the multinomial model.

    The model file is ``DEFAULT_MODEL_DICT["multinomial"]``. After the
    parent initializer runs, the regression design is built from
    ``formula`` and ``metadata`` via ``create_regression``.

    :param table: Feature table; integer-cast per-sample sums are passed
        to the model as ``depth``.
    :param formula: Design formula, passed to ``create_regression``.
    :param metadata: Sample metadata, passed to ``create_regression``.
    :param num_iter: Forwarded to the parent initializer.
    :param num_warmup: Forwarded to the parent initializer.
    :param chains: Forwarded to the parent initializer.
    :param seed: Forwarded to the parent initializer.
    :param beta_prior: Passed to the model as ``B_p``.
    """
    stan_file = DEFAULT_MODEL_DICT["multinomial"]
    super().__init__(
        table=table,
        model_path=stan_file,
        num_iter=num_iter,
        num_warmup=num_warmup,
        chains=chains,
        seed=seed,
    )
    self.create_regression(formula=formula, metadata=metadata)

    sample_depths = table.sum(axis="sample").astype(int)
    self.add_parameters({"B_p": beta_prior, "depth": sample_depths})

    dim_names = {
        "beta": ["covariate", "feature_alr"],
        "log_lhood": ["tbl_sample"],
        "y_predict": ["tbl_sample", "feature"],
    }
    coord_labels = {
        "covariate": self.colnames,
        "feature": self.feature_names,
        # "feature_alr" labels omit the first feature.
        "feature_alr": self.feature_names[1:],
        "tbl_sample": self.sample_names,
    }
    self.specify_model(
        params=["beta"],
        dims=dim_names,
        coords=coord_labels,
        include_observed_data=True,
        posterior_predictive="y_predict",
        log_likelihood="log_lhood",
    )