Esempio n. 1
0
    def __init__(
        self,
        table: biom.table.Table,
        metadata: pd.DataFrame,
        model_path: str,
        num_iter: int = 500,
        num_warmup: int = None,
        chains: int = 4,
        seed: float = 42,
        parallelize_across: str = "chains"
    ):
        """Record the table, sampler settings and the base data dictionary.

        :param table: Feature table; features are read from its
            "observation" axis and samples from its "sample" axis
        :param metadata: Accepted but not used by this constructor
        :param model_path: Stored unchanged as ``self.model_path``
        :param num_iter: Stored as ``self.num_iter``
        :param num_warmup: Stored as ``self.num_warmup``; when None the
            value of ``num_iter`` is used instead
        :param chains: Stored as ``self.chains``
        :param seed: Stored as ``self.seed``
        :param parallelize_across: Stored as ``self.parallelize_across``
        """
        # Sampler configuration
        self.model_path = model_path
        self.num_iter = num_iter
        self.num_warmup = num_iter if num_warmup is None else num_warmup
        self.chains = chains
        self.seed = seed
        self.parallelize_across = parallelize_across

        # Input table and its axis labels
        self.table = table
        self.feature_names = table.ids(axis="observation")
        self.sample_names = table.ids(axis="sample")

        # Filled in later; nothing is compiled or fitted at construction
        self.sm = None
        self.fit = None

        # Dense counts, transposed and cast to integers
        dense_counts = table.matrix_data.todense()
        table_shape = table.shape
        self.dat = {
            "y": dense_counts.T.astype(int),
            "D": table_shape[0],  # number of features
            "N": table_shape[1],  # number of samples
        }

        self.specifications = {}
Esempio n. 2
0
    def __init__(
        self,
        table: biom.table.Table,
        model_path: str,
        num_iter: int = 500,
        num_warmup: int = None,
        chains: int = 4,
        seed: float = 42,
    ):
        """Record sampler settings and the table dimensions.

        :param table: Feature table; only its sample IDs and shape are read
        :param model_path: Stored unchanged as ``self.model_path``
        :param num_iter: Stored as ``self.num_iter``
        :param num_warmup: Stored as ``self.num_warmup``; when None the
            value of ``num_iter`` is used instead
        :param chains: Stored as ``self.chains``
        :param seed: Stored as ``self.seed``
        """
        # Sampler configuration
        self.model_path = model_path
        self.num_iter = num_iter
        self.num_warmup = num_iter if num_warmup is None else num_warmup
        self.chains = chains
        self.seed = seed

        self.sample_names = table.ids(axis="sample")

        # Filled in later; nothing is compiled or fitted at construction
        self.sm = None
        self.fit = None

        table_shape = table.shape
        self.dat = {
            "D": table_shape[0],  # number of features
            "N": table_shape[1],  # number of samples
        }

        # Starts out False at construction
        self.specified = False
Esempio n. 3
0
    def __init__(self,
                 table: biom.table.Table,
                 formula: str,
                 group_var: str,
                 metadata: pd.DataFrame,
                 num_iter: int = 500,
                 num_warmup: int = None,
                 chains: int = 4,
                 seed: float = 42,
                 beta_prior: float = 5.0,
                 cauchy_scale: float = 5.0,
                 group_var_prior: float = 1.0):
        """Configure the model stored under "negative_binomial" / "lme".

        :param table: Feature table; per-sample sums are log-transformed
            and passed to the model as ``depth``
        :param formula: Forwarded to the parent constructor
        :param group_var: Column of ``metadata`` holding each sample's group
        :param metadata: Sample metadata; forwarded to the parent constructor
            and indexed by ``self.sample_names`` to look up groups
        :param num_iter: Forwarded to the parent constructor
        :param num_warmup: Forwarded to the parent constructor
        :param chains: Forwarded to the parent constructor
        :param seed: Forwarded to the parent constructor
        :param beta_prior: Passed to the model as ``B_p``
        :param cauchy_scale: Passed to the model as ``phi_s``
        :param group_var_prior: Passed to the model as ``u_p``
        """
        parent_kwargs = {
            "table": table,
            "formula": formula,
            "metadata": metadata,
            "model_path": DEFAULT_MODEL_DICT["negative_binomial"]["lme"],
            "num_iter": num_iter,
            "num_warmup": num_warmup,
            "chains": chains,
            "seed": seed,
            "parallelize_across": "chains",
        }
        super().__init__(**parent_kwargs)

        # Group label of every sample, ordered like the table's samples
        sample_groups = metadata[group_var].loc[self.sample_names]
        distinct_groups = sample_groups.unique()

        # Stan 1-indexes arrays, so shift the zero-based category codes
        group_codes = sample_groups.astype("category").cat.codes + 1
        # Category encoding sorts alphabetically; sort the labels to match
        self.groups = np.sort(distinct_groups)

        self.add_parameters({
            "depth": np.log(table.sum(axis="sample")),  # sampling depths
            "B_p": beta_prior,
            "phi_s": cauchy_scale,
            "S": len(distinct_groups),
            "subj_ids": group_codes.values,
            "u_p": group_var_prior
        })

        per_sample_feature = ["tbl_sample", "feature"]
        dimension_names = {
            "beta": ["covariate", "feature"],
            "phi": ["feature"],
            "subj_int": ["group", "feature"],
            "log_lhood": list(per_sample_feature),
            "y_predict": list(per_sample_feature)
        }
        coordinate_labels = {
            "covariate": self.colnames,
            "feature": self.feature_names,
            "tbl_sample": self.sample_names,
            "group": self.groups
        }
        self.specify_model(params=["beta", "phi", "subj_int"],
                           dims=dimension_names,
                           coords=coordinate_labels,
                           include_observed_data=True,
                           posterior_predictive="y_predict",
                           log_likelihood="log_lhood")

        # "alr_params" is only registered when parallelizing across chains
        if self.parallelize_across != "chains":
            return
        self.specifications["alr_params"] = ["beta", "subj_int"]
Esempio n. 4
0
    def __init__(
        self,
        table: biom.table.Table,
        formula: str,
        metadata: pd.DataFrame,
        num_iter: int = 500,
        num_warmup: int = None,
        chains: int = 4,
        seed: float = 42,
        beta_prior: float = 5.0,
        cauchy_scale: float = 5.0,
        parallelize_across: str = "chains",
    ):
        """Configure a model from the "negative_binomial" model entries.

        :param table: Feature table; per-sample sums are log-transformed
            and passed to the model as ``depth``
        :param formula: Forwarded to the parent constructor
        :param metadata: Forwarded to the parent constructor
        :param num_iter: Forwarded to the parent constructor
        :param num_warmup: Forwarded to the parent constructor
        :param chains: Forwarded to the parent constructor
        :param seed: Forwarded to the parent constructor
        :param beta_prior: Passed to the model as ``B_p``
        :param cauchy_scale: Passed to the model as ``phi_s``
        :param parallelize_across: Key used to pick the model path from
            ``DEFAULT_MODEL_DICT["negative_binomial"]``; also forwarded to
            the parent constructor
        """
        nb_model_paths = DEFAULT_MODEL_DICT["negative_binomial"]
        selected_path = nb_model_paths[parallelize_across]
        super().__init__(
            table=table,
            formula=formula,
            metadata=metadata,
            model_path=selected_path,
            num_iter=num_iter,
            num_warmup=num_warmup,
            chains=chains,
            seed=seed,
            parallelize_across=parallelize_across,
        )

        log_depths = np.log(table.sum(axis="sample"))  # sampling depths
        self.add_parameters({
            "depth": log_depths,
            "B_p": beta_prior,
            "phi_s": cauchy_scale
        })

        dimension_names = {
            "beta": ["covariate", "feature"],
            "phi": ["feature"],
            "log_lhood": ["tbl_sample", "feature"],
            "y_predict": ["tbl_sample", "feature"]
        }
        coordinate_labels = {
            "covariate": self.colnames,
            "feature": self.feature_names,
            "tbl_sample": self.sample_names
        }
        self.specify_model(
            params=["beta", "phi"],
            dims=dimension_names,
            coords=coordinate_labels,
            include_observed_data=True,
            posterior_predictive="y_predict",
            log_likelihood="log_lhood",
        )

        # "alr_params" is only registered when parallelizing across chains
        if self.parallelize_across == "chains":
            self.specifications["alr_params"] = ["beta"]
Esempio n. 5
0
    def __init__(
        self,
        table: biom.table.Table,
        formula: str,
        metadata: pd.DataFrame,
        num_iter: int = 500,
        num_warmup: int = None,
        chains: int = 4,
        seed: float = 42,
        beta_prior: float = 5.0,
    ):
        """Configure the model stored under the "multinomial" entry.

        :param table: Feature table; integer per-sample sums are passed to
            the model as ``depth``
        :param formula: Passed to ``self.create_regression``
        :param metadata: Passed to ``self.create_regression``
        :param num_iter: Forwarded to the parent constructor
        :param num_warmup: Forwarded to the parent constructor
        :param chains: Forwarded to the parent constructor
        :param seed: Forwarded to the parent constructor
        :param beta_prior: Passed to the model as ``B_p``
        """
        multinomial_path = DEFAULT_MODEL_DICT["multinomial"]
        super().__init__(table=table,
                         model_path=multinomial_path,
                         num_iter=num_iter,
                         num_warmup=num_warmup,
                         chains=chains,
                         seed=seed)
        self.create_regression(formula=formula, metadata=metadata)

        integer_depths = table.sum(axis="sample").astype(int)
        self.add_parameters({
            "B_p": beta_prior,
            "depth": integer_depths
        })

        dimension_names = {
            "beta": ["covariate", "feature_alr"],
            "log_lhood": ["tbl_sample"],
            "y_predict": ["tbl_sample", "feature"]
        }
        coordinate_labels = {
            "covariate": self.colnames,
            "feature": self.feature_names,
            # every feature except the first one
            "feature_alr": self.feature_names[1:],
            "tbl_sample": self.sample_names,
        }
        self.specify_model(
            params=["beta"],
            dims=dimension_names,
            coords=coordinate_labels,
            include_observed_data=True,
            posterior_predictive="y_predict",
            log_likelihood="log_lhood",
        )