Example 1
    def _build_pp_matrix(self):
        """
        From priors and context likelihood of relatedness, determine which predictors should be included in the model

        :return pp: pd.DataFrame [G x K]
            Boolean matrix indicating which predictor variables should be included in BBSR for each response variable
        """

        # Create a predictor boolean array from priors
        pp = np.logical_or(self.prior_mat != 0, self.weights_mat != self.no_prior_weight)

        pp_idx = pp.index
        pp_col = pp.columns

        if self.filter_priors_for_clr:
            # Set priors which have a CLR of 0 to FALSE
            pp = np.logical_and(pp, self.clr_mat != 0).values
        else:
            pp = pp.values

        # Mark the nS predictors with the highest CLR as True (do not include anything with a CLR of 0)
        mask = np.logical_or(self.clr_mat == 0, ~np.isfinite(self.clr_mat)).values
        masked_clr = np.ma.array(self.clr_mat.values, mask=mask)
        for i in range(self.G):
            n_to_keep = min(self.nS, self.K, mask.shape[1] - np.sum(mask[i, :]))
            if n_to_keep == 0:
                continue
            clrs = np.ma.argsort(masked_clr[i, :], endwith=False)[-1 * n_to_keep:]
            pp[i, clrs] = True

        # Rebuild into a boolean DataFrame and set autoregulation (the diagonal) to False
        pp = pd.DataFrame(pp, index=pp_idx, columns=pp_col, dtype=np.dtype(bool))
        pp = utils.df_set_diag(pp, False)

        return pp
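The per-row loop above relies on masked-array sorting to pick the top nS predictors while ignoring zero or non-finite CLR entries. A minimal standalone sketch of that selection step (illustrative values only, not taken from the source):

import numpy as np

# One row of CLR scores; 0 and NaN entries should never be selected
clr_row = np.array([0.0, 2.5, np.nan, 1.0, 3.0])
mask = np.logical_or(clr_row == 0, ~np.isfinite(clr_row))
masked_row = np.ma.array(clr_row, mask=mask)

# With endwith=False the masked entries sort to the front, so the last
# n_to_keep indices are the columns with the highest valid CLR scores
n_to_keep = 2
top_idx = np.ma.argsort(masked_row, endwith=False)[-n_to_keep:]
print(top_idx)  # [1 4]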
Example 2
    def test_dataframe_set_diag(self):

        def check_diag(df, val):
            # Every label shared by the index and columns must hold `val` on the diagonal
            for match in df.index.intersection(df.columns):
                if df.loc[match, match] != val:
                    return False
            return True

        data_frame = pd.DataFrame(np.ones((20, 10)), index=list(range(20)), columns=list(range(10)))
        data_frame2 = utils.df_set_diag(data_frame, 0, copy=True)

        self.assertTrue(check_diag(data_frame2, 0))
        self.assertEqual(data_frame2.sum().sum(), 190)
        self.assertTrue(check_diag(data_frame, 1))
        self.assertEqual(data_frame.sum().sum(), 200)

        utils.df_set_diag(data_frame, 0, copy=False)
        self.assertTrue(check_diag(data_frame, 0))
        self.assertEqual(data_frame.sum().sum(), 190)
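The helper under test is not shown here. A minimal sketch that is consistent with the assertions above (an assumption about the behavior, not the library source; df_set_diag_sketch is a hypothetical name):

import pandas as pd

def df_set_diag_sketch(df, val, copy=True):
    # Operate on a copy unless the caller explicitly asks for in-place modification
    if copy:
        df = df.copy()
    # Set the "diagonal" cell for every label shared by the index and the columns
    for label in df.index.intersection(df.columns):
        df.loc[label, label] = val
    return df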
Example 3
    def run(self, x_df, y_df, bins=None, logtype=DEFAULT_LOG_TYPE):
        """
        Wrapper to calculate the CLR and MI for two data sets that have common condition columns
        :param x_df: pd.DataFrame
        :param y_df: pd.DataFrame
        :param bins: int
            Number of bins for discretizing continuous variables
        :param logtype: np.log func
        :return clr, mi: pd.DataFrame, pd.DataFrame
            CLR and MI DataFrames
        """

        assert check.argument_integer(bins, allow_none=True)
        assert check.indexes_align((x_df.columns, y_df.columns))
        assert x_df.shape[0] > 0
        assert x_df.shape[1] > 0
        assert y_df.shape[0] > 0
        assert y_df.shape[1] > 0

        if bins is not None:
            self.bins = bins

        mi = mutual_information(y_df,
                                x_df,
                                self.bins,
                                temp_dir=self.temp_dir,
                                logtype=logtype)
        mi_bg = mutual_information(x_df,
                                   x_df,
                                   self.bins,
                                   temp_dir=self.temp_dir,
                                   logtype=logtype)
        clr = calc_mixed_clr(utils.df_set_diag(mi, 0),
                             utils.df_set_diag(mi_bg, 0))

        MPControl.sync_processes(pref=SYNC_CLR_KEY)

        return clr, mi
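calc_mixed_clr itself is not shown; as a rough reference, the classic CLR transform z-scores each mutual information value against its row and column backgrounds and combines the two directions in quadrature. A minimal sketch of that classic form on a dense numpy MI matrix (an assumption for illustration; the library's mixed variant additionally uses the x-vs-x background matrix mi_bg):

import numpy as np

def clr_classic_sketch(mi):
    # Z-score each MI value against its column and its row distribution
    z_col = (mi - mi.mean(axis=0)) / mi.std(axis=0)
    z_row = (mi - mi.mean(axis=1, keepdims=True)) / mi.std(axis=1, keepdims=True)
    # Negative z-scores carry no evidence of an interaction; clip them to zero
    z_col = np.clip(z_col, 0.0, None)
    z_row = np.clip(z_row, 0.0, None)
    # Combine the two directions in quadrature
    return np.sqrt(z_col ** 2 + z_row ** 2)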
Example 4
    def _check_prior(self, prior, expression_data, keep_self=False):
        if not keep_self:
            prior = utils.df_set_diag(prior, 0)

        activity_tfs, expr_tfs, drop_tfs = self._determine_tf_status(
            prior, expression_data)

        if len(drop_tfs) > 0:
            msg = "{n} TFs are removed from activity (no expression or prior exists)".format(
                n=len(drop_tfs))
            utils.Debug.vprint(msg, level=0)
            utils.Debug.vprint(" ".join(drop_tfs), level=1)

        prior = prior.drop(drop_tfs, axis=1)

        return prior, activity_tfs, expr_tfs
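_determine_tf_status is not shown here. Based on how its three return values are used above and in the next example, a plausible sketch of the partition (an assumption, not the library implementation; determine_tf_status_sketch is a hypothetical name):

import pandas as pd

def determine_tf_status_sketch(prior, expression_gene_names):
    # TFs with at least one nonzero prior edge can have activity estimated
    has_prior = (prior != 0).any(axis=0).values
    # TFs that are themselves measured in the expression data
    has_expression = prior.columns.isin(expression_gene_names)
    activity_tfs = prior.columns[has_prior]
    expr_tfs = prior.columns[~has_prior & has_expression]
    drop_tfs = prior.columns[~has_prior & ~has_expression]
    return activity_tfs, expr_tfs, drop_tfs

prior = pd.DataFrame([[1, 0, 0], [0, 0, 0]],
                     index=["gene1", "gene2"],
                     columns=["tfA", "tfB", "tfC"])
print(determine_tf_status_sketch(prior, ["gene1", "gene2", "tfB"]))
# tfA has prior targets, tfB is expressed but has no targets, tfC is dropped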
Example 5
    def compute_transcription_factor_activity(prior,
                                              expression_data,
                                              expression_data_halftau=None,
                                              keep_self=False):
        """
        Calculate TFA from a prior and expression data object

        :param prior: pd.DataFrame [G x K]
        :param expression_data: InferelatorData [N x G]
        :param expression_data_halftau: InferelatorData [N x G]
        :param keep_self: bool
        :return: InferelatorData [N x K]
        """

        if not keep_self:
            prior = utils.df_set_diag(prior, 0)

        activity_tfs, expr_tfs, drop_tfs = TFA._determine_tf_status(
            prior, expression_data)

        if len(drop_tfs) > 0:
            msg = "{n} TFs are removed from activity (no expression or prior exists)".format(
                n=len(drop_tfs))
            utils.Debug.vprint(msg, level=0)
            utils.Debug.vprint(" ".join(drop_tfs), level=1)

        prior = prior.drop(drop_tfs, axis=1)

        activity = np.zeros((expression_data.shape[0], prior.shape[1]),
                            dtype=np.float64)

        if len(activity_tfs) > 0:
            # Estimate activity from the prior; use the half-tau expression data if it was provided
            a_cols = prior.columns.isin(activity_tfs)
            expr = expression_data_halftau if expression_data_halftau is not None else expression_data
            activity[:, a_cols] = TFA._calculate_activity(
                prior.loc[:, activity_tfs].values, expr)

        if len(expr_tfs) > 0:
            # TFs without prior targets fall back to their expression values
            e_cols = prior.columns.isin(expr_tfs)
            activity[:, e_cols] = expression_data.get_gene_data(
                expr_tfs, force_dense=True)

        return utils.InferelatorData(activity,
                                     gene_names=prior.columns,
                                     sample_names=expression_data.sample_names,
                                     meta_data=expression_data.meta_data)
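_calculate_activity is also not shown. The standard TFA estimate solves X ≈ A P^T in the least-squares sense using the pseudoinverse of the prior; a minimal sketch under that assumption (not necessarily the library's exact implementation):

import numpy as np

def calculate_activity_sketch(prior, expression):
    # prior: [G x K] connectivity matrix, expression: [N x G] samples by genes
    # Least-squares activity A [N x K] such that expression ~= A @ prior.T
    return expression @ np.linalg.pinv(prior).T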