Exemple #1
0
 def infer_gamma(self, ):
     """
     Run a singular value decomposition on every tile covariance matrix.

     NOTE(review): this method looks unfinished. As written it has no
     return statement (so it returns None), never appends to ``gammas``,
     and discards every value it computes. ``log10N`` is not defined
     inside this method; unless it exists at module level the SVD line
     raises NameError on the first tile.
     """
     T = self.T
     # Never appended to or returned below.
     gammas = list()
     # The index `i` is not used in the loop body.
     for i, tile_cov in enumerate(self.tile_covs):
         # The stacked covariances are computed but never read afterwards;
         # the SVD below works on `tile_cov` directly.
         # TODO confirm which of the two was meant to be decomposed.
         temp_tile_cov = stack_temporal_covariances(tile_cov, self.R,
                                                    self.T)
         # Decompose the index-0 slice of the last axis after subtracting
         # I / (2 * 10**log10N). U, Sigma and V are overwritten on every
         # iteration and never used. (np.linalg.svd returns its third factor
         # already transposed, so `V` here holds Vh.)
         U, Sigma, V = np.linalg.svd(tile_cov[:, :, 0] - np.eye(T) /
                                     (2 * 10**log10N))
     # Bare expression: the identity matrix is built and thrown away.
     np.eye(T)
Exemple #2
0
 def G_by_tile(self,
               end=None,
               abs=False,
               ignore_adjacent=True,
               *args,
               **kwargs):
     """
     Compute G separately for every tile.

     Args:
         end: forwarded to calc_G as its second positional argument.
         abs: forwarded to calc_G as its third positional argument.
         ignore_adjacent: forwarded to calc_G as its fourth positional
                          argument.
         *args, **kwargs: passed straight through to
                          self.calc_covs_by_tile().

     Returns:
         np.array built from a list with one entry per tile; each entry is
         the list of calc_G results for the items that
         stack_temporal_covariances(..., as_tensor=False) yields for that
         tile (presumably one per replicate -- TODO confirm).
     """
     # The second return value of calc_covs_by_tile() is not needed here.
     tile_covs, _ = self.calc_covs_by_tile(*args, **kwargs)
     per_tile = []
     for tile_cov in tile_covs:
         stacked = stack_temporal_covariances(tile_cov,
                                              self.R,
                                              self.T,
                                              as_tensor=False)
         tile_gs = []
         for sub_cov in stacked:
             tile_gs.append(calc_G(sub_cov, end, abs, ignore_adjacent))
         per_tile.append(tile_gs)
     return np.array(per_tile)
Exemple #3
0
    def _bootstrap_G(self,
                     alpha,
                     B,
                     end=None,
                     ignore_adjacent=False,
                     return_straps=False,
                     min_af=0.0,
                     average_replicates=False,
                     depth_limits=None,
                     binomial_correction=True,
                     suppress_warnings=False,
                     percentile=False):
        """
        Bootstrap G over tiles and return a confidence interval.

        Tiles are resampled with replacement B times. For each resample the
        tile covariances are averaged (weighted by the number of loci in
        each tile, ignoring NaNs) and calc_G is evaluated once per
        replicate on the averaged covariances.

        Args:
            alpha: significance level as a fraction (e.g. 0.05); it is
                   multiplied by 100 before being handed to numpy.
            B: number of bootstrap resamples.
            end: forwarded to calc_G.
            ignore_adjacent: forwarded to calc_G.
            return_straps: if True, return the raw list of bootstrap
                           values instead of an interval.
            min_af, depth_limits, binomial_correction, suppress_warnings:
                forwarded to self.calc_covs_by_tile().
            average_replicates: if True, each bootstrap value is the mean
                                of the per-replicate calc_G results.
            percentile: if True return the percentile interval, otherwise
                        the pivotal interval.

        Returns:
            ``straps`` (a list with B entries) when return_straps is True;
            otherwise a tuple ``(lower, That, upper)`` where That is the
            mean across bootstraps and the bounds are either
            ``(Q(a/2), Q(1-a/2))`` (percentile) or
            ``(2*That - Q(1-a/2), 2*That - Q(a/2))`` (pivotal).
        """
        covs, covns = self.calc_covs_by_tile(
            min_af=min_af,
            depth_limits=depth_limits,
            binomial_correction=binomial_correction,
            suppress_warnings=suppress_warnings)
        # one stacked covariance array per tile, tiles along axis 0
        covs = np.stack(
            [stack_temporal_covariances(c, self.R, self.T) for c in covs])
        # tile weights by number of loci
        weights = np.array([len(x) for x in self.tile_indices])
        weights = weights / weights.sum()

        # number of samples in resample
        N = len(self.tile_indices)
        straps = list()
        for _ in np.arange(B):
            bidx = np.random.randint(0, N, size=N)
            resampled = covs[bidx, :, :]
            covs_masked = np.ma.masked_array(resampled, np.isnan(resampled))
            # The weights must follow the resampled tiles: row k of
            # `resampled` is tile bidx[k], so it needs weights[bidx[k]].
            # Using `weights` unindexed would pair every resampled tile
            # with the locus count of an unrelated tile.
            avecov = np.ma.average(covs_masked,
                                   axis=0,
                                   weights=weights[bidx]).data
            # calculate G for each replicate (replicates are on the last axis)
            repcovs = np.stack([
                calc_G(avecov[:, :, rep],
                       end=end,
                       ignore_adjacent=ignore_adjacent)
                for rep in np.arange(self.R)
            ])
            if average_replicates:
                repcovs = np.mean(repcovs)
            straps.append(repcovs)
        if return_straps:
            # skip the summary statistics nobody asked for
            return straps
        That = np.mean(straps, axis=0)
        alpha = 100. * alpha  # numpy percentiles are on a 0-100 scale
        qlower = np.nanpercentile(straps, alpha / 2, axis=0)
        qupper = np.nanpercentile(straps, 100 - alpha / 2, axis=0)
        if percentile:
            return qlower, That, qupper
        return 2 * That - qupper, That, 2 * That - qlower
Exemple #4
0
    def _G(self,
           end=None,
           abs=False,
           ignore_adjacent=False,
           double_offdiag=False):
        """
        Evaluate calc_G on each temporal covariance matrix held in self.cov.

        Args:
            end: last timepoint to consider, useful for seeing
                 cumulative contributions
            abs: use absolute value of covariances
            ignore_adjacent: whether to ignore cov(Δp_{t}, Δp_{t+1}),
                             which is corrected for shared sampling noise
            double_offdiag: forwarded unchanged to calc_G as its fifth
                            positional argument.

        Returns:
            np.array with one calc_G result per item yielded by
            stack_temporal_covariances(..., as_tensor=False).

        Raises:
            ValueError: if self.cov is None (covariances not calculated).

        ignore_adjacent is very conservative.
        """
        if self.cov is None:
            raise ValueError(
                "calculate covariances first with TemporalFreqs.calc_covs()")
        n_reps = self.R
        n_times = self.T
        stacked = stack_temporal_covariances(self.cov,
                                             n_reps,
                                             n_times,
                                             as_tensor=False)
        results = []
        for sub_cov in stacked:
            results.append(
                calc_G(sub_cov, end, abs, ignore_adjacent, double_offdiag))
        return np.array(results)
Exemple #5
0
    def bootstrap_tempcov(self,
                          alpha,
                          B,
                          bootstrap_replicates=False,
                          replicate=None,
                          average_replicates=False,
                          keep_seqids=None,
                          return_straps=False,
                          min_af=0.0,
                          depth_limits=None,
                          percentile=False,
                          binomial_correction=True,
                          suppress_warnings=False):
        """
        This procedure bootstraps the temporal sub-block covariance matrices (there are R of
        these, and each is TxT). Optionally, if bootstrap_replicates is True, the R replicates
        are resampled as well, and the covariances are calculated for this sample as well. If
        replicate is supplied (an integer 0 ≤ replicate < R), then this procedure will return the
        bootstraps for this replicate only. If average_replicates is True, then the procedure will
        average across the replicates.

        The confidence interval returned is a pivotal CI (unless percentile is True),
          C_l = 2 T - Q(1-α/2)
          C_u = 2 T - Q(α/2)
          where T is the estimator for the statistic T, and α is the confidence level,
          and Q(x) is the empirical x percentile across the bootstraps.

        Args:
            alpha: significance level as a fraction; multiplied by 100 for numpy.
            B: number of bootstrap resamples.
            bootstrap_replicates: also resample the replicate axis.
            replicate: restrict to a single replicate index.
            average_replicates: average over axis 2 of each bootstrap average.
            keep_seqids: iterable of seqids whose tiles are used; None (the
                         default) keeps every tile.
            return_straps: return the stacked bootstrap array instead of a CI.
            min_af, depth_limits, binomial_correction, suppress_warnings:
                forwarded to self.calc_covs_by_tile().
            percentile: return the percentile interval instead of the pivotal one.

        Returns:
            The stacked bootstraps if return_straps is True, else a tuple
            (lower, That, upper) where That is the mean across bootstraps.

        Raises:
            ValueError: if replicate is given together with bootstrap_replicates,
                        or if the tile weights do not line up with the tiles
                        returned by calc_covs_by_tile().
        """
        # Validate before doing the expensive covariance computation.
        if replicate is not None and bootstrap_replicates:
            msg = "cannot bootstrap on single replicate; set either bootstrap_replicates=False or replicate=None"
            raise ValueError(msg)
        covs, covns = self.calc_covs_by_tile(
            keep_seqids=keep_seqids,
            min_af=min_af,
            depth_limits=depth_limits,
            binomial_correction=binomial_correction,
            suppress_warnings=suppress_warnings)
        covs = np.stack(
            [stack_temporal_covariances(c, self.R, self.T) for c in covs])
        if replicate is not None:
            covs = covs[:, :, :, replicate]
        # tile weights by number of loci
        if keep_seqids is None:
            # The default used to crash in set(None). Treat it as "no
            # filtering" -- assumes calc_covs_by_tile(keep_seqids=None)
            # likewise returns every tile (checked by the length test below).
            weights = np.array([len(x) for x in self.tile_indices])
        else:
            kept = set(keep_seqids)
            indices_seqid_pairs = zip(self.tile_indices,
                                      self.tile_df['seqid'].values)
            weights = np.array(
                [len(x) for x, seqid in indices_seqid_pairs if seqid in kept])
        weights = weights / weights.sum()

        # number of samples in resample
        N = covs.shape[0]
        if len(weights) != N:
            # weights are indexed by the resample below, so a mismatch would
            # otherwise go unnoticed
            raise ValueError(
                "number of tile weights (%d) does not match number of tiles (%d)"
                % (len(weights), N))
        straps = list()
        for _ in np.arange(B):
            bidx = np.random.randint(0, N, size=N)
            # get the windows of the resampled indices
            mat = covs[bidx, ...]
            if bootstrap_replicates:
                ridx = np.random.randint(0, self.R, size=self.R)
                mat = mat[:, :, :, ridx]
            covs_masked = np.ma.masked_array(mat, np.isnan(mat))
            # Row k of `mat` is tile bidx[k]; index the weights the same way
            # so each resampled tile keeps its own locus count.
            avecovs = np.ma.average(covs_masked,
                                    axis=0,
                                    weights=weights[bidx]).data
            if average_replicates:
                avecovs = avecovs.mean(axis=2)
            straps.append(avecovs)
        straps = np.stack(straps)
        if return_straps:
            # skip the summary statistics nobody asked for
            return straps
        That = np.mean(straps, axis=0)
        alpha = 100. * alpha  # numpy percentiles are on a 0-100 scale
        qlower = np.nanpercentile(straps, alpha / 2, axis=0)
        qupper = np.nanpercentile(straps, 100 - alpha / 2, axis=0)
        if percentile:
            return qlower, That, qupper
        return 2 * That - qupper, That, 2 * That - qlower