def remove_outliers(self, configuration):
        """
        Filter the mixed windows through the module-level ``remove_outliers``
        routine, using statistics computed over all the read-depth counts.

        The values returned by ``get_rd_counts`` are pooled over every window
        for which ``is_n_window()`` is false, separately for the WGA and the
        NO_WGA sample. The summary of each pool is stored under the
        ``"statistics"`` key of ``configuration["outlier_remove"]["config"]``
        (the caller's dictionary is modified in place) and
        ``self._mixed_windows`` is replaced by the filtered result.

        :param configuration: mapping with an ``"outlier_remove"`` entry that
                              holds a ``"name"`` and a ``"config"`` item
        :return: None
        :raises Error: if the mixed windows have not been computed
        """
        if self._mixed_windows is None:
            raise Error("Mixed windows have not been computed")

        # pool the read-depth counts of both samples
        wga_counts = []
        no_wga_counts = []
        for win in self._mixed_windows:
            if win.is_n_window():
                continue
            wga_counts += win.get_rd_counts(name=WindowType.WGA)
            no_wga_counts += win.get_rd_counts(name=WindowType.NO_WGA)

        # compute the statistics
        wga_summary = compute_statistic(data=wga_counts, statistics="all")
        no_wga_summary = compute_statistic(data=no_wga_counts,
                                           statistics="all")

        outlier_section = configuration["outlier_remove"]
        settings = outlier_section["config"]
        settings["statistics"] = {WindowType.NO_WGA: no_wga_summary,
                                  WindowType.WGA: wga_summary}

        # the call below resolves to the module-level function of the
        # same name, not to this method
        filtered = remove_outliers(windows=self._mixed_windows,
                                   removemethod=outlier_section["name"],
                                   config=settings)
        self._mixed_windows = filtered
    def remove_outliers(self, configuration):
        """
        Filter the mixed windows through the module-level ``remove_outliers``
        routine, using statistics computed over the per-window mean read depth.

        For every window for which ``is_gap_window()`` is false, the "mean"
        read-depth statistic of the WGA and of the NO_WGA sample is collected.
        If nothing was collected a warning is printed and the windows are left
        untouched. Otherwise the summary of each collection is stored under
        the ``"statistics"`` key of
        ``configuration["outlier_remove"]["config"]`` (the caller's dictionary
        is modified in place) and ``self._mixed_windows`` is replaced by the
        filtered result.

        :param configuration: mapping with an ``"outlier_remove"`` entry that
                              holds a ``"name"`` and a ``"config"`` item
        :return: None
        :raises Error: if the mixed windows have not been computed
        """
        if self._mixed_windows is None:
            raise Error("Mixed windows have not been computed")

        # one mean per usable window, for each sample
        wga_values = array.array('d')
        no_wga_values = array.array('d')
        for win in self._mixed_windows:
            if win.is_gap_window():
                continue
            wga_mean = win.get_rd_statistic(statistics="mean",
                                            name=WindowType.WGA)
            wga_values.append(wga_mean)
            no_wga_mean = win.get_rd_statistic(statistics="mean",
                                               name=WindowType.NO_WGA)
            no_wga_values.append(no_wga_mean)

        # nothing to summarise: warn and keep the windows as they are
        if not wga_values or not no_wga_values:
            message = ("{0} Cannot remove outliers for region. "
                       "Empty RD list detected".format(WARNING))
            print(message)
            return

        # compute the statistics
        wga_summary = compute_statistic(data=wga_values, statistics="all")
        no_wga_summary = compute_statistic(data=no_wga_values,
                                           statistics="all")

        outlier_section = configuration["outlier_remove"]
        settings = outlier_section["config"]
        settings["statistics"] = {WindowType.NO_WGA: no_wga_summary,
                                  WindowType.WGA: wga_summary}

        # the call below resolves to the module-level function of the
        # same name, not to this method
        filtered = remove_outliers(windows=self._mixed_windows,
                                   removemethod=outlier_section["name"],
                                   config=settings)
        self._mixed_windows = filtered
def zscore_statistic(data, null, **kwargs):
    """
    Compute a z-score based p-value for a statistic of ``data``.

    The statistic named by ``null.name`` is computed on ``data`` and
    compared against ``null.value``; the difference is scaled by the
    square root of the variance of ``data`` and the resulting score is
    mapped to a probability under the standard normal distribution.

    :param data: the observations, forwarded to ``compute_statistic``
    :param null: the null hypothesis; its ``name`` selects the statistic
                 and its ``value`` is the hypothesised value
    :param kwargs: optional ``alternative`` whose ``direction`` attribute
                   selects the tail: ``">"``/``">="`` for the upper tail,
                   ``"<"``/``"<="`` for the lower tail, anything else
                   (or no ``alternative`` at all) for a two-sided test
    :return: tuple ``(probability, statistic)``
    """
    statistic = compute_statistic(data=data, statistics=null.name)

    # the score is scaled by the standard deviation of the data
    var = compute_statistic(data=data, statistics="var")
    score = (statistic - null.value) / np.sqrt(var)

    direction = None
    if "alternative" in kwargs:
        direction = kwargs["alternative"].direction

    # The survival function is used instead of ``1.0 - cdf``: the
    # subtraction cancels to exactly 0.0 once the score is large,
    # whereas ``sf`` keeps the small tail probability.
    if direction in (">", ">="):
        prob = st.norm.sf(score)
    elif direction in ("<", "<="):
        prob = st.norm.cdf(score)
    else:
        # two-sided; also the default when no alternative is given
        prob = 2.0 * st.norm.sf(np.abs(score))

    return prob, statistic
Exemple #4
0
 def get_rd_stats(self, statistics="all"):
     """
     Summarise the read depth of the observations stored in the window.

     :param statistics: which statistic(s) to compute; passed unchanged
                        to ``compute_statistic`` (default ``"all"``)
     :return: the value returned by ``compute_statistic`` for the
              read depths of the observations
     """
     # gather the read depth of every observation, in order
     depths = [observation.read_depth
               for observation in self._observations]

     # imported at call time rather than at module level
     # (presumably to avoid a circular import -- confirm)
     from preprocess_utils import compute_statistic
     summary = compute_statistic(data=depths, statistics=statistics)
     return summary
    def get_statistics(self, statistic, window_type, **kwargs):
        """
        Compute a read-depth statistic over the windows referenced by
        ``self._indexes``.

        :param statistic: the statistic to compute; forwarded to
                          ``get_rd_stats`` / ``compute_statistic``
        :param window_type: ``WindowType.BOTH`` to get a pair of values
                            from the mixed window itself, any other type to
                            select the matching sub-window via ``get_window``
        :param kwargs: accepted but not used
        :return: for ``WindowType.BOTH`` a 2-tuple taken from the first
                 index in ``self._indexes`` (``None`` if there is none);
                 otherwise the result of ``compute_statistic`` on the
                 flattened read depths
        """
        both_requested = window_type == WindowType.BOTH

        if not both_requested:
            # pick the requested sub-window out of every mixed window
            selected = [win.get_window(window_type) for win in self._windows]
            rd_values = flat_windows_rd_from_indexes(indexes=self._indexes,
                                                     windows=selected)
            return compute_statistic(data=rd_values, statistics=statistic)

        # NOTE(review): the loop returns during its first iteration, so only
        # the first index is ever used -- confirm that this is intended
        for idx in self._indexes:
            first, second = \
                self._windows[idx].get_rd_stats(statistics=statistic)
            return first, second

        # no indexes at all
        return None