def remove_outliers(self, configuration):
    """
    Filter the mixed windows with the outlier-removal method named in
    the configuration.

    The read-depth counts of every window that is not an N-window are
    pooled per window type, summarised with ``compute_statistic`` and
    handed to the module-level ``remove_outliers`` routine through the
    ``"statistics"`` entry of the outlier-removal config.

    :param configuration: mapping that provides
        ``configuration["outlier_remove"]["name"]`` and
        ``configuration["outlier_remove"]["config"]``. The latter is
        updated in place with a ``"statistics"`` key.
    :return: None; ``self._mixed_windows`` is replaced by the result
    :raises Error: if the mixed windows have not been computed
    """
    if self._mixed_windows is None:
        raise Error("Mixed windows have not been computed")

    # pool the read-depth counts needed to compute the statistics
    pooled_wga = []
    pooled_no_wga = []
    for win in self._mixed_windows:
        if win.is_n_window():
            # N-windows do not contribute to the statistics
            continue
        pooled_wga.extend(win.get_rd_counts(name=WindowType.WGA))
        pooled_no_wga.extend(win.get_rd_counts(name=WindowType.NO_WGA))

    stats_wga = compute_statistic(data=pooled_wga, statistics="all")
    stats_no_wga = compute_statistic(data=pooled_no_wga, statistics="all")

    outlier_settings = configuration["outlier_remove"]
    method_config = outlier_settings["config"]
    method_config["statistics"] = {
        WindowType.NO_WGA: stats_no_wga,
        WindowType.WGA: stats_wga,
    }

    # the bare name resolves to the module-level function, not this method
    self._mixed_windows = remove_outliers(
        windows=self._mixed_windows,
        removemethod=outlier_settings["name"],
        config=method_config,
    )
def remove_outliers(self, configuration):
    """
    Filter the mixed windows with the outlier-removal method named in
    the configuration.

    For every window that is not a gap window the mean read depth of
    each window type is collected. The collected means are summarised
    with ``compute_statistic`` and passed to the module-level
    ``remove_outliers`` routine through the ``"statistics"`` entry of
    the outlier-removal config. When no means were collected a warning
    is printed and the windows are left untouched.

    :param configuration: mapping that provides
        ``configuration["outlier_remove"]["name"]`` and
        ``configuration["outlier_remove"]["config"]``. The latter is
        updated in place with a ``"statistics"`` key.
    :return: None; ``self._mixed_windows`` is replaced by the result
    :raises Error: if the mixed windows have not been computed
    """
    if self._mixed_windows is None:
        raise Error("Mixed windows have not been computed")

    # per-window mean read depth, stored as C doubles
    mean_rd_wga = array.array('d')
    mean_rd_no_wga = array.array('d')
    for win in self._mixed_windows:
        if win.is_gap_window():
            # gap windows do not contribute to the statistics
            continue
        mean_rd_wga.append(
            win.get_rd_statistic(statistics="mean", name=WindowType.WGA))
        mean_rd_no_wga.append(
            win.get_rd_statistic(statistics="mean", name=WindowType.NO_WGA))

    # an empty array is falsy: nothing to summarise, so bail out
    if not mean_rd_wga or not mean_rd_no_wga:
        message = ("{0} Cannot remove outliers for region. "
                   "Empty RD list detected")
        print(message.format(WARNING))
        return

    stats_wga = compute_statistic(data=mean_rd_wga, statistics="all")
    stats_no_wga = compute_statistic(data=mean_rd_no_wga, statistics="all")

    outlier_settings = configuration["outlier_remove"]
    method_config = outlier_settings["config"]
    method_config["statistics"] = {
        WindowType.NO_WGA: stats_no_wga,
        WindowType.WGA: stats_wga,
    }

    # the bare name resolves to the module-level function, not this method
    self._mixed_windows = remove_outliers(
        windows=self._mixed_windows,
        removemethod=outlier_settings["name"],
        config=method_config,
    )
def zscore_statistic(data, null, **kwargs):
    """
    Compute a z-score based p-value for a statistic of ``data`` against
    a null-hypothesis value.

    The score is ``(statistic - null.value) / sqrt(var)`` where both the
    statistic and the variance are obtained from ``compute_statistic``,
    and the p-value is read off the standard normal distribution.

    :param data: the sample handed to ``compute_statistic``
    :param null: null hypothesis; ``null.name`` selects the statistic to
        compute and ``null.value`` is the hypothesised value
    :param kwargs: optionally ``alternative``, an object whose
        ``direction`` attribute selects the tail: ``">"``/``">="`` for
        the upper tail, ``"<"``/``"<="`` for the lower tail, anything
        else for a two-sided test. Without it the test is two-sided.
    :return: tuple ``(p_value, statistic)``
    """
    statistic = compute_statistic(data=data, statistics=null.name)

    # compute the variance used to standardise the statistic
    var = compute_statistic(data=data, statistics="var")
    score = (statistic - null.value) / np.sqrt(var)

    # no alternative given -> two-sided by default
    direction = None
    if "alternative" in kwargs:
        direction = kwargs["alternative"].direction

    # Upper-tail probabilities use the survival function: 1 - cdf(x)
    # cancels to exactly 0.0 for large x, whereas sf(x) keeps precision.
    if direction in (">", ">="):
        prob = st.norm.sf(score)
    elif direction in ("<", "<="):
        prob = st.norm.cdf(score)
    else:
        prob = 2.0 * st.norm.sf(np.abs(score))
    return prob, statistic
def get_rd_stats(self, statistics="all"):
    """
    Summarise the read depth of the observations held by this window.

    :param statistics: forwarded unchanged to ``compute_statistic`` to
        select what is computed; defaults to ``"all"``
    :return: whatever ``compute_statistic`` returns for the read depths
    """
    # collect the read depth of every observation, in order
    depths = [observation.read_depth for observation in self._observations]

    # imported at call time rather than at module level
    # NOTE(review): presumably to avoid a circular import -- confirm
    from preprocess_utils import compute_statistic

    return compute_statistic(data=depths, statistics=statistics)
def get_statistics(self, statistic, window_type, **kwargs):
    """
    Compute a read-depth statistic over the windows selected by
    ``self._indexes``.

    :param statistic: name of the statistic, forwarded to the
        underlying statistic routines
    :param window_type: ``WindowType.BOTH`` to query the windows
        directly through ``get_rd_stats``; any other value selects the
        per-type window via ``get_window`` and pools its read depths
    :param kwargs: accepted but not used
    :return: a pair ``(statistic1, statistic2)`` for
        ``WindowType.BOTH``, otherwise the result of
        ``compute_statistic`` on the pooled read depths
    """
    if window_type != WindowType.BOTH:
        # pick the requested view of every window, then flatten the
        # read depths of the indexed ones into a single sequence
        typed_windows = [win.get_window(window_type)
                         for win in self._windows]
        pooled_rd = flat_windows_rd_from_indexes(indexes=self._indexes,
                                                 windows=typed_windows)
        return compute_statistic(data=pooled_rd, statistics=statistic)

    # NOTE(review): every iteration overwrites the previous result, so
    # only the pair of the last indexed window is returned, and an empty
    # self._indexes raises UnboundLocalError -- confirm this is intended.
    for idx in self._indexes:
        current = self._windows[idx]
        statistic1, statistic2 = current.get_rd_stats(statistics=statistic)
    return statistic1, statistic2