def test_generate_windows(self):
    """Check the windows produced for several size/step combinations."""
    back_to_back = [(0, 10), (10, 20), (20, 30)]
    overlapping = [(0, 10), (5, 15), (10, 20)]

    # A step equal to the size, and a step of None, are both expected to
    # give consecutive, non-overlapping windows.
    assert self.generate_wins(10, 10, 3) == back_to_back
    assert self.generate_wins(10, None, 3) == back_to_back

    # A step smaller than the size is expected to give overlapping windows.
    assert self.generate_wins(10, 5, 3) == overlapping

    # With an explicit end the generator is consumed completely.
    bounded = generate_windows(size=10, step=10, end=30)
    assert list(bounded) == back_to_back
def windows(self):
    """Yield the windows of every chromosome that hold enough SNPs.

    Chromosomes come from ``self._get_chrom_lengths()``.  For each one,
    windows of ``self.win_size`` taken every ``self.win_step`` are walked
    from 0 up to the chromosome length plus one, and ``self._snp_queue``
    is updated with the SNVs fetched from ``self._reader``.

    Yields:
        dict: keys ``'chrom'``, ``'start'``, ``'end'`` and ``'snps'`` (a
        slice copy of the queue contents), one per window whose queue
        holds at least ``self.min_num_snps`` items.
    """
    chrom_lengths = self._get_chrom_lengths()
    pending = self._snp_queue
    for chrom, length in chrom_lengths.items():
        chrom_wins = generate_windows(start=0, size=self.win_size,
                                      step=self.win_step, end=length + 1)
        # Presumably clears whatever the previous chromosome left queued.
        pending.empty()
        for win in chrom_wins:
            win_start = win.start
            win_end = win.end

            # Presumably discards the SNPs located before this window.
            pending.pop(win_start)

            # Only the stretch not covered by the queue is fetched: right
            # after the last queued SNP, or the window start when the
            # queue is empty.
            queued = pending.queue
            fetch_from = queued[-1].pos + 1 if queued else win_start
            fetched = self._reader.fetch_snvs(chrom, fetch_from, win_end)
            pending.extend(fetched)

            if len(pending.queue) < self.min_num_snps:
                continue
            yield {
                'chrom': chrom,
                'start': win_start,
                'end': win_end,
                'snps': pending.queue[:],
            }
def calculate_coverage_distrib_in_region(self, region=None):
    """Return the coverage distribution of a region or of every reference.

    With a window of 1 the work is delegated to
    ``_calculate_complete_coverage_distrib``, which receives ``region``
    as given (``None`` when no region was requested).  With any other
    window size, the value computed for each window is rounded to the
    nearest integer and tallied per sample.

    Args:
        region: ``(chrom, start, end)`` tuple, or ``None`` to go through
            every reference in ``self._ref_lens``.

    Returns:
        For a window of 1, whatever
        ``_calculate_complete_coverage_distrib`` returns; otherwise a
        dict mapping each sample to an ``IntCounter`` keyed by the
        rounded per-window value.
    """
    if self.window == 1:
        return self._calculate_complete_coverage_distrib(region)

    if region is None:
        regions = [(ref, 0, length - 1)
                   for ref, length in self._ref_lens.items()]
    else:
        regions = [region]

    counts = {}
    # NOTE(review): the start and end of each region are unpacked but not
    # used; the windows always run from 0 to the full reference length.
    # Confirm that this is the intended behaviour.
    for chrom, _region_start, _region_end in regions:
        chrom_wins = generate_windows(self.window, start=0,
                                      end=self._ref_lens[chrom], step=1)
        for win_start, win_end in chrom_wins:
            win_values = self._calculate_coverages_in_win(chrom, win_start,
                                                          win_end)
            for sample, value in win_values.items():
                sample_counts = counts.get(sample)
                if sample_counts is None:
                    sample_counts = counts[sample] = IntCounter()
                sample_counts[int(round(value))] += 1
    return counts
def calculate_coverage_distrib_in_region(self, region=None):
    """Return the coverage distribution of a region or of every reference.

    With a window of 1 the work is delegated to
    ``_calculate_complete_coverage_distrib``, which receives ``region``
    as given (``None`` when no region was requested).  With any other
    window size, the value computed for each window is rounded to the
    nearest integer and tallied per sample.

    Args:
        region: ``(chrom, start, end)`` tuple, or ``None`` to go through
            every reference in ``self._ref_lens``.

    Returns:
        For a window of 1, whatever
        ``_calculate_complete_coverage_distrib`` returns; otherwise a
        dict mapping each sample to an ``IntCounter`` keyed by the
        rounded per-window value.
    """
    if self.window == 1:
        return self._calculate_complete_coverage_distrib(region)

    if region is None:
        regions = [(ref, 0, length - 1)
                   for ref, length in self._ref_lens.items()]
    else:
        regions = [region]

    counts = {}
    # NOTE(review): the start and end of each region are unpacked but not
    # used; the windows always run from 0 to the full reference length.
    # Confirm that this is the intended behaviour.
    for chrom, _region_start, _region_end in regions:
        chrom_wins = generate_windows(self.window, start=0,
                                      end=self._ref_lens[chrom], step=1)
        for win_start, win_end in chrom_wins:
            win_values = self._calculate_coverages_in_win(chrom, win_start,
                                                          win_end)
            for sample, value in win_values.items():
                sample_counts = counts.get(sample)
                if sample_counts is None:
                    sample_counts = counts[sample] = IntCounter()
                sample_counts[int(round(value))] += 1
    return counts
def generate_wins(self, size, step, number):
    """Return the first ``number`` windows given by ``generate_windows``.

    The generator is created without an ``end``, so the iteration is
    stopped here once ``number`` windows have been collected.

    Args:
        size: window size handed to ``generate_windows``.
        step: window step handed to ``generate_windows``.
        number: how many windows to collect.

    Returns:
        list: the collected windows, in the order they were generated.
    """
    collected = []
    numbered_wins = enumerate(generate_windows(size=size, step=step))
    for position, window in numbered_wins:
        if position >= number:
            break
        collected.append(window)
    return collected
def windows(self):
    """Yield the windows of every chromosome that hold enough SNPs.

    Chromosomes come from ``self._get_chrom_lengths()``.  For each one,
    windows of ``self.win_size`` taken every ``self.win_step`` are walked
    from 0 up to the chromosome length plus one, and ``self._snp_queue``
    is updated with the SNVs fetched from ``self._reader``.

    Yields:
        dict: keys ``'chrom'``, ``'start'``, ``'end'`` and ``'snps'`` (a
        slice copy of the queue contents), one per window whose queue
        holds at least ``self.min_num_snps`` items.
    """
    chrom_lengths = self._get_chrom_lengths()
    pending = self._snp_queue
    for chrom, length in chrom_lengths.items():
        chrom_wins = generate_windows(start=0, size=self.win_size,
                                      step=self.win_step, end=length + 1)
        # Presumably clears whatever the previous chromosome left queued.
        pending.empty()
        for win in chrom_wins:
            win_start = win.start
            win_end = win.end

            # Presumably discards the SNPs located before this window.
            pending.pop(win_start)

            # Only the stretch not covered by the queue is fetched: right
            # after the last queued SNP, or the window start when the
            # queue is empty.
            queued = pending.queue
            fetch_from = queued[-1].pos + 1 if queued else win_start
            fetched = self._reader.fetch_snvs(chrom, fetch_from, win_end)
            pending.extend(fetched)

            if len(pending.queue) < self.min_num_snps:
                continue
            yield {
                'chrom': chrom,
                'start': win_start,
                'end': win_end,
                'snps': pending.queue[:],
            }