예제 #1
0
    def test_empty_iter(self):
        """Check slicing and next() on an iterator built from an empty list."""
        empty = []
        rnd_iter = RandomAccessIterator(iter(empty), 7)
        assert rnd_iter[0:3] == empty[0:3]

        # There is nothing to consume, so the very first next() has to
        # raise StopIteration; returning normally is a failure.
        try:
            rnd_iter.next()
        except StopIteration:
            pass
        else:
            self.fail('StopIteration expected')
예제 #2
0
    def test_empty_iter(self):
        """Check slicing and next() on an iterator built from an empty list."""
        empty = []
        rnd_iter = RandomAccessIterator(iter(empty), 7)
        assert rnd_iter[0:3] == empty[0:3]

        # There is nothing to consume, so the very first next() has to
        # raise StopIteration; returning normally is a failure.
        try:
            rnd_iter.next()
        except StopIteration:
            pass
        else:
            self.fail('StopIteration expected')
예제 #3
0
    def test_short_iter(self):
        """Check a source with fewer items (3, then 2) than the window (7).

        Item and slice access are compared with the reference list before
        and after every next() call, and StopIteration is expected once
        the three items have been consumed.
        """
        seq1 = range(3)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        # The item check has to look at seq2; comparing seq1 with itself
        # could never fail.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]

        # Same checks a second time, with no next() call in between.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]

        first = seq2.next()
        assert first == 0
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 1
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 2
        assert seq2[0:4] == seq1[0:4]

        try:
            item = seq2.next()
        except StopIteration:
            pass
        else:
            self.fail('StopIteration expected')

        seq1 = range(2)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]
예제 #4
0
    def filter_snvs(self, snvs):
        """Yield the SNVs accepted by the adjusted recombination check.

        snvs is wrapped in a RandomAccessIterator and handed to
        _calculate_segregation_rates, which produces
        (snp, chrom, pos, rates) items.  For each item the distances from
        pos to every rate and the matching recombination rates are passed
        to _calc_ajusted_recomb; the whole result is forwarded to
        self._store_log_info and the SNP is yielded when the second
        element of the result is true.

        Items with fewer than self.min_num_snps rates are skipped.
        self.tot_snps counts every item seen and self.passed_snps every
        SNP yielded.  When self.debug_plot_dir is set, a
        <chrom>/<chrom>_<pos>.png file inside it is opened and passed to
        _calc_ajusted_recomb as plot_fhand.
        """
        snps = RandomAccessIterator(snvs, rnd_access_win=self.snps_in_window)
        snp_rates = _calculate_segregation_rates(snps,
                                                 self.pop_type,
                                                 self.snps_in_window,
                                                 samples=self.samples)
        max_zero_dist = self.max_zero_dist_recomb
        for snp, chrom, pos, rates in snp_rates:
            self.tot_snps += 1
            dist_recombs = [(rate.pos - pos, rate.recomb_rate)
                            for rate in rates]
            if dist_recombs:
                dists, recombs = zip(*dist_recombs)
            else:
                # zip(*[]) yields nothing to unpack into the two names.
                dists, recombs = (), ()
            if len(dists) < self.min_num_snps:
                continue

            plot_fhand = None
            if self.debug_plot_dir is not None:
                chrom_dir = pjoin(self.debug_plot_dir, str(chrom))
                if not exists(chrom_dir):
                    mkdir(chrom_dir)
                fname = str(chrom) + '_' + str(pos) + '.png'
                # NOTE(review): a PNG is binary data; text mode 'w' is kept
                # from the original, 'wb' may be required - confirm.
                plot_fhand = open(pjoin(chrom_dir, fname), 'w')
            try:
                res = _calc_ajusted_recomb(dists,
                                           recombs,
                                           max_recomb=self.max_recomb_curve_fit,
                                           max_zero_dist_recomb=max_zero_dist,
                                           alpha_recomb_0=self.alpha_recomb_0,
                                           plot_fhand=plot_fhand)
            finally:
                # One handle is opened per SNP, so release it right away
                # instead of leaving it to the garbage collector.
                if plot_fhand is not None:
                    plot_fhand.close()
            self._store_log_info(*res)
            if res[1]:
                self.passed_snps += 1
                yield snp
예제 #5
0
    def test_short_iter(self):
        """Check a source with fewer items (3, then 2) than the window (7).

        Item and slice access are compared with the reference list before
        and after every next() call, and StopIteration is expected once
        the three items have been consumed.
        """
        seq1 = range(3)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        # The item check has to look at seq2; comparing seq1 with itself
        # could never fail.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]

        # Same checks a second time, with no next() call in between.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]

        first = seq2.next()
        assert first == 0
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 1
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 2
        assert seq2[0:4] == seq1[0:4]

        try:
            item = seq2.next()
        except StopIteration:
            pass
        else:
            self.fail('StopIteration expected')

        seq1 = range(2)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]
예제 #6
0
    def _smooth_genotypes(self, snp_ab_genotypes, samples):
        """Unfinished smoothing pass: print the SNPs of every window.

        snp_ab_genotypes is wrapped in a RandomAccessIterator whose window
        is self.window * 2 - 1.  For each item, the (start, end) pairs
        returned by self._create_windows(idx) are sliced out of the
        iterator once per sample and printed.  Nothing is returned or
        yielded.
        """
        big_win = self.window * 2 - 1
        snp_ab_genotypes = RandomAccessIterator(snp_ab_genotypes,
                                                rnd_access_win=big_win)

        # The items are unpacked as (snp, ab_genotype) pairs although
        # neither element is used yet.
        for idx, (_snp, _ab_genotype) in enumerate(snp_ab_genotypes):
            wins = self._create_windows(idx)
            for win in wins:
                # transpose here
                for sample in samples:
                    # keep memory of the samples already smoothed
                    snps_in_win = snp_ab_genotypes[win[0]:win[1]]
                    # Debug output.  The single-argument call form prints
                    # the same text as the Python 2 print statement and is
                    # also valid on Python 3.
                    print(list(snps_in_win))
                # if all samples are smoothed break the win loop
예제 #7
0
    def __call__(self, snvs):
        """Yield every SNV, flagging the ones in highly variable regions.

        For each SNV the neighbours held by the RandomAccessIterator that
        lie on the same chromosome and closer than half of
        self.window_in_bp are counted.  The count is divided by the window
        length, shortened when the window would extend past either end of
        the reference (lengths come from self._lengths), and the SNV gets
        the self.name filter when that frequency reaches
        self.max_variability.  self._clean_filter(snv) is called on every
        SNV before it is evaluated.
        """
        # TODO: Random access iterator based on physical distance
        # RandomAcessRegionIterator(items, location_getter)
        # once we do this we can remove max_num_snps
        snvs = RandomAccessIterator(snvs, self._max_num_snps)
        half_win = (snvs._rnd_access_win - 1) // 2
        half_win_in_bp = (self.window_in_bp - 1) // 2
        for idx, snv in enumerate(snvs):
            self._clean_filter(snv)
            chrom = snv.chrom
            pos = snv.pos
            win_start = max(idx - half_win, 0)
            # The slice end is exclusive: + 1 is needed to look at as many
            # SNVs after the current one as before it.
            win_stop = idx + half_win + 1

            # Counting directly avoids len() on the result of filter(),
            # which is not a list on Python 3.
            num_snvs = sum(1 for snv2 in snvs[win_start:win_stop]
                           if snv2.chrom == chrom and
                           abs(snv2.pos - pos) < half_win_in_bp)

            win_len = self.window_in_bp
            # The studied window could be smaller than expected if it is
            # located at the beginning of the chromosome
            dist_from_0 = pos
            if dist_from_0 < half_win_in_bp:
                win_len -= (half_win_in_bp - dist_from_0)

            # The studied window could be smaller than expected if it is
            # located at the end of the chromosome
            last_ref_pos = self._lengths[chrom] - 1
            len_not_studied_at_end = pos + half_win_in_bp - last_ref_pos
            if len_not_studied_at_end > 0:
                win_len -= len_not_studied_at_end

            # float() forces a true division even where "/" truncates ints.
            freq = float(num_snvs) / win_len
            if freq >= self.max_variability:
                snv.add_filter(self.name)
            yield snv
예제 #8
0
    def _smooth_genotypes_old(self, snp_ab_genotypes, samples):
        """Yield (snp, smoothed genotypes) without any chromosome filtering.

        snp_ab_genotypes -- iterable of (snp, ab_genotype) pairs; it is
            wrapped in a RandomAccessIterator with a window of
            self.window items.
        samples -- names used as the keys of each yielded OrderedDict.

        Each SNP is smoothed by self._smooth, which receives the position
        of the SNP inside the window, the window items and the samples.
        """
        win = self.window
        snp_ab_genotypes = RandomAccessIterator(snp_ab_genotypes,
                                                rnd_access_win=win)

        half_win = (win - 1) // 2
        for idx, (snp, _ab_genotype) in enumerate(snp_ab_genotypes):
            # The window is clipped at the start of the stream.
            start = max(idx - half_win, 0)
            end = idx + half_win + 1

            snp_gts_in_win = snp_ab_genotypes[start:end]
            smoothed_genos = self._smooth(idx - start, snp_gts_in_win, samples)
            yield snp, OrderedDict(zip(samples, smoothed_genos))
예제 #9
0
    def _recode_parent_genotypes(self, samples=None):
        """Yield (snp, recoded calls) for the SNPs whose coding is deduced.

        Every SNP read from self._reader is paired with the result of
        GetCoding(self.parents_a, self.parents_b).  The coding finally
        used comes from self._deduce_coding, which receives the offspring
        calls of the SNP and the indexes of the SNPs of the same
        chromosome inside the window.  SNPs for which it returns None are
        dropped.

        samples -- samples to recode; None means every call in snp.samples.
        """
        get_coding = GetCoding(self.parents_a, self.parents_b)
        win = self.window
        snp_and_coding = RandomAccessIterator(
            imap(lambda snp: (snp, get_coding(snp)), self._reader),
            rnd_access_win=win)
        offspring = self.offspring
        half_win = (win - 1) // 2

        for idx, (snp1, _) in enumerate(snp_and_coding):
            snp1_calls = [snp1.genotype(sample) for sample in offspring]

            first_idx = max(idx - half_win, 0)
            last_idx = idx + half_win + 1

            # Indexes, within the window, of the SNPs that share the
            # chromosome of snp1.
            snp2_idxs = []
            for snp2_idx in range(first_idx, last_idx):
                try:
                    snp2_chrom = snp_and_coding[snp2_idx][0].CHROM
                except IndexError:
                    # Index not available in the iterator: skip it.
                    pass
                else:
                    if snp2_chrom == snp1.CHROM:
                        snp2_idxs.append(snp2_idx)

            coding = self._deduce_coding(snp_and_coding, snp1_calls,
                                         snp2_idxs)
            if coding is None:
                # We haven't managed to deduce the AB coding for this snp
                continue
            coding['.'] = '.'

            if samples is None:
                calls = snp1.samples
            else:
                calls = [snp1.genotype(sample) for sample in samples]

            recoded = OrderedDict()
            for call in calls:
                recoded[call.sample] = self._map_to_ab(call, coding)
            yield snp1, recoded
예제 #10
0
    def smooth_genotypes(self, snp_ab_genotypes, samples):
        """Yield (snp, smoothed genotypes) using same-chromosome neighbours.

        snp_ab_genotypes -- iterable of (snp, ab_genotype) pairs; it is
            wrapped in a RandomAccessIterator with a window of
            self.window items.
        samples -- names used as the keys of each yielded OrderedDict.

        The window around each SNP is reduced to the SNPs of its own
        chromosome and handed to self._smooth together with the position
        of the current SNP inside that reduced window.
        """
        win = self.window
        snp_ab_genotypes = RandomAccessIterator(snp_ab_genotypes,
                                                rnd_access_win=win)

        half_win = (win - 1) // 2
        for idx, (snp, _ab_genotype) in enumerate(snp_ab_genotypes):
            chrom = snp.CHROM

            start = max(idx - half_win, 0)
            end = idx + half_win + 1
            idx_in_raw_win = idx - start

            # Remove the snps in other chromosomes.  Dropping snps located
            # before the current one shifts its position, so the index
            # passed to _smooth is recomputed against the filtered list
            # instead of reusing idx - start.
            snp_gts_in_win = []
            idx_in_win = idx_in_raw_win
            for raw_idx, snp_gt in enumerate(snp_ab_genotypes[start:end]):
                if snp_gt[0].CHROM == chrom:
                    if raw_idx == idx_in_raw_win:
                        idx_in_win = len(snp_gts_in_win)
                    snp_gts_in_win.append(snp_gt)

            smoothed_genos = self._smooth(idx_in_win, snp_gts_in_win, samples)
            yield snp, OrderedDict(zip(samples, smoothed_genos))
예제 #11
0
    def test_next_items(self):
        """Check which items stay reachable while a 10 item source is consumed.

        The iterator is built with a window of 7.  After each next() call
        the reachable slices are compared with the reference list, and
        IndexError is expected for the indexes that are not available.
        """
        seq1 = range(10)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        # The item check has to look at seq2; comparing seq1 with itself
        # could never fail.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]
        assert seq2[2] == seq1[2]

        first = seq2.next()
        assert first == 0
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 1
        assert seq2[0:5] == seq1[0:5]

        item = seq2.next()
        assert item == 2
        assert seq2[0:6] == seq1[0:6]

        item = seq2.next()
        assert item == 3
        assert seq2[0:7] == seq1[0:7]

        # Eight items do not fit in a window of seven.
        try:
            seq2[0:8]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 4
        assert seq2[1:8] == seq1[1:8]
        assert seq2[1] == seq1[1]
        # Item 0 is expected to be out of reach at this point.
        try:
            seq2[0]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 5
        assert seq2[2:9] == seq1[2:9]

        try:
            seq2[1:8]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 6
        assert seq2[3:10] == seq1[3:10]

        # From here on the same slice, 3:10, is checked after further
        # next() calls.
        item = seq2.next()
        assert item == 7
        assert seq2[3:10] == seq1[3:10]
        item = seq2.next()
        assert item == 8
        item = seq2.next()
        assert item == 9
        assert seq2[3:10] == seq1[3:10]
예제 #12
0
def filter_snvs_by_ld(snvs,
                      samples=None,
                      r_sqr=DEF_R_SQR_THRESHOLD,
                      p_val=DEF_P_VAL,
                      bonferroni=True,
                      snv_win=DEF_SNV_WIN,
                      min_phys_dist=MIN_PHYS_DIST,
                      log_fhand=None):
    """Yield the SNVs that are linked to another SNV of their window.

    Each SNV is compared with the SNVs located up to (snv_win - 1) // 2
    positions before and after it.  It is yielded as soon as one
    comparison gives stats.r_sqr >= r_sqr and stats.fisher < p_val.  When
    the partner comes later in the stream it is remembered and yielded
    without further tests once it is reached.

    snvs -- iterable of SNVs; it is wrapped in a RandomAccessIterator.
    samples -- forwarded to calculate_ld_stats.
    r_sqr -- minimum r_sqr for a pair to be considered linked.
    p_val -- the fisher value of a pair has to be lower than this.
    bonferroni -- when true p_val is divided by snv_win - 1.
    snv_win -- number of SNVs in the window, it has to be odd.
    min_phys_dist -- pairs of SNVs closer than this are not tested.
    log_fhand -- when given, _write_log is called with it and the total
        and passed SNV counts once the input is exhausted.

    Raises ValueError if snv_win is even.  This is a generator, so the
    check runs when iteration starts, not when the function is called.
    """
    if not snv_win % 2:
        msg = 'The window should have an odd number of snvs'
        raise ValueError(msg)
    half_win = (snv_win - 1) // 2

    # A window of one snv involves no comparisons: there is nothing to
    # correct for and dividing by snv_win - 1 would be a division by zero.
    if bonferroni and snv_win != 1:
        p_val /= (snv_win - 1)

    snvs = RandomAccessIterator(snvs, rnd_access_win=snv_win)
    # Indexes of the later snvs already found linked to a previous one.
    linked_snvs = set()
    total_snvs = 0
    passed_snvs = 0
    prev_chrom = None
    stats_cache = _LDStatsCache()
    for snv_i, snv in enumerate(snvs):
        total_snvs += 1
        if snv_i in linked_snvs:
            yield snv
            passed_snvs += 1
            linked_snvs.remove(snv_i)
            continue
        linked = None
        win_start = snv_i - half_win

        this_chrom = snv.chrom
        if prev_chrom is not None and prev_chrom != this_chrom:
            stats_cache = _LDStatsCache()
        # Remember the chromosome of this snv.  Without this update every
        # snv after the first chromosome change would look like a new
        # chromosome and the cache would be thrown away each time.
        prev_chrom = this_chrom

        if win_start < 0:
            win_start = 0
        # The window is walked from its far end back to its start.
        for snv_j in range(snv_i + half_win, win_start - 1, -1):
            try:
                snv_2 = snvs[snv_j]
            except IndexError:
                continue

            if snv_i == snv_j:
                continue

            try:
                linked = stats_cache.get_stat(snv_i, snv_j)
                in_cache = True
            except KeyError:
                in_cache = False

            if in_cache:
                pass
            elif snv.chrom != snv_2.chrom:
                # different chroms, they're not linked
                linked = False
            elif abs(snv.pos - snv_2.pos) < min_phys_dist:
                # Too close, they could be errors due to the same reads
                # so no independent errors
                linked = None
            else:
                stats = calculate_ld_stats(snv, snv_2, samples=samples)
                if stats.r_sqr >= r_sqr and stats.fisher < p_val:
                    linked = True
                    if snv_j > snv_i:
                        linked_snvs.add(snv_j)
                    break
                else:
                    linked = False
            # Only the negative and the untested results are stored; a
            # positive one leaves the loop through the break above.
            if not linked:
                stats_cache.set_stat(snv_i, snv_j, linked)

        if linked:
            yield snv
            passed_snvs += 1
        stats_cache.del_lower_than(win_start)

    if log_fhand is not None:
        _write_log(log_fhand, total_snvs, passed_snvs)
예제 #13
0
    def test_next_items(self):
        """Check which items stay reachable while a 10 item source is consumed.

        The iterator is built with a window of 7.  After each next() call
        the reachable slices are compared with the reference list, and
        IndexError is expected for the indexes that are not available.
        """
        seq1 = range(10)
        seq2 = RandomAccessIterator(iter(seq1), 7)
        # The item check has to look at seq2; comparing seq1 with itself
        # could never fail.
        assert seq2[0] == seq1[0]
        assert seq2[0:3] == seq1[0:3]
        assert seq2[:3] == seq1[:3]
        assert seq2[2] == seq1[2]

        first = seq2.next()
        assert first == 0
        assert seq2[0:4] == seq1[0:4]

        item = seq2.next()
        assert item == 1
        assert seq2[0:5] == seq1[0:5]

        item = seq2.next()
        assert item == 2
        assert seq2[0:6] == seq1[0:6]

        item = seq2.next()
        assert item == 3
        assert seq2[0:7] == seq1[0:7]

        # Eight items do not fit in a window of seven.
        try:
            seq2[0:8]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 4
        assert seq2[1:8] == seq1[1:8]
        assert seq2[1] == seq1[1]
        # Item 0 is expected to be out of reach at this point.
        try:
            seq2[0]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 5
        assert seq2[2:9] == seq1[2:9]

        try:
            seq2[1:8]
        except IndexError:
            pass
        else:
            self.fail('IndexError expexted')

        item = seq2.next()
        assert item == 6
        assert seq2[3:10] == seq1[3:10]

        # From here on the same slice, 3:10, is checked after further
        # next() calls.
        item = seq2.next()
        assert item == 7
        assert seq2[3:10] == seq1[3:10]
        item = seq2.next()
        assert item == 8
        item = seq2.next()
        assert item == 9
        assert seq2[3:10] == seq1[3:10]
예제 #14
0
 def test_iter_access(self):
     """Plain iteration has to return every item of the source, in order."""
     items = range(100)
     rnd_iter = RandomAccessIterator(iter(items), 11)
     expected = list(items)
     found = list(rnd_iter)
     assert expected == found