Ejemplo n.º 1
0
def calc_snv_read_pos_stats(sam, snvs, max_snps=None, max_pos=None):
    """Tally, per read group, where each SNV falls inside the reads covering it.

    This variant locates the covering reads through ``sam.pileup()``.

    The pileup is requested once and the same object is scanned for every
    SNV. NOTE(review): if ``sam.pileup()`` returns a one-shot iterator,
    columns passed while searching for one SNV are never revisited, so the
    SNVs presumably have to arrive in pileup order -- confirm with callers.

    Args:
        sam: alignment file object providing ``pileup()`` and ``getrname()``.
        snvs: iterable of objects exposing ``chrom``, ``pos`` and ``qual``.
        max_snps: when truthy, stop once this many SNVs have been processed.
        max_pos: when truthy, positions greater than this value are ignored.

    Returns:
        A dict with the keys ``'5_read_pos_counts'``, ``'3_read_pos_counts'``,
        ``'5_read_pos_boxplot'`` and ``'3_read_pos_boxplot'``. Each value maps
        a read group (``None`` for reads lacking an ``RG`` tag) to an
        ``IntCounter`` or an ``IntBoxplot`` respectively.

    Raises:
        RuntimeError: if no pileup column matches an SNV.
    """
    columns = sam.pileup()
    start_counts = {}
    end_counts = {}
    start_boxes = {}
    end_boxes = {}

    def within_limit(position):
        # A falsy max_pos (None or 0) means "no limit".
        return not max_pos or position <= max_pos

    for snv_idx, snv in enumerate(snvs):
        if max_snps and snv_idx >= max_snps:
            break
        chrom, ref_pos, snv_qual = snv.chrom, snv.pos, snv.qual
        location = (chrom, ref_pos)

        # Advance through the pileup until the column for this SNV shows up.
        snv_col = next(
            (column for column in columns
             if sam.getrname(column.reference_id) == chrom
             and column.reference_pos == ref_pos),
            None)
        if snv_col is None:
            raise RuntimeError('No pileup found for snv {}:{}'.format(
                chrom, ref_pos))

        for pileup_read in snv_col.pileups:
            alignment = pileup_read.alignment
            try:
                read_group = alignment.opt('RG')
            except KeyError:
                read_group = None

            coord = ReadRefCoord(alignment, sam)
            from_start = coord.get_read_pos(location)
            from_end = coord.get_read_pos_counting_from_end(location)

            # All four accumulators of a read group are created together.
            if read_group not in start_counts:
                start_counts[read_group] = IntCounter()
                end_counts[read_group] = IntCounter()
                start_boxes[read_group] = IntBoxplot()
                end_boxes[read_group] = IntBoxplot()

            if from_start is not None:
                # Position from the read start is stored one-based.
                one_based = from_start + 1
                if within_limit(one_based):
                    start_counts[read_group][one_based] += 1
                    start_boxes[read_group].append(one_based, snv_qual)
            if from_end is not None:
                # Position counted from the read end is stored unsigned.
                distance = abs(from_end)
                if within_limit(distance):
                    end_counts[read_group][distance] += 1
                    end_boxes[read_group].append(distance, snv_qual)

    return {
        '5_read_pos_counts': start_counts,
        '3_read_pos_counts': end_counts,
        '5_read_pos_boxplot': start_boxes,
        '3_read_pos_boxplot': end_boxes
    }
Ejemplo n.º 2
0
    def __init__(self,
                 vcf_fpath,
                 gq_threshold=None,
                 dp_threshold=100,
                 min_calls_for_pop_stats=DEF_MIN_CALLS_FOR_POP_STATS,
                 remarkable_coverages=None,
                 window_size=WINDOWS_SIZE):
        """Set up the readers and counters, then run the calculation.

        Args:
            vcf_fpath: path to the VCF file. It is opened twice: once as a
                file handle given to ``VCFReader`` and once by name through
                ``pyvcfReader``.
            gq_threshold: stored in ``_gq_threshold``; ``None`` becomes 0.
            dp_threshold: stored unchanged in ``dp_threshold``.
            min_calls_for_pop_stats: forwarded to ``VCFReader``.
            remarkable_coverages: depths that each get their own entry in
                ``sample_dp_coincidence``; ``REMARKABLE_DEPTHS`` is used when
                this is ``None``.
            window_size: stored unchanged in ``window_size``.

        ``self._calculate()`` is invoked as the last step.
        """
        # Previously only the None case bound ``remarkable_depths``, so any
        # caller-supplied value ended in an UnboundLocalError.
        if remarkable_coverages is None:
            remarkable_depths = REMARKABLE_DEPTHS
        else:
            remarkable_depths = remarkable_coverages
        self.remarkable_depths = remarkable_depths

        # NOTE(review): the file handle is handed over to VCFReader and is
        # not closed here; it has to stay open while the reader is in use.
        self._reader = VCFReader(
            open(vcf_fpath), min_calls_for_pop_stats=min_calls_for_pop_stats)

        self._random_reader = pyvcfReader(filename=vcf_fpath)

        self.window_size = window_size
        self._gq_threshold = 0 if gq_threshold is None else gq_threshold

        self.dp_threshold = dp_threshold
        self._gt_qual_depth_counter = {HOM: IntBoxplot(), HET: IntBoxplot()}
        self._ac2d = _AlleleCounts2D()

        # One counter for depth 1 plus one for every remarkable depth.
        self.sample_dp_coincidence = {1: IntCounter()}
        for cov in remarkable_depths:
            self.sample_dp_coincidence[cov] = IntCounter()

        self.called_snvs = 0
        self.called_gts = IntCounter()

        # Per-sample counters, keyed first by counter name and then by sample.
        self._sample_counters = {}

        for counter_name in SAMPLE_COUNTERS:
            per_sample = self._sample_counters.setdefault(counter_name, {})
            for sample in self._reader.samples:
                if counter_name in (GT_DEPTHS, GT_QUALS):
                    # These two are split by genotype kind.
                    counters = {HOM: IntCounter(), HET: IntCounter()}
                else:
                    counters = IntCounter()
                per_sample[sample] = counters

        self._snv_counters = {
            MAFS: IntCounter(),
            MACS: IntCounter(),
            MAFS_DP: IntCounter(),
            SNV_QUALS: IntCounter(),
            HET_IN_SNP: IntCounter(),
            SNV_DENSITY: IntCounter(),
            INBREED_F_IN_SNP: IntCounter(),
            DEPTHS: IntCounter()
        }
        self._calculate()
Ejemplo n.º 3
0
    def test_int_boxplot(self):
        """Draw an IntBoxplot holding two keys into a temporary .png file.

        There is no assertion: the test passes as long as building the
        boxplot and drawing it do not raise.
        """
        box = IntBoxplot()
        for key, value in ((1, 50), (1, 40), (1, 30), (1, 40),
                           (2, 30), (2, 10), (2, 20), (2, 40)):
            box.append(key, value)

        # Close the temporary file deterministically, even if drawing fails,
        # instead of leaving the handle open until garbage collection.
        with NamedTemporaryFile(suffix='.png') as fhand:
            draw_int_boxplot(box, fhand=fhand)
Ejemplo n.º 4
0
def calc_snv_read_pos_stats2(sam, snvs, max_snps=None, max_pos=None):
    """Tally, per read group, where each SNV falls inside the reads covering it.

    This variant asks ``sam.fetch(chrom, pos, pos + 1)`` for the reads of
    every SNV.

    Args:
        sam: alignment file object providing ``fetch()``.
        snvs: iterable of objects exposing ``chrom``, ``pos`` and ``qual``.
        max_snps: when truthy, stop once this many SNVs have been processed.
        max_pos: when truthy, positions greater than this value are ignored.

    Returns:
        A dict with the keys ``'5_read_pos_counts'``, ``'3_read_pos_counts'``,
        ``'5_read_pos_boxplot'`` and ``'3_read_pos_boxplot'``. Each value maps
        a read group (``None`` for reads lacking an ``RG`` tag) to an
        ``IntCounter`` or an ``IntBoxplot`` respectively.
    """
    stats = {
        '5_read_pos_counts': {},
        '3_read_pos_counts': {},
        '5_read_pos_boxplot': {},
        '3_read_pos_boxplot': {}
    }
    counts_from_start = stats['5_read_pos_counts']
    counts_from_end = stats['3_read_pos_counts']
    boxes_from_start = stats['5_read_pos_boxplot']
    boxes_from_end = stats['3_read_pos_boxplot']

    for snv_number, snv in enumerate(snvs):
        if max_snps and snv_number >= max_snps:
            break
        chrom, ref_pos, snv_qual = snv.chrom, snv.pos, snv.qual
        site = (chrom, ref_pos)

        for read in sam.fetch(chrom, ref_pos, ref_pos + 1):
            try:
                group = read.opt('RG')
            except KeyError:
                group = None

            locator = ReadRefCoord(read, sam)
            offset = locator.get_read_pos(site)
            offset_from_end = locator.get_read_pos_counting_from_end(site)

            # All four accumulators of a read group are created together.
            if group not in counts_from_start:
                counts_from_start[group] = IntCounter()
                counts_from_end[group] = IntCounter()
                boxes_from_start[group] = IntBoxplot()
                boxes_from_end[group] = IntBoxplot()

            if offset is not None:
                # Position from the read start is stored one-based.
                position = offset + 1
                if not max_pos or position <= max_pos:
                    counts_from_start[group][position] += 1
                    boxes_from_start[group].append(position, snv_qual)
            if offset_from_end is not None:
                # Position counted from the read end is stored unsigned.
                position = abs(offset_from_end)
                if not max_pos or position <= max_pos:
                    counts_from_end[group][position] += 1
                    boxes_from_end[group].append(position, snv_qual)

    return stats
Ejemplo n.º 5
0
    def test_int_boxplot(self):
        """Draw an IntBoxplot holding two keys into a temporary .png file.

        There is no assertion: the test passes as long as building the
        boxplot and drawing it do not raise.
        """
        box = IntBoxplot()
        for key, value in ((1, 50), (1, 40), (1, 30), (1, 40),
                           (2, 30), (2, 10), (2, 20), (2, 40)):
            box.append(key, value)

        # Close the temporary file deterministically, even if drawing fails,
        # instead of leaving the handle open until garbage collection.
        with NamedTemporaryFile(suffix='.png') as fhand:
            draw_int_boxplot(box, fhand=fhand)
Ejemplo n.º 6
0
    def test_boxplot(self):
        """Check IntBoxplot aggregation and its ASCII rendering.

        Nine values are appended under three keys; the aggregated counts
        must add up to nine and the ASCII plot must contain the expected
        line for key 2.
        """
        samples = [
            (1, 50), (1, 40), (1, 30), (1, 40),
            (2, 30), (2, 10), (2, 20), (2, 40),
            ('no distrib', 40),
        ]
        box = IntBoxplot()
        for key, value in samples:
            box.append(key, value)

        aggregated = box.aggregated_array
        assert sum(aggregated.values()) == 9

        rendered = box.ascii_plot
        expected_line = '2:10.0,15.0,25.0,35.0,40.0 <-----[============|======='
        assert expected_line in rendered
Ejemplo n.º 7
0
    def test_boxplot(self):
        """Check IntBoxplot aggregation and its ASCII rendering.

        Nine values are appended under three keys; the aggregated counts
        must add up to nine and the ASCII plot must contain the expected
        line for key 2.
        """
        box = IntBoxplot()
        values_by_key = (
            (1, (50, 40, 30, 40)),
            (2, (30, 10, 20, 40)),
            ('no distrib', (40,)),
        )
        for key, values in values_by_key:
            for value in values:
                box.append(key, value)

        total = sum(box.aggregated_array.values())
        assert total == 9

        ascii_art = box.ascii_plot
        assert '2:10.0,15.0,25.0,35.0,40.0 <-----[============|=======' in ascii_art