Exemple #1
0
def _classify_mate_kind(alignments_group, primary_alignment, primary_mate):
    """Return the kind label assigned to one mate of a read pair.

    alignments_group  -- all the alignments of the read being classified.
    primary_alignment -- primary alignment of that read.
    primary_mate      -- primary alignment of the other mate.

    The label is one of '1', '2a', '2b', '3a', '3b', '4', '5', '6' or
    'other':

    * '1': the mate is flagged as unmapped (totally mapped read) or the
      read itself is unmapped (partially mapped read).
    * '6': rname and rnext of the primary alignment differ.
    * '3a'/'3b': _mates_are_outies() is true; '2a'/'2b':
      _mates_are_innies() is true. The 'a' suffix is used when
      _read_is_totally_mapped() is true, 'b' otherwise.
    * '5': same rname/rnext but neither outies nor innies.
    * '4': a secondary fragment was found and _alignments_in_same_ref()
      is true for the mate and either the primary alignment or that
      fragment; 'other' when a fragment exists but neither check holds.
    """
    mates = [primary_alignment, primary_mate]

    # NOTE(review): the meaning of 0.05 is defined by
    # _read_is_totally_mapped, which is not visible from here.
    if _read_is_totally_mapped(alignments_group, 0.05):
        if primary_alignment.mate_is_unmapped:
            return '1'
        if primary_alignment.rname != primary_alignment.rnext:
            return '6'
        if _mates_are_outies(mates):
            return '3a'
        if _mates_are_innies(mates):
            return '2a'
        return '5'

    # NOTE(review): the meaning of 5 and 100 is defined by
    # _find_secondary_fragment, which is not visible from here.
    fragment = _find_secondary_fragment(alignments_group, 5, 100)
    if fragment is not None:
        if (_alignments_in_same_ref([primary_alignment, primary_mate])
                or _alignments_in_same_ref([fragment, primary_mate])):
            return '4'
        return 'other'

    if primary_alignment.is_unmapped:
        return '1'
    if primary_alignment.rname != primary_alignment.rnext:
        return '6'
    if _mates_are_outies(mates):
        return '3b'
    if _mates_are_innies(mates):
        return '2b'
    return '5'


def show_distances_distributions(bam_fpath, n=None, kind_of_interest=None):
    """Print a per-kind summary of the read pairs found in a BAM file.

    Every group of alignments yielded by _group_alignments_by_reads() is
    split into its mates, each mate is labelled with a kind and the pair
    is then counted under a single kind. For the kinds '4', '2b', '3b',
    '2a', '3a' and '5' the value returned by _find_distance() is stored.

    bam_fpath        -- path handed to pysam.Samfile.
    n                -- maximum number of read groups to process; None
                        (the default) processes all of them.
    kind_of_interest -- container of kind labels (tested with ``in``)
                        whose distance histogram is printed. None (the
                        default) prints no histogram.

    Nothing is returned; the counts and histograms are printed.
    """
    # Kinds for which only the number of pairs is tracked.
    pair_counts = {'1': 0, '6': 0, 'other': 0}
    # Kinds for which the distance of every pair is kept.
    distances = {'4': [], '2b': [], '3b': [], '2a': [],
                 '3a': [], '5': []}
    # Precedence applied when both mates of a pair got different labels.
    count_only_kinds = ('1', '6', 'other')
    distance_kinds = ('4', '2b', '3b', '2a', '3a', '5')

    bamfile = pysam.Samfile(bam_fpath)
    try:
        # It tries to find out the kind of each pair of sequences
        grouped_reads = _group_alignments_by_reads(bamfile)
        for index, grouped_mates in enumerate(grouped_reads):
            if n is not None and index == n:
                break
            mates_alignments = _split_mates(grouped_mates)
            pair = []
            mates = None
            for i, alignments_group in enumerate(mates_alignments, 1):
                mate = _get_mate(i, mates_alignments)
                primary_mate = _get_primary_alignment(mate)
                primary_alignment = _get_primary_alignment(alignments_group)
                # The distance is measured on the alignments of the last
                # mate processed, so keep them for after the loop.
                mates = [primary_alignment, primary_mate]
                pair.append(_classify_mate_kind(alignments_group,
                                                primary_alignment,
                                                primary_mate))
            if not pair:
                # Nothing was classified, so there is neither a kind to
                # count nor a pair of alignments to measure.
                continue

            for kind in count_only_kinds:
                if kind in pair:
                    pair_counts[kind] += 1
                    break
            else:
                distance = _find_distance(mates)
                for kind in distance_kinds:
                    if kind in pair:
                        distances[kind].append(distance)
                        break
    finally:
        # Release the file handle even if the classification fails.
        bamfile.close()

    # Single-argument parenthesised prints give the same output with the
    # print statement (Python 2) and the print function (Python 3).
    for key in pair_counts:
        print('%s %s' % (key.ljust(5), pair_counts[key]))
    for key in distances:
        print('%s %s' % (key.ljust(5), len(distances[key])))

    # With the default None there is nothing to plot; testing
    # ``key in None`` would raise a TypeError.
    kinds_to_plot = () if kind_of_interest is None else kind_of_interest
    for key in distances:
        if key in kinds_to_plot:
            print('%s distance distribution' % key)
            counter = IntCounter(iter(distances[key]))
            distribution = counter.calculate_distribution(remove_outliers=True)
            counts = distribution['counts']
            bin_limits = distribution['bin_limits']
            print(draw_histogram(bin_limits, counts))
Exemple #2
0
 def test_ascii_histogram(self):
     """It plots an ASCII histogram.

     The rendered text must contain the expected line for the first bin.
     """
     limits = [-2, -1, 0, 1, 2]
     bin_counts = [9, 20, 30, 40]
     rendered = draw_histogram(bin_limits=limits, counts=bin_counts)
     expected_line = '[-2 , -1[ ( 9): ****************'
     assert expected_line in rendered
Exemple #3
0
 def test_ascii_histogram(self):
     """It plots an ASCII histogram.

     The rendered text must contain the expected line for the first bin.
     """
     limits = [-2, -1, 0, 1, 2]
     bin_counts = [9, 20, 30, 40]
     rendered = draw_histogram(bin_limits=limits, counts=bin_counts)
     expected_line = '[-2 , -1[ ( 9): ****************'
     assert expected_line in rendered