Esempio n. 1
0
    def test_draw_histogram_in_axes(self):
        """Smoke-test draw_histogram_in_axes with and without y limits.

        Each case builds a distribution with IntCounter, draws it as a LINE
        histogram, adds a legend and renders the canvas into a temporary PNG
        file. Nothing is asserted about the image itself: the test only
        checks that drawing and rendering finish without raising.
        """
        values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4]
        distrib = IntCounter(values).calculate_distribution()
        # The context manager closes the temporary file (which also removes
        # it) even when drawing or rendering raises.
        with NamedTemporaryFile(suffix='.png') as fhand:
            axes, canvas = draw_histogram_in_axes(distrib['counts'],
                                                  distrib['bin_limits'],
                                                  kind=LINE,
                                                  distrib_label='test')
            axes.legend()
            canvas.print_figure(fhand, format='png')
            fhand.flush()

        # ylimit test
        values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4, 0, 5, 4, 4, 4, 4, 4]
        distrib = IntCounter(values).calculate_distribution()
        with NamedTemporaryFile(suffix='.png') as fhand:
            axes, canvas = draw_histogram_in_axes(distrib['counts'],
                                                  distrib['bin_limits'],
                                                  kind=LINE,
                                                  distrib_label='test',
                                                  ylimits=(None, 4))
            axes.legend()
            canvas.print_figure(fhand, format='png')
            fhand.flush()
Esempio n. 2
0
    def test_draw_histogram_in_axes(self):
        """Smoke-test draw_histogram_in_axes with and without y limits.

        Each case builds a distribution with IntCounter, draws it as a LINE
        histogram, adds a legend and renders the canvas into a temporary PNG
        file. Nothing is asserted about the image itself: the test only
        checks that drawing and rendering finish without raising.
        """
        values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4]
        distrib = IntCounter(values).calculate_distribution()
        # The context manager closes the temporary file (which also removes
        # it) even when drawing or rendering raises.
        with NamedTemporaryFile(suffix='.png') as fhand:
            axes, canvas = draw_histogram_in_axes(distrib['counts'],
                                                  distrib['bin_limits'],
                                                  kind=LINE,
                                                  distrib_label='test')
            axes.legend()
            canvas.print_figure(fhand, format='png')
            fhand.flush()

        # ylimit test
        values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4, 0, 5, 4, 4, 4, 4, 4]
        distrib = IntCounter(values).calculate_distribution()
        with NamedTemporaryFile(suffix='.png') as fhand:
            axes, canvas = draw_histogram_in_axes(distrib['counts'],
                                                  distrib['bin_limits'],
                                                  kind=LINE,
                                                  distrib_label='test',
                                                  ylimits=(None, 4))
            axes.legend()
            canvas.print_figure(fhand, format='png')
            fhand.flush()
Esempio n. 3
0
 def test_draw_histogram_in_fhand(self):
     """Smoke-test draw_histogram_in_fhand writing into a temporary PNG.

     Builds a distribution with IntCounter and hands its counts and bin
     limits to draw_histogram_in_fhand. Nothing is asserted about the
     output: the test only checks that the call does not raise.
     """
     values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4]
     distrib = IntCounter(values).calculate_distribution()
     # The context manager closes the temporary file (which also removes
     # it) even when drawing raises.
     with NamedTemporaryFile(suffix='.png') as fhand:
         draw_histogram_in_fhand(distrib['counts'], distrib['bin_limits'],
                                 fhand=fhand)
Esempio n. 4
0
 def test_draw_histogram_in_fhand(self):
     """Smoke-test draw_histogram_in_fhand writing into a temporary PNG.

     Builds a distribution with IntCounter and hands its counts and bin
     limits to draw_histogram_in_fhand. Nothing is asserted about the
     output: the test only checks that the call does not raise.
     """
     values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4]
     distrib = IntCounter(values).calculate_distribution()
     # The context manager closes the temporary file (which also removes
     # it) even when drawing raises.
     with NamedTemporaryFile(suffix='.png') as fhand:
         draw_histogram_in_fhand(distrib['counts'],
                                 distrib['bin_limits'],
                                 fhand=fhand)
Esempio n. 5
0
    def test_distribution(self):
        """Check the counts and bin limits returned by calculate_distribution.

        The first case uses the counter built by ``create_test_counter`` with
        10 bins and ``outlier_threshold=5``; the second one uses a small
        IntCounter built from a dict and 3 bins.
        """
        ints_counter = self.create_test_counter()
        distrib = ints_counter.calculate_distribution(bins=10,
                                                      outlier_threshold=5)

        # Plain int literals: they compare equal to Python 2 longs, while
        # the ``L`` suffix is a syntax error on Python 3.
        expected_counts = [7, 13, 7, 10, 7, 22, 6, 4, 5, 5]
        expected_limits = [110, 118, 126, 134, 142, 150, 158, 166, 174, 182,
                           190]
        assert distrib['counts'] == expected_counts
        assert distrib['bin_limits'] == expected_limits
        assert 'average' in str(ints_counter)

        ints_counter = IntCounter({0: 2, 1: 1, 3: 1})
        result = [2, 1, 1]
        assert ints_counter.calculate_distribution(bins=3)['counts'] == result
Esempio n. 6
0
    def test_distribution(self):
        """Check the counts and bin limits returned by calculate_distribution.

        The first case uses the counter built by ``create_test_counter`` with
        10 bins and ``outlier_threshold=5``; the second one uses a small
        IntCounter built from a dict and 3 bins.
        """
        ints_counter = self.create_test_counter()
        distrib = ints_counter.calculate_distribution(bins=10,
                                                      outlier_threshold=5)

        # Plain int literals: they compare equal to Python 2 longs, while
        # the ``L`` suffix is a syntax error on Python 3.
        assert distrib['counts'] == [7, 13, 7, 10, 7, 22, 6, 4, 5, 5]
        assert distrib['bin_limits'] == [
            110, 118, 126, 134, 142, 150, 158, 166, 174, 182, 190
        ]
        assert 'average' in str(ints_counter)

        ints_counter = IntCounter({0: 2, 1: 1, 3: 1})
        result = [2, 1, 1]
        assert ints_counter.calculate_distribution(bins=3)['counts'] == result
Esempio n. 7
0
def _first_kind_present(kinds, pair):
    """Return the first label of *kinds* that appears in *pair*, or None."""
    for kind in kinds:
        if kind in pair:
            return kind
    return None


def _classify_alignments_group(alignments_group, mates):
    """Return the kind label for one mate's group of alignments.

    *mates* is ``[primary_alignment, primary_mate]``. The label is one of
    '1', '2a', '2b', '3a', '3b', '4', '5', '6' or 'other', chosen from the
    answers of the module helpers (_read_is_totally_mapped,
    _find_secondary_fragment, _alignments_in_same_ref, _mates_are_outies,
    _mates_are_innies) and from the mapping attributes of the primary
    alignment.
    """
    primary_alignment, primary_mate = mates

    # NOTE(review): 0.05, 5 and 100 are passed through unchanged; their
    # meaning is defined by the helpers that receive them.
    if _read_is_totally_mapped(alignments_group, 0.05):
        if primary_alignment.mate_is_unmapped:
            return '1'
        if primary_alignment.rname != primary_alignment.rnext:
            return '6'
        if _mates_are_outies(mates):
            return '3a'
        if _mates_are_innies(mates):
            return '2a'
        return '5'

    fragment = _find_secondary_fragment(alignments_group, 5, 100)
    if fragment is not None:
        if (_alignments_in_same_ref([primary_alignment, primary_mate]) or
                _alignments_in_same_ref([fragment, primary_mate])):
            return '4'
        return 'other'

    if primary_alignment.is_unmapped:
        return '1'
    if primary_alignment.rname != primary_alignment.rnext:
        return '6'
    if _mates_are_outies(mates):
        return '3b'
    if _mates_are_innies(mates):
        return '2b'
    return '5'


def show_distances_distributions(bam_fpath, n=None, kind_of_interest=None):
    """Print how many read pairs fall in each kind and their distances.

    Every group of alignments yielded by _group_alignments_by_reads is split
    into mates, each mate is labelled by _classify_alignments_group and the
    pair is then tallied under a single label:

    * '1', '6' and 'other' (checked in that order) are only counted;
    * otherwise the distance returned by _find_distance is stored under the
      first of '4', '2b', '3b', '2a', '3a', '5' found in the pair.

    The tallies are printed and, for every label in *kind_of_interest*, the
    histogram of its distances is printed as well.

    Args:
        bam_fpath: path handed to ``pysam.Samfile``.
        n: maximum number of read groups to process; ``None`` processes all
            of them.
        kind_of_interest: container of distance labels ('4', '2b', '3b',
            '2a', '3a', '5') whose histograms should be printed. ``None``
            prints no histogram (it used to raise a TypeError).
    """
    if kind_of_interest is None:
        kind_of_interest = ()

    # Key order mirrors the original literals so the printed order is kept.
    counts_by_kind = {'1': 0, '6': 0, 'other': 0}
    distances_by_kind = {'4': [], '2b': [], '3b': [], '2a': [],
                         '3a': [], '5': []}
    counted_only = ('1', '6', 'other')
    distance_priority = ('4', '2b', '3b', '2a', '3a', '5')

    bamfile = pysam.Samfile(bam_fpath)
    try:
        # It tries to find out the kind of each pair of sequences
        groups = _group_alignments_by_reads(bamfile)
        for group_index, grouped_mates in enumerate(groups):
            if n is not None and group_index == n:
                break
            mates_alignments = _split_mates(grouped_mates)
            pair = []
            mates = None
            for mate_number, alignments_group in enumerate(mates_alignments,
                                                           1):
                mate = _get_mate(mate_number, mates_alignments)
                primary_mate = _get_primary_alignment(mate)
                primary_alignment = _get_primary_alignment(alignments_group)
                mates = [primary_alignment, primary_mate]
                pair.append(_classify_alignments_group(alignments_group,
                                                       mates))
            if not pair:
                # Nothing was classified, so there is nothing to tally and
                # no mates to measure a distance on.
                continue

            kind = _first_kind_present(counted_only, pair)
            if kind is not None:
                counts_by_kind[kind] += 1
                continue

            # As before, the distance is measured on the mates built for the
            # last alignments group of the pair.
            distance = _find_distance(mates)
            kind = _first_kind_present(distance_priority, pair)
            if kind is not None:
                distances_by_kind[kind].append(distance)
    finally:
        # The file is only needed while iterating over its alignments.
        bamfile.close()

    # Single-argument print calls give the same output as the old print
    # statements on Python 2 and are also valid on Python 3.
    for key in counts_by_kind:
        print('%s %s' % (key.ljust(5), counts_by_kind[key]))
    for key in distances_by_kind:
        print('%s %s' % (key.ljust(5), len(distances_by_kind[key])))
    for key in distances_by_kind:
        if key in kind_of_interest:
            print('%s distance distribution' % key)
            counter = IntCounter(iter(distances_by_kind[key]))
            distribution = counter.calculate_distribution(remove_outliers=True)
            counts = distribution['counts']
            bin_limits = distribution['bin_limits']
            print(draw_histogram(bin_limits, counts))