def test_draw_histogram_in_axes(self):
    """Smoke-test draw_histogram_in_axes with and without explicit ylimits.

    The test makes no assertions on the produced image; it only checks that
    drawing a LINE histogram, adding a legend and rendering the figure to a
    PNG file completes without raising.
    """
    cases = [
        # Default y axis limits.
        ([1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4], {}),
        # ylimit test: the value 4 is counted more often than the requested
        # upper limit (presumably the plot gets clipped there -- confirm
        # against draw_histogram_in_axes).
        ([1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4, 0, 5, 4, 4, 4, 4, 4],
         {'ylimits': (None, 4)}),
    ]
    for values, extra_kwargs in cases:
        counter = IntCounter(values)
        distrib = counter.calculate_distribution()
        # The context manager guarantees the temporary PNG is closed (and
        # therefore removed) even if the drawing code raises.
        with NamedTemporaryFile(suffix='.png') as fhand:
            axes, canvas = draw_histogram_in_axes(distrib['counts'],
                                                  distrib['bin_limits'],
                                                  kind=LINE,
                                                  distrib_label='test',
                                                  **extra_kwargs)
            axes.legend()
            canvas.print_figure(fhand, format='png')
            fhand.flush()
            # To inspect the image by hand, pause here and open fhand.name.
def test_draw_histogram_in_fhand(self):
    """Smoke-test draw_histogram_in_fhand writing into a temporary file.

    No assertion is made on the output; the test passes when the histogram
    of a small integer sample is drawn without raising.
    """
    values = [1, 2, 3, 1, 2, 3, 2, 3, 2, 3, 2, 1, 4]
    distrib = IntCounter(values).calculate_distribution()
    # Close the temporary file deterministically instead of relying on
    # garbage collection to release it.
    with NamedTemporaryFile(suffix='.png') as fhand:
        draw_histogram_in_fhand(distrib['counts'], distrib['bin_limits'],
                                fhand=fhand)
def test_distribution(self):
    """It tests the histogram function."""
    ints_counter = self.create_test_counter()
    distrib = ints_counter.calculate_distribution(bins=10,
                                                  outlier_threshold=5)
    # Plain int literals: they compare equal to long values on Python 2 and,
    # unlike the ``7L`` spelling, are also valid syntax on Python 3.
    expected_counts = [7, 13, 7, 10, 7, 22, 6, 4, 5, 5]
    expected_bin_limits = [110, 118, 126, 134, 142, 150, 158, 166, 174,
                           182, 190]
    assert distrib['counts'] == expected_counts
    assert distrib['bin_limits'] == expected_bin_limits

    # The textual representation of the counter mentions the average.
    assert 'average' in str(ints_counter)

    # A counter can also be built from a {value: count} mapping.
    ints_counter = IntCounter({0: 2, 1: 1, 3: 1})
    result = [2, 1, 1]
    assert ints_counter.calculate_distribution(bins=3)['counts'] == result
def test_distribution(self):
    """It tests the histogram function."""
    # Case 1: ten bins over the shared test counter.
    counter = self.create_test_counter()
    distribution = counter.calculate_distribution(bins=10,
                                                  outlier_threshold=5)
    # The expected counts are written without the Python-2-only ``L``
    # suffix; int and long compare equal, so the check is unchanged on
    # Python 2 and the literal is valid on Python 3 as well.
    assert distribution['counts'] == [7, 13, 7, 10, 7, 22, 6, 4, 5, 5]
    assert distribution['bin_limits'] == [
        110, 118, 126, 134, 142, 150, 158, 166, 174, 182, 190
    ]
    assert 'average' in str(counter)

    # Case 2: a counter created from a {value: count} mapping, three bins.
    counter = IntCounter({0: 2, 1: 1, 3: 1})
    counts = counter.calculate_distribution(bins=3)['counts']
    assert counts == [2, 1, 1]
def _classify_mate_kind(alignments_group, mates):
    """Return the kind label for one read given its alignments and its mate.

    ``mates`` is the two item list ``[primary_alignment, primary_mate]``.
    The returned label is one of '1', '2a', '2b', '3a', '3b', '4', '5', '6'
    or 'other'.
    """
    primary_alignment, primary_mate = mates

    # NOTE(review): 0.05 is handed to _read_is_totally_mapped unchanged;
    # presumably a tolerated unmapped fraction -- confirm with that helper.
    if _read_is_totally_mapped(alignments_group, 0.05):
        if primary_alignment.mate_is_unmapped:
            return '1'
        if primary_alignment.rname != primary_alignment.rnext:
            return '6'
        if _mates_are_outies(mates):
            return '3a'
        if _mates_are_innies(mates):
            return '2a'
        return '5'

    # NOTE(review): the meaning of 5 and 100 is defined by
    # _find_secondary_fragment; they are passed through untouched.
    fragment = _find_secondary_fragment(alignments_group, 5, 100)
    if fragment is not None:
        # The read is split in two fragments: it is kind '4' when either
        # fragment lies in the same reference as the primary mate.
        if (_alignments_in_same_ref([primary_alignment, primary_mate]) or
                _alignments_in_same_ref([fragment, primary_mate])):
            return '4'
        return 'other'

    if primary_alignment.is_unmapped:
        return '1'
    if primary_alignment.rname != primary_alignment.rnext:
        return '6'
    if _mates_are_outies(mates):
        return '3b'
    if _mates_are_innies(mates):
        return '2b'
    return '5'


def show_distances_distributions(bam_fpath, n=None, kind_of_interest=None):
    """Classify the read pairs of a BAM file and print per-kind statistics.

    Every group of alignments yielded by _group_alignments_by_reads is split
    into mates, each mate gets a kind label and the pair is then tallied
    under a single kind:

    * '1', '6' and 'other' pairs are only counted (checked in that order).
    * Otherwise the distance returned by _find_distance is stored under the
      first label found in the pair among '4', '2b', '3b', '2a', '3a', '5'.

    The function prints the number of pairs of every kind and, for each
    distance kind listed in ``kind_of_interest``, a histogram of its
    distances computed with outliers removed.

    bam_fpath -- path of the BAM file, opened with pysam.Samfile.
    n -- maximum number of read groups to process; None processes them all.
    kind_of_interest -- container of kind labels whose distance histogram
        should be printed. None (the default) selects no kind, so only the
        summary counts are printed.
    """
    if kind_of_interest is None:
        # The membership test below used to fail with a TypeError for the
        # default value.
        kind_of_interest = ()

    # Keys are inserted in the same order as before so that iterating the
    # dictionaries, and therefore the printed output, keeps its order.
    pair_counts = {'1': 0, '6': 0, 'other': 0}
    distances = {'4': [], '2b': [], '3b': [], '2a': [], '3a': [], '5': []}
    counted_priority = ('1', '6', 'other')
    distance_priority = ('4', '2b', '3b', '2a', '3a', '5')

    bamfile = pysam.Samfile(bam_fpath)
    try:
        # It tries to find out the kind of each pair of sequences
        grouped_reads = _group_alignments_by_reads(bamfile)
        for group_index, grouped_mates in enumerate(grouped_reads):
            if n is not None and group_index == n:
                break
            mates_alignments = _split_mates(grouped_mates)

            pair = []
            mates = None
            for mate_number, alignments_group in enumerate(mates_alignments,
                                                           1):
                mate = _get_mate(mate_number, mates_alignments)
                primary_mate = _get_primary_alignment(mate)
                primary_alignment = _get_primary_alignment(alignments_group)
                mates = [primary_alignment, primary_mate]
                pair.append(_classify_mate_kind(alignments_group, mates))

            if not pair:
                # Nothing was classified, so there is no mate pair to
                # measure a distance on.
                continue

            counted_kind = next((kind for kind in counted_priority
                                 if kind in pair), None)
            if counted_kind is not None:
                pair_counts[counted_kind] += 1
                continue

            distance_kind = next((kind for kind in distance_priority
                                  if kind in pair), None)
            if distance_kind is not None:
                # As before, the distance is measured on the mates of the
                # last read examined in the group.
                distances[distance_kind].append(_find_distance(mates))
    finally:
        # Release the BAM file handle even if the classification fails.
        bamfile.close()

    # The single-argument print form emits the same text on Python 2 and 3.
    for key in pair_counts:
        print('%s %s' % (key.ljust(5), pair_counts[key]))
    for key in distances:
        print('%s %s' % (key.ljust(5), len(distances[key])))
    for key in distances:
        if key in kind_of_interest:
            print('%s distance distribution' % key)
            counter = IntCounter(iter(distances[key]))
            distribution = counter.calculate_distribution(
                remove_outliers=True)
            counts = distribution['counts']
            bin_limits = distribution['bin_limits']
            print(draw_histogram(bin_limits, counts))