def test_subtract_with_gaps(self):
    """
    Subtract a control metacluster from a cached experiment cluster.

    No assertion is made here: the test only passes if the in-place
    subtraction completes without raising.

    Reference data kept from the original docstring (diff output followed
    by the experiment and control reads):

    238c238
    < chr17 147132 147296 5:1.00|22:2.00|37:1.00|37:2.00|64:1.00 2.0 . 147180 224.0
    ---
    > chr17 147132 147260 5:1.00|22:2.00|37:1.00|37:2.00|28:1.00 2.0 . 147180 188.0

    experiment
    chr17 146970 147070
    chr17 147058 147158
    chr17 147132 147232
    chr17 147196 147296
    chr17 147303 147403
    chr17 147445 147545
    chr17 147461 147561

    control
    chr17 146904 147004
    chr17 147036 147136
    chr17 147261 147361
    chr17 147437 147537
    chr17 147472 147572
    chr17 147472 147572
    """
    experiment = Cluster(cached=True)
    controls = [Cluster(read=BED, cached=True) for _ in range(5)]

    # experiment.bed
    experiment.read_line("chr17 146970 147296 88:1.00|13:2.00|61:1.00|27:2.00|37:1.00|37:2.00|64:1.00 2.0 . 147139 404.0")

    # control.bed
    region = Region("chr17", 1, 2000)
    control_lines = (
        "chr17 146904 147004",
        "chr17 147036 147136",
        "chr17 147261 147361",
        "chr17 147437 147537",
        "chr17 147472 147572",
    )
    for control, line in zip(controls, control_lines):
        control.read_line(line)

    # Only the second and third control reads are added to the region.
    region.add_tags([controls[1], controls[2]])
    metacluster = region.get_metacluster()

    experiment -= metacluster
def test_get_profile(self):
    """
    Check the ``_levels`` of the metacluster built from two distant tags.

    One tag is read with the default settings and one with
    ``read_half_open=True``; both are added to a region spanning 1-1999.
    """
    region = Region(start=1, end=1999)

    leading_tag = Cluster(read=BED)
    leading_tag.read_line('chr4 1 40')
    region.add_tags(leading_tag, True)

    trailing_tag = Cluster(read=BED, read_half_open=True)
    trailing_tag.read_line('chr4 400 500')
    region.add_tags(trailing_tag, True)

    expected_levels = [[40, 1.0], [360, 0.0], [100, 1.0]]
    metacluster = region.get_metacluster()
    self.assertEqual(metacluster._levels, expected_levels)
def test_FDR(self):
    """
    Expect ``get_FDR_clusters`` to return exactly one cluster.

    The region receives 50 tags with consecutive start positions (0..49),
    each 50 wide, plus a single tag at 55555-55558.
    """
    region = Region('', 1, 1999)

    shifted_tags = []
    for start in range(50):
        tag = Cluster()
        tag.read_line('chr4 %s %s 20:1'%(start, start+50))
        shifted_tags.append(tag)
    region.add_tags(shifted_tags, True)

    distant_tag = Cluster()
    distant_tag.read_line('chr4 55555 55558 7:1')
    region.add_tags(distant_tag)

    fdr_clusters = region.get_FDR_clusters()
    self.assertEqual(len(fdr_clusters), 1)
def test_region_swap_rpkm(self):
    """
    Exercise ``Region.swap`` and ``Region.rpkm`` on two three-tag regions.

    Nothing is asserted: the tag counts and rpkm values are computed and
    discarded, so the test only fails if one of the calls raises.
    """
    total_reads = 400000000
    total_reads_b = 500000000

    def add_bed_tags(region, bed_lines):
        # One fresh BED cluster per line, added to the region one at a time.
        for bed_line in bed_lines:
            tag = Cluster(read=BED)
            tag.read_line(bed_line)
            region.add_tags(tag)

    r = Region("chr1", 1, 1000, name2="bla")
    add_bed_tags(r, ('chr1 1 40', 'chr1 2 40', 'chr1 3 40'))

    r2 = Region("chr1", 1, 1000, name2="bla")
    add_bed_tags(r2, ('chr1 100 140', 'chr1 101 140', 'chr1 102 140'))

    swap1, swap2 = r.swap(r2)

    # TODO: don't know how to test this; the values below are evaluated
    # (in the original order) but never checked.
    len(r.tags)
    len(r2.tags)
    len(swap1.tags)
    len(swap2.tags)
    r.rpkm(total_reads)
    r2.rpkm(total_reads_b)
    swap1.rpkm((total_reads+total_reads_b)/2)
    swap2.rpkm((total_reads+total_reads_b)/2)
def test_normalized_counts(self):
    """
    Check ``Region.normalized_counts`` under several normalization options.

    The same read ("chr1 1 100") is added five times to two regions over
    chr1:1-300, one of them created with ``exome_size=200``.

    Results that come out of floating-point division are compared with
    ``assertAlmostEqual`` so the test does not hinge on last-bit rounding;
    whole-number results are still compared exactly.
    """
    total_number_reads = 1e7
    region = Region("chr1", 1, 300)
    region_bed12 = Region("chr1", 1, 300, exome_size = 200)

    # A single cluster object is reused: read, add to both regions, clear.
    c = Cluster(read=BED)
    for _ in range(5):
        c.read_line("chr1 1 100")
        region.add_tags(c, True)
        region_bed12.add_tags(c, True)
        c.clear()

    # simple counts
    self.assertEqual(region.normalized_counts(), 5.)
    # rpkm
    self.assertAlmostEqual(
        region.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads),
        1.666666666666667)
    # rpkm with exon_size
    self.assertAlmostEqual(
        region_bed12.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads),
        2.5)
    # with pseudocounts
    self.assertEqual(region.normalized_counts(pseudocount=True), 6.)
    self.assertAlmostEqual(
        region.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads, regions_analyzed=10000, pseudocount=True),
        1.998001998001998)