Esempio n. 1
0
    def test_subtract_with_gaps(self):
        """Subtract a control metacluster from a cached experiment cluster.

        Nothing is asserted: the test only requires that the in-place
        subtraction completes without raising.

        Output difference recorded alongside this test (presumably the
        regression that motivated it -- TODO confirm):

        238c238
        < chr17	147132	147296	5:1.00|22:2.00|37:1.00|37:2.00|64:1.00	2.0	.	147180	224.0
        ---
        > chr17	147132	147260	5:1.00|22:2.00|37:1.00|37:2.00|28:1.00	2.0	.	147180	188.0

        experiment reads
        chr17 146970 147070
        chr17 147058 147158
        chr17 147132 147232
        chr17 147196 147296
        chr17 147303 147403
        chr17 147445 147545
        chr17 147461 147561

        control reads
        chr17 146904 147004
        chr17 147036 147136
        chr17 147261 147361
        chr17 147437 147537
        chr17 147472 147572
        chr17 147472 147572
        """
        control_lines = (
            "chr17 146904 147004",
            "chr17 147036 147136",
            "chr17 147261 147361",
            "chr17 147437 147537",
            "chr17 147472 147572",
        )

        experiment = Cluster(cached=True)
        # All control clusters are created up front, before any line is read.
        controls = [Cluster(read=BED, cached=True) for _ in control_lines]

        # experiment.bed
        experiment.read_line("chr17	146970	147296	88:1.00|13:2.00|61:1.00|27:2.00|37:1.00|37:2.00|64:1.00	2.0	.	147139	404.0")

        # control.bed
        region = Region("chr17", 1, 2000)
        for control, line in zip(controls, control_lines):
            control.read_line(line)

        # Only the second and third control reads are added to the region.
        region.add_tags([controls[1], controls[2]])
        metacluster = region.get_metacluster()
        experiment -= metacluster
Esempio n. 2
0
    def test_get_profile(self):
        """Check the ``_levels`` of a metacluster built from two separated reads."""
        region = Region(start=1, end=1999)

        first_read = Cluster(read=BED)
        first_read.read_line('chr4 1 40')
        region.add_tags(first_read, True)

        # The second read is parsed with read_half_open=True.
        half_open_read = Cluster(read=BED, read_half_open=True)
        half_open_read.read_line('chr4 400 500')
        region.add_tags(half_open_read, True)

        metacluster = region.get_metacluster()
        expected_levels = [[40, 1.0], [360, 0.0], [100, 1.0]]
        self.assertEqual(metacluster._levels, expected_levels)
Esempio n. 3
0
 def test_FDR(self):
     """Expect exactly one FDR cluster from fifty staggered reads plus one far-away read."""

     def read_cluster(line):
         # Build a Cluster with default settings and parse one line into it.
         cluster = Cluster()
         cluster.read_line(line)
         return cluster

     region = Region('', 1, 1999)

     # Fifty reads, each shifted one position further than the previous one.
     staggered = [
         read_cluster('chr4 %s %s 20:1' % (start, start + 50))
         for start in range(50)
     ]
     region.add_tags(staggered, True)

     # A single read whose coordinates lie far beyond the region's end.
     outlier = read_cluster('chr4 55555 55558 7:1')
     region.add_tags(outlier)

     fdr_clusters = region.get_FDR_clusters()
     self.assertEqual(len(fdr_clusters), 1)
Esempio n. 4
0
    def test_region_swap_rpkm(self):
        """Exercise ``swap`` and ``rpkm`` on two regions holding three reads each.

        Nothing is asserted yet; the calls only have to complete without
        raising.
        """
        total_reads = 400000000
        total_reads_b = 500000000

        def fill(region, lines):
            # Parse every line into its own Cluster and add it to the region.
            for line in lines:
                read = Cluster(read=BED)
                read.read_line(line)
                region.add_tags(read)

        r = Region("chr1", 1, 1000, name2="bla")
        fill(r, ('chr1 1 40', 'chr1 2 40', 'chr1 3 40'))

        r2 = Region("chr1", 1, 1000, name2="bla")
        fill(r2, ('chr1 100 140', 'chr1 101 140', 'chr1 102 140'))

        swap1, swap2 = r.swap(r2)

        # TODO dont know how to test this
        # The values are evaluated but not checked.
        mean_total_reads = (total_reads + total_reads_b) / 2
        (
            len(r.tags),
            len(r2.tags),
            len(swap1.tags),
            len(swap2.tags),
            r.rpkm(total_reads),
            r2.rpkm(total_reads_b),
            swap1.rpkm(mean_total_reads),
            swap2.rpkm(mean_total_reads),
        )
Esempio n. 5
0
    def test_normalized_counts(self):
        """Check ``normalized_counts`` for raw, RPKM-style and pseudocount modes.

        Five identical reads ("chr1 1 100") are added to a plain region and
        to a region created with ``exome_size=200``.

        The whole-number counts are compared exactly.  The normalized values
        are compared with ``assertAlmostEqual`` so the test does not depend
        on the last bit of a floating-point result.
        """
        total_number_reads = 1e7
        region = Region("chr1", 1, 300)
        region_bed12 = Region("chr1", 1, 300, exome_size=200)

        # A single Cluster is reused: read a line, add it to both regions,
        # then clear it before the next iteration.
        c = Cluster(read=BED)
        for _ in range(5):
            c.read_line("chr1 1 100")
            region.add_tags(c, True)
            region_bed12.add_tags(c, True)
            c.clear()

        # simple counts
        self.assertEqual(region.normalized_counts(), 5.)
        # rpkm
        self.assertAlmostEqual(
            region.normalized_counts(
                region_norm=True,
                total_n_norm=True,
                total_reads=total_number_reads,
            ),
            1.666666666666667,
        )
        # rpkm with exon_size
        self.assertAlmostEqual(
            region_bed12.normalized_counts(
                region_norm=True,
                total_n_norm=True,
                total_reads=total_number_reads,
            ),
            2.5,
        )

        # with pseudocounts
        self.assertEqual(region.normalized_counts(pseudocount=True), 6.)
        self.assertAlmostEqual(
            region.normalized_counts(
                region_norm=True,
                total_n_norm=True,
                total_reads=total_number_reads,
                regions_analyzed=10000,
                pseudocount=True,
            ),
            1.998001998001998,
        )