def test_sub_and_print(self):
     """Subtract one cluster from another and check the written result.

     The subtrahend is built with ``write_half_open=False``; the difference
     is serialised with ``write_line()`` and compared with the expected
     three-record text.
     """
     minuend = Cluster()
     subtrahend = Cluster(write_half_open=False)
     minuend.read_line('chr1    1  1000  10:2|10:4|80:5|500:7|100:7|100:5')
     subtrahend.read_line('chr1    11  1000  10:4|80:5|500:6|100:7|99:5|1:4.99')
     difference = minuend - subtrahend
     expected = (
         'chr1\t1\t10\t10:2.00\t2.0\t.\t5\t20.0\n'
         'chr1\t101\t600\t500:1.00\t1.0\t.\t350\t500.0\n'
         'chr1\t800\t800\t1:0.01\t0.01\t.\t800\t0.01\n'
     )
     self.assertEqual(difference.write_line(), expected)
 def test_is_contiguous_wig(self):
     """Check ``is_contiguous`` on two WIG records sharing a boundary.

     The first record ends at 1599949 and the second starts at 1599949;
     both are read with ``read_half_open=True``.
     """
     left = Cluster(read=WIG,  read_half_open=True)
     left.read_line('chr1    1599888 1599949 1.77')
     right = Cluster(read=WIG, write=WIG,  read_half_open=True)
     right.read_line('chr1    1599949 1600001 2.65')
     touching = left.is_contiguous(right)
     self.assertTrue(touching)
 def test_subtract_with_gaps(self):
     """In-place subtraction where the subtrahend has zero-height levels.

     The ``_profile`` left on the minuend after ``-=`` is compared with the
     expected list of ``[length, height]`` pairs.
     """
     signal = Cluster()
     background = Cluster()
     signal.read_line("chr2 1 100 30:1|50:2|40:1|3000:3")
     background.read_line("chr2 1 100 30:1|50:0|40:1|200:0|5000:1")
     signal -= background
     expected_profile = [[50, 2.0], [40, 0.0], [200, 3.0], [2800, 2.0]]
     self.assertEqual(signal._profile, expected_profile)
 def test_add_pk(self):
     """Add two clusters read in PK format and check the written sum."""
     first = Cluster(read=PK)
     second = Cluster(read=PK)
     first.read_line('chr1\t1\t145\t9:2.00|41:3.00|50:2.00|45:1.00\n')
     second.read_line('chr1\t1\t125\t9:4.00|41:3.00|30:2.00|45:1.00\n')
     total = first + second
     expected = 'chr1\t1\t145\t50:6.00|30:4.00|20:3.00|25:2.00|20:1.00\t6.0\t.\t25\t550.0\n'
     self.assertEqual(total.write_line(), expected)
 def test_bug_contiguous_peaks(self):
     """Add two peaks that meet at position 850408 and check they intersect.

     The sum is computed only to exercise ``+`` (its value is not
     inspected); the assertion is on ``intersects`` for the two inputs.
     """
     upstream = Cluster(rounding=True, read=PK, write=PK)
     downstream = Cluster(rounding=True, read=PK, write=PK)
     upstream.read_line('chr1    849917  850408  8:2|10:4|80:5|23:6|29:7|8:5|10:3|39:2|12:3|29:4|5:3|18:4|41:3|30:4|15:5|12:4|34:3|59:2|30:1')
     downstream.read_line('chr1    850408  850648  66:2|25:3|59:4|66:2|25:1        +')
     # The addition must complete without raising; the sum itself is unused.
     merged = upstream + downstream
     meets = upstream.intersects(downstream)
     self.assertTrue(meets)
Exemple #6
0
    def get_overlaping_clusters(self, region, overlap=1):
        clusters = []
        bam_tell, read_start = self.get_bam_tell(region)
        print "TELL", bam_tell, read_start
        if bam_tell or region.start < LINEAR_SIZE*4:
            r = BamReader(self.bam_path, self.logger, bam_tell, read_start, self.chr_dict_inv)
            for line in r:
                c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
                try:
                    c.read_line(line)
                except InvalidLine:
                    print "Invalid line, .bam or .bai corrupt"
                    break

                if c.overlap(region) >= overlap:                
                    clusters.append(c)
                elif c.start > region.end or c.name != region.name:
                    break

        if len(clusters) > 0:
            print "Num clusters", len(clusters)
            print "first:", clusters[0].start, clusters[0].end
            if len(clusters) > 1: print "end:", clusters[-1].start, clusters[-1].end
        else:
            print "No clusters found!"
        print

        return clusters
 def test_wig_read_write(self):
     """Read three WIG records and write them back twice.

     The output is checked once as configured at construction and once
     more after calling ``write_as(WIG, False)``.
     """
     wig = Cluster(read=WIG, write=WIG, read_half_open=True, write_half_open=True, rounding=True)
     for record in ('chr2 1 10 1', 'chr2 10 16 2', 'chr2 16 26 1'):
         wig.read_line(record)
     expected_initial = 'chr2\t1\t10\t1\nchr2\t10\t16\t2\nchr2\t16\t26\t1\n'
     self.assertEqual(wig.write_line(), expected_initial)
     wig.write_as(WIG, False)
     expected_after_write_as = 'chr2\t2\t10\t1\nchr2\t11\t16\t2\nchr2\t17\t26\t1\n'
     self.assertEqual(wig.write_line(), expected_after_write_as)
 def test_sub_fast(self):
    """Subtract a control cluster from an experiment cluster in place."""
    # random/experiment.pk
    treated = Cluster(rounding=True)
    treated.read_line("chr1   1     1107     101:2|7:1  2.0     .       263     238.0")
    background = Cluster()
    background.read_line("chr1   46    1222      47:1|54:2|47:1 2.0     .       71331   202.0")
    treated -= background
    # Expected record: chr1    1       92      45:2|47:1       2.0     .       23      137.0
    expected = 'chr1\t1\t92\t45:2|47:1\t2.0\t.\t23\t137.0\n'
    self.assertEqual(treated.write_line(), expected)
 def test_is_significant(self):
     """Exercise ``is_significant`` with and without the "numreads" argument."""
     peak = Cluster(rounding=True)
     # The levels sum to an area of 35: 4*1 + 1*2 + 2*1 + 3*4 + 2*5 + 2*2 + 1*1.
     peak.read_line('chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1')
     # (arguments, expected truthiness), evaluated in this order.
     checks = (
         ((5, "numreads"), True),
         ((34, "numreads"), True),
         ((36, "numreads"), False),
         ((20,), False),
         ((1,), True),
         ((5,), True),
     )
     for arguments, expected in checks:
         verdict = peak.is_significant(*arguments)
         if expected:
             self.assertTrue(verdict)
         else:
             self.assertFalse(verdict)
Exemple #10
0
 def test_extend(self):
     """Extend a '+' and a '-' BED read to 100 and check the written lines."""
     plus_read = Cluster(read=BED)
     plus_read.read_line('chr3 1 35 noname 666 +')
     minus_read = Cluster(read=BED)
     minus_read.read_line('chr3 56 100 noname 666 -')
     plus_read.extend(100)
     minus_read.extend(100)
     expected_plus = 'chr3\t1\t100\t100:1.00\t1.0\t+\t50\t100.0\n'
     expected_minus = 'chr3\t1\t100\t100:1.00\t1.0\t-\t50\t100.0\n'
     self.assertEqual(plus_read.write_line(), expected_plus)
     self.assertEqual(minus_read.write_line(), expected_minus)
Exemple #11
0
 def test_extend_bug(self):
     """Extend two BED reads to 100, add them, and check the sum has length 200."""
     plus_read = Cluster(read=BED, write=PK)
     plus_read.read_line('chr3 1 35 666 noname +')
     minus_read = Cluster(read=BED, write=PK)
     minus_read.read_line('chr3 156 200 666 noname -')
     plus_read.extend(100)
     minus_read.extend(100)
     combined = plus_read + minus_read
     self.assertEqual(200, len(combined))
Exemple #12
0
 def test_split_subtract_result(self):
     """Split a subtraction result at threshold 0 and check every piece.

     The profile contains zero and negative levels; ``absolute_split`` is
     expected to return exactly the three pieces listed below, in order.
     """
     sub_result = Cluster(write_half_open=True, cached=True)
     sub_result.read_line('chr4 1 300 20:1|40:0|20:3|20:0.3|10:-6|80:1|10:0')
     clusters = sub_result.absolute_split(threshold=0)
     expected = [
         'chr4\t0\t20\t20:1.00\t1.0\t.\t10\t20.0\n',
         'chr4\t60\t100\t20:3.00|20:0.30\t3.0\t.\t70\t66.0\n',
         'chr4\t110\t190\t80:1.00\t1.0\t.\t150\t80.0\n',
     ]
     # Compare the whole lists: iterating over len(clusters) alone would pass
     # vacuously if the split returned fewer (or no) pieces.
     written = [piece.write_line() for piece in clusters]
     self.assertEqual(written, expected)
Exemple #13
0
 def test_intersects(self):
     """Check ``intersects`` for touching, different-name and overlapping clusters.

     Also adds two PK clusters that meet at position 100 (the sum is not
     inspected) and checks that the inputs are reported as intersecting.
     """
     touching = Cluster('chr1', 1, 10).intersects(Cluster('chr1', 10, 14))
     self.assertTrue(touching)
     different_name = Cluster('chr2', 1, 10).intersects(Cluster('chr1', 4, 14))
     self.assertFalse(different_name)
     overlapping = Cluster('chr1', 1, 10).intersects(Cluster('chr1', 4, 14))
     self.assertTrue(overlapping)

     left = Cluster(read=PK, rounding=True)
     left.read_line('chr1  1 100 100:1')
     right = Cluster(read=PK,rounding=True)
     right.read_line('chr1 100 199 100:1')
     # The addition is only exercised; its result is unused.
     merged = left + right
     self.assertTrue(left.intersects(right))
Exemple #14
0
    def test_get_profile(self):
        """Add two BED tags to a region and inspect the metacluster levels."""
        region = Region(start=1, end=1999)
        first_tag = Cluster(read=BED)
        first_tag.read_line('chr4 1 40')
        region.add_tags(first_tag, True)
        second_tag = Cluster(read=BED, read_half_open=True)
        second_tag.read_line('chr4 400 500')
        region.add_tags(second_tag, True)

        metacluster = region.get_metacluster()
        expected_levels = [[40, 1.0], [360, 0.0], [100, 1.0]]
        self.assertEqual(metacluster._levels, expected_levels)
Exemple #15
0
    def test_split(self):
        """Split a multi-peak cluster with ``split(0.01)`` and check every piece.

        The expected pieces are built by reading the three reference lines
        into fresh clusters, so both sides are compared through
        ``write_line()``.
        """
        double_cluster = Cluster(rounding=True)
        double_cluster.read_line('chr1  100  215  5:1|10:5|5:7|5:80|5:1|5:40|15:1|10:2|5:3|5:8|10:6|10:5|10:4|10:3|6:2')

        results = double_cluster.split(0.01)

        reference_lines = (
            'chr1    100      125      5:1|10:5|5:7|5:80|2:1',
            'chr1    128      141      2:1|5:40|7:1',
            'chr1    143      215      7:1|10:2|5:3|5:8|10:6|10:5|10:4|10:3|6:2',
        )
        expected = []
        for line in reference_lines:
            reference = Cluster(rounding=True)
            reference.read_line(line)
            expected.append(reference.write_line())

        # Compare the whole lists: indexing only over the expected pieces
        # would silently ignore any surplus cluster returned by split().
        written = [piece.write_line() for piece in results]
        self.assertEqual(written, expected)
Exemple #16
0
    def test_simple_ucsc_representation(self):
        """Convert one BED read to WIG and check the written line.

        The original author confirmed the expected output visually at the
        UCSC browser with these tracks:

        track name=simple_read visibility=full
        chr3 101 200 noname 555 +
        track type=wiggle_0 name=the_test visibility=full
        chr3 101 200 1
        """
        bed_read = Cluster(read=BED, write=WIG, read_half_open=True, write_half_open=True, rounding = True, cached=True)
        bed_read.read_line('chr3 101 200 noname 555 +')

        wig_text = bed_read.write_line()
        self.assertEqual(wig_text, 'chr3\t101\t200\t1\n')
Exemple #17
0
 def test_FDR(self):
     """Add 50 staggered tags plus one far-away tag and count the FDR clusters.

     ``get_FDR_clusters`` is expected to return exactly one cluster.
     """
     region = Region('', 1, 1999)
     staggered = []
     for offset in range(50):
         tag = Cluster()
         tag.read_line('chr4 %s %s 20:1'%(offset, offset+50))
         staggered.append(tag)
     region.add_tags(staggered, True)
     far_tag = Cluster()
     far_tag.read_line('chr4 55555 55558 7:1')
     region.add_tags(far_tag)
     fdr_clusters = region.get_FDR_clusters()
     self.assertEqual(len(fdr_clusters), 1)
Exemple #18
0
    def read_and_extend(self, cluster, line, extension):
        """Read ``line``, extend it by ``extension`` and merge it into ``cluster``.

        If ``cluster`` is empty the line is read straight into it and the
        cluster is extended.  Otherwise the line is read into a temporary
        BED cluster, extended, and added to ``cluster`` with ``+=``.

        Returns the resulting cluster.
        """
        scratch = Cluster(read=BED, write=BED, read_half_open=True, write_half_open=True, rounding = True)
        if not cluster.is_empty():
            scratch.read_line(line)
            scratch.extend(extension)
            cluster += scratch
            return cluster

        cluster.read_line(line)
        cluster.extend(extension)
        return cluster
Exemple #19
0
    def test_bed_to_half_open_wig(self):
        """Read two overlapping BED lines and check the WIG output.

        The original author confirmed the expected output visually at the
        UCSC browser with these tracks:

        track name=simple_cluster visibility=full
        chr1 1 100 hola 666  +
        chr1 10 130 hola 666 +
        track type=wiggle_0 name=the_test  visibility=full
        chr1    1       10      1
        chr1    10      100     2
        chr1    100     130     1
        """
        pileup = Cluster(read=BED, write=WIG, read_half_open=True, write_half_open=True, rounding = True, cached=True)
        for bed_line in ('chr1 1 100 hola 666  +', 'chr1 10 130 hola 666 +'):
            pileup.read_line(bed_line)
        expected = 'chr1\t1\t10\t1\nchr1\t10\t100\t2\nchr1\t100\t130\t1\n'
        self.assertEqual(pileup.write_line(), expected)
Exemple #20
0
    def test_normalized_counts(self):
        """Check ``Region.normalized_counts`` for raw, normalised and pseudocount calls.

        Five identical ``chr1 1 100`` tags are added to two regions spanning
        1-300, one of which is created with ``exome_size=200``.  Raw counts
        are compared exactly; normalised values are the result of
        floating-point arithmetic and are compared with a tolerance.
        """
        total_number_reads = 1e7
        region = Region("chr1", 1, 300)
        region_bed12 = Region("chr1", 1, 300, exome_size = 200)
        c = Cluster(read=BED)
        for _ in range(5):
            c.read_line("chr1 1 100")
            region.add_tags(c, True)
            region_bed12.add_tags(c, True)
            c.clear()

        self.assertEqual(region.normalized_counts(), 5.) #simple-counts
        # places=12 keeps the check tight while tolerating last-bit rounding
        # differences that make exact float equality brittle.
        self.assertAlmostEqual(region.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads), 1.666666666666667, places=12) #rpkm
        self.assertAlmostEqual(region_bed12.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads), 2.5, places=12) #rpkm with exon_size

        self.assertEqual(region.normalized_counts(pseudocount=True), 6.) #with pseudocounts
        self.assertAlmostEqual(region.normalized_counts(region_norm=True, total_n_norm=True, total_reads = total_number_reads, regions_analyzed=10000, pseudocount=True), 1.998001998001998, places=12)
Exemple #21
0
 def test_add2(self):
     """Read four BED lines into one cluster and check the written profile.

     Three identical reads spanning 1-20000 are followed by one spanning
     1001-20000.
     """
     pileup = Cluster(read=BED)
     for _ in range(3):
         pileup.read_line('chr1 1 20000 666 hola +')
     pileup.read_line('chr1 1001 20000 666 hola +')
     expected = 'chr1\t1\t20000\t1000:3.00|19000:4.00\t4.0\t+\t10500\t79000.0\n'
     self.assertEqual(pileup.write_line(), expected)
Exemple #22
0
    def get_overlaping_clusters(self, region, overlap=1):    
        clusters = []
        self.logger.debug('Launching Samtools for %s...'%region)
        proc = subprocess.Popen("samtools view %s %s:%s-%s"%(self.bam_path, region.name, region.start, region.end), stdout=subprocess.PIPE, shell=True)
        out, err = proc.communicate()
        self.logger.debug('... done')
        lines = filter(None, out.split("\n"))
        self.logger.debug('Numlines in %s: %s'%(region, len(lines)))
        for line in lines:
            c = Cluster(read=SAM, cached=False, read_half_open=self.read_half_open, rounding=self.rounding)
            try:
                c.read_line(line)
            except InvalidLine:
                print "Invalid line, .bam or .bai corrupt"
                break

            if c.overlap(region) >= overlap:                
                clusters.append(c)
            elif c.start > region.end or c.name != region.name:
                break

        return clusters
Exemple #23
0
 def test_strand_add(self):
     """Check the strand of a sum for '+ +', '- -' and '+ -' operand pairs."""
     plus_a, plus_b, minus_a, minus_b = [Cluster(read=BED, cached=True) for _ in range(4)]
     plus_a.read_line("chr1 1 100 0 0 +")
     plus_b.read_line("chr1 1 100 0 0 +")
     minus_a.read_line("chr1 1 100 0 0 -")
     minus_b.read_line("chr1 1 100 0 0 -")
     both_plus = plus_a + plus_b
     both_minus = minus_a + minus_b
     mixed = plus_b + minus_a
     self.assertEqual(both_plus.strand, PLUS_STRAND)
     self.assertEqual(mixed.strand, NO_STRAND)
     self.assertEqual(both_minus.strand, MINUS_STRAND)
Exemple #24
0
 def test_eq(self):
     """Clusters read from identical lines are equal; a shorter profile is not."""
     full_line = 'chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1'
     truncated_line = 'chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2'
     original = Cluster(read=PK)
     twin = Cluster(read=PK)
     shorter = Cluster(read=PK)
     original.read_line(full_line)
     twin.read_line(full_line)
     shorter.read_line(truncated_line)
     self.assertEqual(original, twin)
     self.assertNotEqual(original, shorter)
Exemple #25
0
 def test_internal_representations(self):
     """Check the internal integrity of the data; if this fails, something is badly wrong.

     The same interval is read as half-open BED, PK, half-open WIG and
     plain WIG, and the resulting ``_levels``, ``start`` and ``end`` are
     compared.
     """
     half_open_bed = Cluster(read=BED, cached=True, read_half_open=True)
     half_open_bed.read_line('chr1 0 100 2345 hola +')
     half_open_bed._flush_tag_cache()
     peak = Cluster(read=PK)
     peak.read_line('chr1 1 100 100:1')
     half_open_wig = Cluster(read=WIG, read_half_open=True)
     half_open_wig.read_line('chr1 0 100 1')
     plain_wig = Cluster(read=WIG)
     plain_wig.read_line('chr1 1 100 1')

     self.assertEqual(half_open_bed._levels, peak._levels)
     self.assertEqual(plain_wig._levels, peak._levels)
     self.assertEqual(plain_wig._levels, half_open_bed._levels)
     self.assertEqual(plain_wig._levels, half_open_wig._levels)
     self.assertEqual(plain_wig.start, half_open_wig.start)
     self.assertEqual(plain_wig.end, half_open_wig.end)
     self.assertEqual(plain_wig._levels[0], [100, 1.0])
Exemple #26
0
 def test_is_artifact(self):
     """Check ``is_artifact`` on one clear case and two borderline profiles."""

     def load_peak(line):
         # Build a PK cluster with rounding and read one line into it.
         peak = Cluster(read=PK, write=PK, rounding=True)
         peak.read_line(line)
         return peak

     artifact = load_peak('chr1        1        111        5:1|100:4|6:2')
     almost_not_artifact = load_peak('chr1        1        100        35:1|30:4|35:1')
     almost_artifact = load_peak('chr1        1        99        45:1|25:4|30:1')
     almost_artifact._recalculate_end()

     self.assertTrue(artifact.is_artifact())
     # NOTE(review): the original comment says that with the changes for
     # version 0.6.1 this profile IS an artifact, yet the assertion expects
     # False -- confirm which one is intended.
     self.assertFalse(almost_not_artifact.is_artifact())
     self.assertFalse(almost_artifact.is_artifact())
Exemple #27
0
    def test_bed_half_open_to_wig_half_open2(self):
        """Sum four BED reads, each extended to 130, and check the WIG output.

        The accumulator is created without ``cached``; the three reads added
        to it are created with ``cached=True``.
        """
        shared = dict(read=BED, write=WIG, read_half_open=True, write_half_open=True, rounding=True)
        cached_reads = [Cluster(cached=True, **shared) for _ in range(3)]
        total = Cluster(**shared)
        extension = 130

        total.read_line('chr1    156     192     id:7043691      1000    +')
        total.extend(extension)

        bed_lines = (
            'chr1    241     277     id:916714       1000    +',
            'chr1    241     277     id:916714       1000    +',
            'chr1    241     277     id:6880101      1000    +',
        )
        for read, bed_line in zip(cached_reads, bed_lines):
            read.read_line(bed_line)
            read.extend(extension)
            total += read

        expected = 'chr1\t156\t241\t1\nchr1\t241\t286\t4\nchr1\t286\t371\t3\n'
        self.assertEqual(total.write_line(), expected)
Exemple #28
0
 def test_is_empty(self):
     """Check ``is_empty`` before and after reading, and on a self-cancelling difference."""
     pk_line = 'chr1 1 15 4:1|1:2|2:1|3:4|2:5|2:2|1:1'

     bed_cluster = Cluster(read=BED)
     self.assertEqual(bed_cluster.is_empty(), True)
     bed_cluster.read_line('chr1 10 130 666 hola +')
     self.assertEqual(bed_cluster.is_empty(), False)

     pk_cluster = Cluster(read=PK)
     self.assertEqual(pk_cluster.is_empty(), True)
     pk_cluster.read_line(pk_line)
     self.assertEqual(pk_cluster.is_empty(), False)

     # Subtracting an identical cluster is expected to leave nothing.
     twin = Cluster(read=PK)
     twin.read_line(pk_line)
     difference = pk_cluster - twin
     self.assertEqual(difference.is_empty(), True)
Exemple #29
0
 def test_comparison(self):
     """Check ``<``, ``>`` and ``<=`` between a Region and BED clusters."""
     chr1_region = Region("chr1", 1, 100)
     same_span = Cluster(read=BED)
     same_span.read_line("chr1 1 100")
     on_chr4 = Cluster(read=BED)
     on_chr4.read_line("chr4 1000 1010")
     on_chr5 = Cluster(read=BED)
     on_chr5.read_line("chr5 3 103")

     self.assertTrue(chr1_region < on_chr4)
     self.assertTrue(on_chr4 < on_chr5)
     self.assertFalse(chr1_region > on_chr5)
     # Same name and coordinates: not strictly less, but less-or-equal.
     self.assertFalse(chr1_region < same_span)
     self.assertTrue(chr1_region <= same_span)
Exemple #30
0
    def test_bed_to_wig_extended(self):
        """Extend four BED reads to 100, sum them and compare with a WIG reference.

        The original author confirmed the expected output visually at the
        UCSC browser with these tracks:

        track name=unextended_cluster visibility=full
        chr1 1 36 hola 666  +
        chr1 2 37 hola 666  +
        chr1 71 106 hola 666 -
        chr1 73 108 hola 666 -
        track name=extended_cluster visibility=full
        chr1 1 101 hola 666  +
        chr1 2 102 hola 666  +
        chr1 6 106 hola 666 -
        chr1 8 108 hola 666 -
        track type=wiggle_0 name=extended_wig  visibility=full
        chr1    1       2       1
        chr1    2       6       2
        chr1    6       8       3
        chr1    8       101     4
        chr1    101     102     3
        chr1    102     106     2
        chr1    106     108     1
        """
        bed_lines = (
            'chr1 1 36 hola 666  +',
            'chr1 2 37 hola 666  +',
            'chr1 71 106 hola 666 -',
            'chr1 73 108 hola 666 -',
        )
        wig_lines = (
            'chr1    1       2       1',
            'chr1    2       6       2',
            'chr1    6       8       3',
            'chr1    8       101     4',
            'chr1    101     102     3',
            'chr1    102     106     2',
            'chr1    106     108     1',
        )

        pileup = Cluster(read=BED, write=WIG, read_half_open=True, write_half_open=True, rounding = True)
        for bed_line in bed_lines:
            pileup = self.read_and_extend(pileup, bed_line, 100)

        reference = Cluster(read=WIG, write=WIG, read_half_open=True, write_half_open=True, rounding = True)
        for wig_line in wig_lines:
            reference.read_line(wig_line)

        self.assertEqual(pileup.write_line(), reference.write_line())