コード例 #1
0
ファイル: test_io.py プロジェクト: mkiyer/pytrackfactory
 def test_interval_to_array(self):
     """Check write_interval_data_to_array against directly filled arrays.

     Part 1 tiles the single reference "chr1" with back-to-back intervals of
     a given size, each carrying a distinct value, and writes them using a
     range of chunk sizes.  Part 2 repeats the exercise with the intervals
     assigned to three references in a repeating pattern.  In both parts the
     array(s) filled by write_interval_data_to_array must equal arrays that
     were filled in directly with the same values.
     """
     ref = "chr1"
     endpos = 1000
     chunkstep = 10
     dtype = "i"
     # test different interval sizes
     for intervalsize in range(1, 100, 10):
         # the last interval can extend past endpos, so leave room for it
         shape = (endpos + intervalsize, 1)
         fullarr = np.zeros(shape, dtype=dtype)
         intervals = []
         val = 0
         for start in range(0, endpos, intervalsize):
             end = start + intervalsize
             intervals.append(Interval(ref, start, end, POS_STRAND, val))
             fullarr[start:end, 0] = val
             val += 2
         # test different chunk sizes
         for chunksize in range(chunkstep, endpos, chunkstep):
             testarr = np.zeros(shape, dtype=dtype)
             write_interval_data_to_array(iter(intervals),
                                          {"chr1": testarr},
                                          dtype=dtype,
                                          chunksize=chunksize,
                                          num_channels=1,
                                          channel_dict=get_channel_dict())
             self.assertTrue(np.all(testarr == fullarr))
     #
     # test intervals on different chromosomes
     #
     chroms = ("chr1", "chr2", "chr3")

     def empty_arrays(shape):
         # one fresh zero-filled array per chromosome
         return dict((chrom, np.zeros(shape, dtype=dtype))
                     for chrom in chroms)

     # yields each chromosome three times in a row, then repeats forever;
     # created once, so the pattern carries over between interval sizes
     refiter = itertools.cycle(itertools.chain(itertools.repeat("chr1", 3),
                                               itertools.repeat("chr2", 3),
                                               itertools.repeat("chr3", 3)))
     for intervalsize in (1, 10, 50, 100):
         shape = (endpos + intervalsize, 1)
         fullarr = empty_arrays(shape)
         intervals = []
         val = 0
         for start in range(0, endpos, intervalsize):
             # builtin next() works on both Python 2.6+ and Python 3
             ref = next(refiter)
             end = start + intervalsize
             intervals.append(Interval(ref, start, end, POS_STRAND, val))
             fullarr[ref][start:end] = val
             val += 2
         # test different chunk sizes
         for chunksize in (1, 8, 16, 32, 64, 128, 256, 512, 1024):
             testarr = empty_arrays(shape)
             write_interval_data_to_array(iter(intervals),
                                          testarr,
                                          dtype=dtype,
                                          chunksize=chunksize,
                                          num_channels=1,
                                          channel_dict=get_channel_dict())
             for chrom in testarr:
                 self.assertTrue(np.all(testarr[chrom] == fullarr[chrom]))
コード例 #2
0
 def test_get_channel_dict(self):
     """Verify the channel tuples produced by get_channel_dict.

     The dictionary is built for every combination of the is_pe, is_strand
     and is_allele flags, and selected (read number, strand, base) keys are
     looked up and compared with the channel tuples they are expected to
     map to.
     """
     # default: with every flag off, each value is expected to be (0,)
     plain = get_channel_dict(is_pe=False, is_strand=False, is_allele=False)
     for channels in plain.values():
         self.assertEqual(channels, (0,))
     # Each entry holds the (is_pe, is_strand, is_allele) flags followed by
     # the (key, expected channels) pairs checked for that configuration.
     cases = (
         # paired end
         ((True, False, False), (
             ((0, NO_STRAND, "A"), (0,)),
             ((0, POS_STRAND, "G"), (0,)),
             ((1, NEG_STRAND, "C"), (1,)),
             ((1, POS_STRAND, "T"), (1,)),
             ((None, NEG_STRAND, "N"), (0, 1)),
         )),
         # strand
         ((False, True, False), (
             ((0, NO_STRAND, "A"), (0, 1)),
             ((1, POS_STRAND, "G"), (0,)),
             ((0, NEG_STRAND, "C"), (1,)),
         )),
         # allele
         ((False, False, True), (
             ((0, NO_STRAND, "A"), (0,)),
             ((1, POS_STRAND, "G"), (1,)),
             ((0, NEG_STRAND, "C"), (2,)),
             ((0, NEG_STRAND, "T"), (3,)),
             ((0, NO_STRAND, "N"), (0, 1, 2, 3)),
         )),
         # pe/strand
         ((True, True, False), (
             ((None, NO_STRAND, "A"), (0, 1, 2, 3)),
             ((1, POS_STRAND, "G"), (1,)),
             ((0, NEG_STRAND, "C"), (2,)),
             ((1, NEG_STRAND, "T"), (3,)),
             ((None, NEG_STRAND, "N"), (2, 3)),
         )),
         # pe/allele
         ((True, False, True), (
             ((0, NO_STRAND, "A"), (0,)),
             ((0, POS_STRAND, "G"), (2,)),
             ((1, NEG_STRAND, "C"), (5,)),
             ((1, POS_STRAND, "T"), (7,)),
             ((None, NEG_STRAND, "N"), (0, 1, 2, 3, 4, 5, 6, 7)),
         )),
         # strand/allele
         ((False, True, True), (
             ((None, NO_STRAND, "A"), (0, 1)),
             ((1, POS_STRAND, "G"), (2,)),
             ((0, NEG_STRAND, "C"), (5,)),
             ((1, NEG_STRAND, "T"), (7,)),
             ((None, NEG_STRAND, "N"), (1, 3, 5, 7)),
         )),
         # pe/strand/allele
         ((True, True, True), (
             ((None, NO_STRAND, "A"), (0, 1, 2, 3)),
             ((1, POS_STRAND, "G"), (5,)),
             ((0, NEG_STRAND, "C"), (10,)),
             ((1, NO_STRAND, "T"), (13, 15)),
             ((None, NO_STRAND, "N"), tuple(range(0, 16))),
             ((None, NEG_STRAND, "N"), (2, 3, 6, 7, 10, 11, 14, 15)),
             ((1, POS_STRAND, "N"), (1, 5, 9, 13)),
         )),
     )
     for flags, checks in cases:
         use_pe, use_strand, use_allele = flags
         mapping = get_channel_dict(is_pe=use_pe,
                                    is_strand=use_strand,
                                    is_allele=use_allele)
         for key, expected in checks:
             self.assertEqual(mapping[key], expected)
コード例 #3
0
 def test_stranded_allele_intervals(self):
     """Test count/coverage/density on a stranded, allele-aware VectorTrack.

     Random stranded allele intervals are loaded into a track and the stored
     "gene1" data is compared with the expected array.  Then, for a second
     set of random query intervals, count(), coverage() and density() are
     compared with values computed directly from the expected array, once
     each for POS_STRAND, NEG_STRAND and NO_STRAND.

     Reads self.tf, self.length and self.isize_max (presumably prepared by
     the test fixture's setUp -- not visible here).
     """
     dtype = "f"
     channel_dict = get_channel_dict(False, True, True)
     pos_strand_channels = channel_dict[(None,POS_STRAND,None)]
     neg_strand_channels = channel_dict[(None,NEG_STRAND,None)]
     intervals1, correct1 = \
         random_stranded_allele_intervals(100, self.length,
                                          self.isize_max, dtype)
     total_cov = correct1.sum()
     t = self.tf.create_track("a", VectorTrack, strand=True, allele=True)
     # test loading from intervals
     t.fromintervals(iter(intervals1))
     self.assertTrue(np.all(t["gene1"] == correct1))
     # Column selector into the expected array for each queried strand;
     # NO_STRAND takes every column (slice(None) is the same as ':').
     strand_columns = (
         (POS_STRAND, pos_strand_channels),
         (NEG_STRAND, neg_strand_channels),
         (NO_STRAND, slice(None)),
     )
     # test count function (only the query intervals are needed here)
     intervals2, _ = random_intervals(10, self.length, self.isize_max, dtype)
     for ival in intervals2:
         ref = ival.ref
         start = ival.start
         end = ival.end
         val = ival.value
         for strand, columns in strand_columns:
             query = (ref, start, end, strand, val)
             expected = correct1[start:end, columns]
             # count: total signal in the selected columns of the window
             mycount = t.count(query)
             correctcount = expected.sum()
             self.assertAlmostEqual(
                 mycount, correctcount,
                 msg="count mismatch for %r: %r != %r"
                     % (query, mycount, correctcount))
             # coverage: per-position signal normalized by total coverage
             mycov = t.coverage(query, multiplier=1.0)
             correctcov = expected.sum(axis=1) / float(total_cov)
             self.assertTrue(np.allclose(mycov, correctcov, atol=1e-4),
                             msg="coverage mismatch for %r" % (query,))
             # density: count normalized by total coverage and window size
             mydens = t.density(query, multiplier=1.0)
             correctdens = correctcount / float(total_cov * (end - start))
             self.assertTrue(np.allclose(mydens, correctdens, atol=1e-4),
                             msg="density mismatch for %r" % (query,))
コード例 #4
0
def random_stranded_allele_intervals(n, length, isize_max, dtype):
    """Build random stranded sequence intervals and the array they imply.

    For each of the `n` iterations a random span on 'gene1' is chosen and a
    random sequence over "ATGCN" is drawn for it.  Two SequenceInterval
    objects are emitted per span: a POS_STRAND one with value 2 and a
    NEG_STRAND one with value -1, both carrying the same sequence.  For every
    position the expected array receives each value split evenly across the
    channels that get_channel_dict(False, True, True) maps
    (None, strand, base) to.

    Args:
        n: number of random spans; 2*n intervals are returned.
        length: number of rows in the expected array.  Span starts are drawn
            from [0, length - isize_max).
        isize_max: exclusive upper bound on the span length; lengths are
            drawn from [1, isize_max).
        dtype: dtype of the expected array.

    Returns:
        A tuple (intervals, correct): the list of SequenceInterval objects
        and the expected array of shape (length, 8).
    """
    channel_dict = get_channel_dict(False, True, True)
    intervals = []
    correct = np.zeros((length, 8), dtype)
    for _ in range(n):
        start = np.random.randint(0, length - isize_max)
        end = start + np.random.randint(1, isize_max)
        # one random base per position of the span
        bases = [random.choice("ATGCN") for _pos in range(start, end)]
        for x, dna in enumerate(bases, start):
            pos_channels = channel_dict[(None, POS_STRAND, dna)]
            neg_channels = channel_dict[(None, NEG_STRAND, dna)]
            # spread each interval's value evenly over its channels
            correct[x, pos_channels] += 2.0 / len(pos_channels)
            correct[x, neg_channels] += -1.0 / len(neg_channels)
        seq = ''.join(bases)
        intervals.append(SequenceInterval('gene1', start, end, POS_STRAND, 2, seq=seq))
        intervals.append(SequenceInterval('gene1', start, end, NEG_STRAND, -1, seq=seq))
    return intervals, correct