def test_interval_to_array(self):
    """Check write_interval_data_to_array against hand-built arrays.

    Part one puts every interval on a single reference and sweeps over
    interval sizes and chunk sizes.  Part two distributes the intervals
    over three references (three consecutive intervals per reference,
    cycling) and repeats the comparison for each reference array.
    """
    endpos = 1000
    chunkstep = 10
    dtype = "i"
    single_ref = "chr1"
    # part one: all intervals on one reference
    for intervalsize in xrange(1, 100, 10):
        # arrays are padded by one interval so the last interval fits
        shape = (endpos + intervalsize, 1)
        expected = np.zeros(shape, dtype=dtype)
        intervals = []
        for n, start in enumerate(xrange(0, endpos, intervalsize)):
            # interval values are 0, 2, 4, ...
            val = 2 * n
            end = start + intervalsize
            intervals.append(Interval(single_ref, start, end,
                                      POS_STRAND, val))
            expected[start:end, 0] = val
        # the written result must match for every chunk size
        for chunksize in xrange(chunkstep, endpos, chunkstep):
            observed = np.zeros(shape, dtype=dtype)
            write_interval_data_to_array(iter(intervals),
                                         {"chr1": observed},
                                         dtype=dtype,
                                         chunksize=chunksize,
                                         num_channels=1,
                                         channel_dict=get_channel_dict())
            self.assertTrue(np.all(observed == expected))
    # part two: intervals on different chromosomes
    refnames = ("chr1", "chr2", "chr3")
    # yields chr1 x3, chr2 x3, chr3 x3, then repeats; the iterator is
    # shared by all interval sizes below and is never reset
    refiter = itertools.cycle(name
                              for name in refnames
                              for _rep in xrange(3))
    for intervalsize in (1, 10, 50, 100):
        shape = (endpos + intervalsize, 1)
        expected = {name: np.zeros(shape, dtype=dtype)
                    for name in refnames}
        intervals = []
        for n, start in enumerate(xrange(0, endpos, intervalsize)):
            val = 2 * n
            end = start + intervalsize
            name = next(refiter)
            intervals.append(Interval(name, start, end, POS_STRAND, val))
            expected[name][start:end] = val
        # the written result must match for every chunk size
        for chunksize in (1, 8, 16, 32, 64, 128, 256, 512, 1024):
            observed = {name: np.zeros(shape, dtype=dtype)
                        for name in refnames}
            write_interval_data_to_array(iter(intervals),
                                         observed,
                                         dtype=dtype,
                                         chunksize=chunksize,
                                         num_channels=1,
                                         channel_dict=get_channel_dict())
            for chrom in observed:
                self.assertTrue(np.all(observed[chrom] == expected[chrom]))
def test_get_channel_dict(self):
    """Check the channel tuples produced by get_channel_dict.

    The dictionary is looked up with 3-tuple keys (first element 0, 1
    or None; a strand constant; a base letter) and each lookup is
    compared with the expected tuple of channel indexes, for every
    combination of the is_pe / is_strand / is_allele flags.
    """
    # default: with all flags off every entry is the single channel 0
    d = get_channel_dict(is_pe=False, is_strand=False, is_allele=False)
    for channels in d.values():
        self.assertEqual(channels, (0,))
    # each case: ((is_pe, is_strand, is_allele), [(key, expected), ...])
    cases = [
        # paired end
        ((True, False, False), [
            ((0, NO_STRAND, "A"), (0,)),
            ((0, POS_STRAND, "G"), (0,)),
            ((1, NEG_STRAND, "C"), (1,)),
            ((1, POS_STRAND, "T"), (1,)),
            ((None, NEG_STRAND, "N"), (0, 1)),
        ]),
        # strand
        ((False, True, False), [
            ((0, NO_STRAND, "A"), (0, 1)),
            ((1, POS_STRAND, "G"), (0,)),
            ((0, NEG_STRAND, "C"), (1,)),
        ]),
        # allele
        ((False, False, True), [
            ((0, NO_STRAND, "A"), (0,)),
            ((1, POS_STRAND, "G"), (1,)),
            ((0, NEG_STRAND, "C"), (2,)),
            ((0, NEG_STRAND, "T"), (3,)),
            ((0, NO_STRAND, "N"), (0, 1, 2, 3)),
        ]),
        # pe/strand
        ((True, True, False), [
            ((None, NO_STRAND, "A"), (0, 1, 2, 3)),
            ((1, POS_STRAND, "G"), (1,)),
            ((0, NEG_STRAND, "C"), (2,)),
            ((1, NEG_STRAND, "T"), (3,)),
            ((None, NEG_STRAND, "N"), (2, 3)),
        ]),
        # pe/allele
        ((True, False, True), [
            ((0, NO_STRAND, "A"), (0,)),
            ((0, POS_STRAND, "G"), (2,)),
            ((1, NEG_STRAND, "C"), (5,)),
            ((1, POS_STRAND, "T"), (7,)),
            ((None, NEG_STRAND, "N"), (0, 1, 2, 3, 4, 5, 6, 7)),
        ]),
        # strand/allele
        ((False, True, True), [
            ((None, NO_STRAND, "A"), (0, 1)),
            ((1, POS_STRAND, "G"), (2,)),
            ((0, NEG_STRAND, "C"), (5,)),
            ((1, NEG_STRAND, "T"), (7,)),
            ((None, NEG_STRAND, "N"), (1, 3, 5, 7)),
        ]),
        # pe/strand/allele
        ((True, True, True), [
            ((None, NO_STRAND, "A"), (0, 1, 2, 3)),
            ((1, POS_STRAND, "G"), (5,)),
            ((0, NEG_STRAND, "C"), (10,)),
            ((1, NO_STRAND, "T"), (13, 15)),
            ((None, NO_STRAND, "N"), tuple(range(16))),
            ((None, NEG_STRAND, "N"), (2, 3, 6, 7, 10, 11, 14, 15)),
            ((1, POS_STRAND, "N"), (1, 5, 9, 13)),
        ]),
    ]
    for (is_pe, is_strand, is_allele), expectations in cases:
        d = get_channel_dict(is_pe=is_pe,
                             is_strand=is_strand,
                             is_allele=is_allele)
        for key, expected in expectations:
            self.assertEqual(d[key], expected)
def test_stranded_allele_intervals(self):
    """Check count, coverage and density on a stranded allele track.

    A track is loaded from random stranded sequence intervals and must
    equal the expected array returned alongside them.  Then, for a set
    of random query intervals, count(), coverage() and density() are
    compared with values computed directly from the expected array for
    the plus strand, the minus strand and both strands together.
    """
    dtype = "f"
    channel_dict = get_channel_dict(False, True, True)
    intervals1, correct1 = random_stranded_allele_intervals(
        100, self.length, self.isize_max, dtype)
    # normalisation constant used for coverage and density
    total_cov = correct1.sum()
    t = self.tf.create_track("a", VectorTrack, strand=True, allele=True)
    # test loading from intervals
    t.fromintervals(iter(intervals1))
    self.assertTrue(np.all(t["gene1"] == correct1))
    # column selection of the expected array for each queried strand;
    # slice(None) selects every channel for the unstranded query
    strand_columns = (
        (POS_STRAND, channel_dict[(None, POS_STRAND, None)]),
        (NEG_STRAND, channel_dict[(None, NEG_STRAND, None)]),
        (NO_STRAND, slice(None)),
    )
    # test count/coverage/density on random query intervals
    intervals2, _ = random_intervals(10, self.length, self.isize_max, dtype)
    for ival in intervals2:
        ref, start, end, val = ival.ref, ival.start, ival.end, ival.value
        for strand, columns in strand_columns:
            query = (ref, start, end, strand, val)
            window = correct1[start:end, columns]
            # count: plain sum over the window
            mycount = t.count(query)
            correctcount = window.sum()
            self.assertAlmostEqual(mycount, correctcount)
            # coverage: per-position sum divided by the total
            mycov = t.coverage(query, multiplier=1.0)
            correctcov = window.sum(axis=1) / float(total_cov)
            self.assertTrue(np.allclose(mycov, correctcov, atol=1e-4))
            # density: count divided by total and by interval length
            mydens = t.density(query, multiplier=1.0)
            correctdens = correctcount / float(total_cov * (end - start))
            self.assertTrue(np.allclose(mydens, correctdens, atol=1e-4))
def random_stranded_allele_intervals(n, length, isize_max, dtype):
    """Generate random stranded sequence intervals and their expected array.

    For each of the `n` iterations a random [start, end) range is drawn
    together with a random sequence over "ATGCN" of matching length.
    Two SequenceInterval objects on 'gene1' are emitted for the range:
    a plus-strand one with value 2 and a minus-strand one with value -1,
    both carrying the same sequence.

    Returns a tuple (intervals, correct) where `intervals` is the list
    of 2*n intervals and `correct` is a (length, 8) array of `dtype`.
    For every position and base, 2.0 is split evenly over the
    plus-strand channels of that base and -1.0 is split evenly over the
    minus-strand channels, as given by get_channel_dict(False, True, True).
    """
    channel_dict = get_channel_dict(False, True, True)
    correct = np.zeros((length, 8), dtype)
    intervals = []
    for _ in xrange(n):
        start = np.random.randint(0, length - isize_max)
        end = start + np.random.randint(1, isize_max)
        # one random base per covered position
        seq = ''.join(random.choice("ATGCN") for _pos in xrange(start, end))
        for x, dna in zip(xrange(start, end), seq):
            pos_channels = channel_dict[(None, POS_STRAND, dna)]
            neg_channels = channel_dict[(None, NEG_STRAND, dna)]
            # spread each interval's value evenly over the matching channels
            correct[x, pos_channels] += 2.0 / len(pos_channels)
            correct[x, neg_channels] += -1.0 / len(neg_channels)
        intervals.append(
            SequenceInterval('gene1', start, end, POS_STRAND, 2, seq=seq))
        intervals.append(
            SequenceInterval('gene1', start, end, NEG_STRAND, -1, seq=seq))
    return intervals, correct