Esempio n. 1
0
 def test_distance(self):
     """DinucUsage.distance should give the Euclidean distance between usages."""
     # With Overlapping=False these should count as 1 AA + 10 GG versus
     # 5 AA + 7 GG, i.e. a (4, 3) difference whose Euclidean length is 5.
     one_aa_ten_gg = DinucUsage('AA' + 'GG' * 10, Overlapping=False)
     five_aa_seven_gg = DinucUsage('AA' * 5 + 'GG' * 7, Overlapping=False)
     # (left, right, expected distance): identity first, then both directions.
     expectations = (
         (one_aa_ten_gg, one_aa_ten_gg, 0),
         (one_aa_ten_gg, five_aa_seven_gg, 5),
         (five_aa_seven_gg, one_aa_ten_gg, 5),
     )
     for left, right, expected in expectations:
         self.assertEqual(left.distance(right), expected)
Esempio n. 2
0
 def test_init_from_seq(self):
     """DinucUsage built from a string should hold the expected counts."""
     poly_a = 'AAAAA'
     mixed = 'ACTACG'
     acg_repeat = 'ACG' * 5
     acg_repeat_gaa = acg_repeat + 'GAA'
     # Each case: (sequence, DinucUsage keyword options, expected counts
     # after filter_dict).
     # NOTE: DNA input is mapped to RNA, so expected keys use U, not T.
     cases = [
         # default arguments
         (poly_a, {}, {'AA': 4}),
         (mixed, {}, {'AC': 2, 'CU': 1, 'UA': 1, 'CG': 1}),
         # non-overlapping dinucleotides
         (poly_a, {'Overlapping': False}, {'AA': 2}),
         (mixed, {'Overlapping': False}, {'AC': 1, 'UA': 1, 'CG': 1}),
         # the '3-1' case
         (poly_a, {'Overlapping': '3-1'}, {'AA': 1}),
         (mixed, {'Overlapping': '3-1'}, {'UA': 1}),
         (acg_repeat, {'Overlapping': '3-1'}, {'GA': 4}),
         (acg_repeat_gaa, {'Overlapping': '3-1'}, {'GA': 4, 'GG': 1}),
     ]
     for sequence, options, expected in cases:
         usage = DinucUsage(sequence, **options)
         self.assertEqual(filter_dict(usage), expected)
Esempio n. 3
0
 def __init__(self, seq, overlapping=True, normalize=True):
     """Record a sequence with its dinucleotide usage and base frequencies.

     Args:
         seq: Nucleotide sequence as a string; it is upper-cased before
             single bases are counted.
         overlapping: Passed through as ``Overlapping`` to ``DinucUsage``.
         normalize: When true, ``normalize()`` is called on the
             dinucleotide usage right after it is built.

     After construction ``base_freqs`` maps A, T, G, C and U to their
     fraction of the sequence. T and U are both set to the larger of the
     two, so DNA and RNA spellings of the same sequence agree.
     """
     self.seq = seq.upper()
     self.overlapping = overlapping
     self.normalize = normalize
     # Sequence length
     self.len = len(seq)
     # Calculate dinuc. frequencies
     self.du = DinucUsage(seq, Overlapping=overlapping)
     if normalize:
         self.du.normalize()
     # Calculate frequencies for individual bases
     T_base = "T"
     U_base = "U"
     self.bases = ("A", T_base, "G", "C")
     self.base_freqs = defaultdict(int)
     # U is counted as well even though it is not listed in self.bases;
     # otherwise its frequency would silently read as the defaultdict's 0
     # and the T/U equalization below would ignore every U in the sequence.
     for curr_base in self.bases + (U_base,):
         if self.len:
             freq = self.seq.count(curr_base) / float(self.len)
         else:
             # Empty sequence: report 0.0 instead of dividing by zero.
             freq = 0.0
         self.base_freqs[curr_base] = freq
     # Equalize T/U -- pick the greater of the two frequencies
     # and then set them as equal
     shared_freq = max(self.base_freqs[T_base], self.base_freqs[U_base])
     self.base_freqs[T_base] = shared_freq
     self.base_freqs[U_base] = shared_freq