def test_init(self):
    """JointEnumeration should initialise from alphabets or plain sequences"""
    expected_factors = array([[4], [1]])

    # built from alphabet objects
    joint = JointEnumeration([DnaBases, RnaBases])
    self.assertEqual(len(joint), 16)
    self.assertEqual(joint.shape, (4, 4))
    self.assertEqual(joint[0], ("T", "U"))
    self.assertEqual(joint[-1], ("G", "G"))
    assert_equal(joint._sub_enum_factors, expected_factors)

    # built from arbitrary sequences
    joint = JointEnumeration(["TCAG", "UCAG"])
    self.assertEqual(len(joint), 16)
    self.assertEqual(joint[0], ("T", "U"))
    self.assertEqual(joint[-1], ("G", "G"))
    assert_equal(joint._sub_enum_factors, expected_factors)

    # built from sequences of different lengths
    joint = JointEnumeration(["TCA", "UCAG"])
    self.assertEqual(joint.shape, (3, 4))
    self.assertEqual(len(joint), 12)
    self.assertEqual(joint[0], ("T", "U"))
    self.assertEqual(joint[-1], ("A", "G"))
    # note: the expected factors are still [[4], [1]], _not_ [3, 1]
    assert_equal(joint._sub_enum_factors, expected_factors)
def test_to_string(self):
    """CharAlphabet to_string should turn an array of indices into a string"""
    alphabet = CharAlphabet("UCAG")

    # two rows are expected to come back newline-separated
    two_rows = array([[0, 0, 1], [0, 3, 2]], "B")
    self.assertEqual(alphabet.to_string(two_rows), "UUC\nUGA")

    # a single sequence held as the only row of a 2D array
    one_row = array([[0, 0, 1, 0, 3, 2]], "B")
    self.assertEqual(alphabet.to_string(one_row), "UUCUGA")

    # a single sequence held as a 1D array
    flat = array([0, 0, 1, 0, 3, 2], "B")
    self.assertEqual(alphabet.to_string(flat), "UUCUGA")

    # an empty sequence
    empty = array([], "B")
    self.assertEqual(alphabet.to_string(empty), "")
def test_init(self):
    """CharAlphabet init should make correct translation tables

    Checks both directions: the index->char table applied to raw bytes,
    and the char->index table applied to a string.
    """
    r = CharAlphabet("UCAG")
    i2c, c2i = r._indices_nums_to_chars, r._chars_to_indices
    # tobytes() replaces the deprecated ndarray.tostring() alias; both
    # return the raw bytes of the array, so the assertion is unchanged.
    s = array([0, 0, 1, 0, 3, 2], "b").tobytes()
    self.assertEqual(s.translate(i2c), b"UUCUGA")
    self.assertEqual("UUCUGA".translate(c2i), "\000\000\001\000\003\002")
def test_unpack_arrays(self):
    """JointEnumeration unpack_arrays should return correct arrays."""
    a = JointEnumeration(["xyz", "abcd", "ef"])
    v = [7, 15, 18, 0]
    result = a.unpack_arrays(v)
    expected = array([[0, 1, 2, 0], [3, 3, 1, 0], [1, 1, 0, 0]])
    # Compare element-wise with assert_equal, as test_pack_arrays does:
    # unittest's assertEqual takes the truth value of `result == expected`,
    # which is ambiguous when the comparison yields a multi-element array.
    assert_equal(result, expected)
def test_pack_arrays(self):
    """JointEnumeration pack_arrays should combine per-field index arrays correctly."""
    joint = JointEnumeration(["xyz", "abcd", "ef"])
    # one row of indices per sub-enumeration
    per_field = [
        [0, 1, 2, 0],
        [3, 3, 1, 0],
        [1, 1, 0, 0],
    ]
    packed = joint.pack_arrays(per_field)
    expected = array([7, 15, 18, 0])
    assert_equal(packed, expected)
def test_from_seq_to_array(self):
    """from_seq_to_array should match the alphabet index of each sequence element"""
    moltype = get_moltype("dna")
    sequence = moltype.make_seq("ACGG")
    observed = moltype.alphabet.from_seq_to_array(sequence)
    # reference result: look up every element of the sequence individually
    reference = array([moltype.alphabet.index(base) for base in sequence])
    assert_equal(observed, reference)
def test_to_chars(self):
    """CharAlphabet to_chars should turn an array of indices into characters"""
    alphabet = CharAlphabet("UCAG")
    indices = array([[0, 0, 1], [0, 3, 2]], "B")
    expected = array(["UUC", "UGA"], "c")
    observed = alphabet.to_chars(indices)
    assert_equal(observed, expected)
def test_from_array(self):
    """CharAlphabet from_array should map a character array to its indices"""
    alphabet = CharAlphabet("UCAG")
    chars = array(["UUC", "UGA"], "c")
    expected = array([[0, 0, 1], [0, 3, 2]], "B")
    observed = alphabet.from_array(chars)
    assert_equal(observed, expected)
def test_from_string(self):
    """CharAlphabet from_string should map a string to its array of indices"""
    alphabet = CharAlphabet("UCAG")
    observed = alphabet.from_string("UUCUGA")
    expected = array([0, 0, 1, 0, 3, 2], "B")
    assert_equal(observed, expected)
def test_counts(self):
    """Enumeration counts should tally how often each index occurs in an array"""
    bases = DnaBases

    # plain integer array
    indices = array([[0, 0, 1, 0, 0, 3]])
    assert_equal(bases.counts(indices), array([4, 1, 0, 1]))

    # the same data as a byte array
    indices = array([[0, 0, 1, 0, 0, 3]], "B")
    assert_equal(bases.counts(indices), array([4, 1, 0, 1]))

    # out-of-bounds items should be ignored
    with_invalid = [0, 4]
    assert_equal(bases.counts(with_invalid), array([1, 0, 0, 0]))

    # long sequences must not wrap around at 255
    long_list = [0, 3] * 70000
    assert_equal(bases.counts(long_list), array([70000, 0, 0, 70000]))
    long_bytes = array(long_list).astype("B")
    assert_equal(bases.counts(long_bytes), array([70000, 0, 0, 70000]))
    longer = array([0, 3] * 75000)
    assert_equal(bases.counts(longer), array([75000, 0, 0, 75000]))

    # set-up for long _binary_ sequences, e.g. the results of array
    # comparisons.
    # NOTE(review): nothing is asserted on `same` in the visible code -
    # confirm whether a counts() check on it was intended.
    left = array([0, 1, 2, 3] * 10000)
    right = array([0, 0, 0, 0] * 10000)
    same = left == right