def test_seq_equality(self):
    """Two ArraySequences built from identical arguments compare equal."""
    alphabet = RNA.alphabets.degen_gapped
    first, second = [
        ArraySequence("UCG", name="rna1", alphabet=alphabet) for _ in range(2)
    ]

    # Object-level comparison: identical data, name and alphabet are
    # expected to yield equal sequences.
    self.assertEqual(first, second)
    # The string renderings must agree as well.
    self.assertEqual(str(first), str(second))
def test_ArrayAlignment_without_moltype(self):
    """ArrayAlignment is built with no moltype argument.

    The MolType is expected to be taken from the sequences; the check is
    made on the string rendering of the alignment.
    """
    alphabet = RNA.alphabets.degen_gapped
    members = [
        ArraySequence("UCAG", alphabet=alphabet, name="rna1"),
        ArraySequence("CCCR", alphabet=alphabet, name="rna2"),
    ]
    alignment = ArrayAlignment(members)

    # Every expected line, including the last, is newline-terminated.
    expected_lines = [">rna1", "UCAG", ">rna2", "CCCR"]
    expected = "".join(line + "\n" for line in expected_lines)
    self.assertEqual(str(alignment), expected)
def test_subset_positions_ArrayAlignment(self):
    """take_positions and get_sub_alignment(pos=...) select the same columns.

    The fixture alignment ``self.da`` holds the sequences
    'UCAGGG', 'YCU-RG' and 'CAA-NR'; columns 0, 1 and 5 are extracted and
    compared against an alignment built directly from 'UCG', 'YCG', 'CAR'.
    """
    alphabet = RNA.alphabets.degen_gapped

    # Indices of the gap and ambiguity characters are looked up on the
    # alphabet rather than hard-coded (the original author noted that their
    # ordering is volatile).
    gap, n_idx, r_idx, y_idx = [alphabet.index(char) for char in "-NRY"]

    full_data = array(
        [
            [0, 1, 2, 3, 3, 3],
            [y_idx, 1, 0, gap, r_idx, 3],
            [1, 2, 2, gap, n_idx, r_idx],
        ]
    )

    expected_seqs = [
        ArraySequence(text, name=label, alphabet=alphabet)
        for text, label in (("UCG", "rna1"), ("YCG", "rna2"), ("CAR", "rna3"))
    ]
    sub_da = ArrayAlignment(expected_seqs, moltype=RNA, alphabet=alphabet)
    sub_data = array([[0, 1, 3], [y_idx, 1, 3], [1, 2, r_idx]])

    # Sanity-check the raw data of the fixture and the expected alignment.
    for alignment, data in ((self.da, full_data), (sub_da, sub_data)):
        self.assertEqual(alignment.array_seqs, data)
        self.assertEqual(alignment.array_positions, transpose(data))

    columns = [0, 1, 5]
    obs_sub_da_TP = self.da.take_positions(columns)
    obs_sub_da_SA = self.da.get_sub_alignment(pos=columns)

    # Both extraction routes must match the expected sub-alignment, differ
    # from the full alignment, and expose the expected raw data.
    # NOTE: the original author queried whether the take_positions result
    # could compare equal while its data differed; the data assertions
    # cover that case.
    for observed in (obs_sub_da_SA, obs_sub_da_TP):
        self.assertEqual(observed, sub_da)
        self.assertNotEqual(observed, self.da)
        self.assertEqual(observed.array_seqs, sub_data)
        self.assertEqual(observed.array_positions, transpose(sub_data))
def test_seq_ungapping(self):
    """degap() strips gaps from both RnaSequence and ArraySequence."""
    gapped = "U-C-A-G-"
    plain_seq = RnaSequence(gapped, name="rna1")
    array_seq = ArraySequence(
        gapped, name="rna1", alphabet=RNA.alphabets.degen_gapped
    )

    # Conventional sequence: compares equal to its string, before and
    # after degapping.
    self.assertEqual(plain_seq, "U-C-A-G-")
    self.assertEqual(plain_seq.degap(), "UCAG")

    # Array-backed sequence: the string form must be right from the start,
    # and the underlying data must hold the expected integer codes.
    self.assertEqual(str(array_seq), "U-C-A-G-")
    self.assertEqual(array_seq._data, [0, 4, 1, 4, 2, 4, 3, 4])

    # ArraySequence is expected to offer the same degap method as the
    # conventional sequence type.
    self.assertEqual(str(array_seq.degap()), "UCAG")
def test_subset_seqs_ArrayAlignment(self):
    """take_seqs by name and get_sub_alignment by index give the same result.

    The fixture alignment ``self.da`` is built in setUp with "rna1" as its
    first sequence, so selecting by that name and selecting index 0 should
    yield equal alignments.
    """
    # The original built three extra ArraySequences and an ArrayAlignment
    # here that no assertion used; that dead setup has been dropped.
    obs_sub_da_TS = self.da.take_seqs(["rna1"])
    obs_sub_da_SA = self.da.get_sub_alignment(seqs=[0])

    # Both routes must agree as objects and as rendered strings (the
    # original author noted a key-to-char-array mapping fix made them so).
    self.assertEqual(obs_sub_da_TS, obs_sub_da_SA)
    self.assertEqual(str(obs_sub_da_TS), str(obs_sub_da_SA))
def setUp(self):
    """Create the named and unnamed sequence and alignment fixtures."""
    degen_gapped = RNA.alphabets.degen_gapped
    raw_seqs = ("UCAGGG", "YCU-RG", "CAA-NR")
    labels = ("rna1", "rna2", "rna3")

    # Named sequences, as conventional and array-backed objects.
    self.rna1, self.rna2, self.rna3 = [
        RnaSequence(text, name=label) for text, label in zip(raw_seqs, labels)
    ]
    self.model1, self.model2, self.model3 = [
        ArraySequence(text, name=label, alphabet=degen_gapped)
        for text, label in zip(raw_seqs, labels)
    ]
    self.aln = Alignment([self.rna1, self.rna2, self.rna3], moltype=RNA)
    self.da = ArrayAlignment(
        [self.model1, self.model2, self.model3],
        moltype=RNA,
        alphabet=degen_gapped,
    )

    # The same sequences again, this time without names ("nn_" prefix).
    self.nn_rna1, self.nn_rna2, self.nn_rna3 = [
        RnaSequence(text) for text in raw_seqs
    ]
    self.nn_model1, self.nn_model2, self.nn_model3 = [
        ArraySequence(text, alphabet=degen_gapped) for text in raw_seqs
    ]
    self.nn_aln = Alignment(
        [self.nn_rna1, self.nn_rna2, self.nn_rna3], moltype=RNA
    )
    self.nn_da = ArrayAlignment(
        [self.nn_model1, self.nn_model2, self.nn_model3],
        moltype=RNA,
        alphabet=degen_gapped,
    )