def test_AlignmentToProfile_weighted(self):
    """AlignmentToProfile: should work when sequences are weighted

    Builds profiles from a three-sequence alignment using per-sequence
    weights and compares the resulting data against hand-computed
    matrices (one row per alignment position, one column per character
    in the requested char_order).
    """
    # The aligned sequences are plain strings without an alphabet of
    # their own. The weights are an ordinary dictionary (a real Weights
    # object could be used as well).
    aln = Alignment(
        {'seq1': 'TCAG', 'seq2': 'TAR-', 'seq3': 'YAG-'},
        Names=['seq1', 'seq2', 'seq3'])
    seq_weights = {'seq1': 0.5, 'seq2': .25, 'seq3': .25}

    # Case 1: every character found in the sequences is listed in the
    # char_order, so nothing has to be ignored. The value of
    # split_degenerates should make no difference here: a character
    # that is present in the char_order is counted as itself even when
    # splitting is requested.
    all_chars_expected = array([
        [0.75, 0, 0, 0, 0, .25, 0],
        [0, 0.5, 0.5, 0, 0, 0, 0],
        [0, 0.5, 0, 0.25, 0.25, 0, 0],
        [0, 0, 0, 0.5, 0, 0, 0.5],
    ])
    # First split_degenerates=False, then split_degenerates=True.
    for split in (False, True):
        profile = AlnToProfile(aln, DNA, char_order="TACGRY-",
                               weights=seq_weights,
                               split_degenerates=split)
        self.assertEqual(profile.Data.tolist(),
                         all_chars_expected.tolist())

    # Case 2: only non-degenerate symbols in the char_order. Degenerate
    # symbols are split over the bases they stand for and gaps are
    # ignored.
    split_expected = array([
        [0.875, 0, 0.125, 0],
        [0, 0.5, 0.5, 0],
        [0, 0.625, 0, 0.375],
        [0, 0, 0, 1],
    ])
    profile = AlnToProfile(aln, DNA, char_order="TACG",
                           weights=seq_weights, split_degenerates=True)
    self.assertEqual(profile.Data.tolist(), split_expected.tolist())

    # Case 3: a ValueError is expected when all characters in an
    # alignment column are ignored. Here char_order="AT" and
    # split_degenerates=True is passed.
    # NOTE(review): the earlier comment on this check said degenerates
    # are "not split", which does not match the argument passed below;
    # confirm which setting was intended.
    self.assertRaises(ValueError, AlnToProfile, aln, DNA,
                      char_order="AT", weights=seq_weights,
                      split_degenerates=True)
def test_AlignmentToProfile_basic(self):
    """AlignmentToProfile: should work under basic conditions

    Builds a profile from two unweighted RNA sequences and compares it
    with a hand-computed matrix (one row per alignment position).
    """
    # Conditions exercised by this test:
    #   - the sequences in the alignment are unweighted
    #   - the alphabet passed in is that of the sequences (RNA)
    #   - degenerate bases are split up
    #   - gaps are ignored
    #   - every column has at least one character that is counted
    # No char_order argument is passed in the call below; the expected
    # columns appear to follow the order U, C, A, G -- TODO confirm
    # against the alphabet's default order.
    aln = Alignment({'a': RnaSequence('UCAGRYN-'),
                     'b': RnaSequence('ACUGAAAA')})
    expected = array([
        [.5, 0, .5, 0],
        [0, 1, 0, 0],
        [.5, 0, .5, 0],
        [0, 0, 0, 1],
        [0, 0, .75, .25],
        [.25, .25, .5, 0],
        [.125, .125, .625, .125],
        [0, 0, 1, 0],
    ])
    profile = AlnToProfile(aln, alphabet=RNA, split_degenerates=True)
    self.assertEqual(profile.Data.tolist(), expected.tolist())