Ejemplo n.º 1
0
    def test_AlignmentToProfile_weighted(self):
        """AlignmentToProfile: should work when sequences are weighted

        Builds profiles from a three-sequence alignment using per-sequence
        weights and compares each profile row with hand-computed values.
        """
        # Alignment: the sequences are plain strings and carry no alphabet.
        # Weights: an ordinary dict keyed by sequence name (a real Weights
        # object could be used as well).
        aln = Alignment(
            {'seq1': 'TCAG', 'seq2': 'TAR-', 'seq3': 'YAG-'},
            Names=['seq1', 'seq2', 'seq3'])
        seq_weights = {'seq1': 0.5, 'seq2': .25, 'seq3': .25}

        def profile_rows(char_order, split_degenerates):
            # Build the weighted profile and return its data as nested lists
            # so it can be compared against the expected rows directly.
            profile = AlnToProfile(
                aln, DNA, char_order=char_order, weights=seq_weights,
                split_degenerates=split_degenerates)
            return profile.Data.tolist()

        # Basic situation: every character in the sequences (including the
        # degenerate symbols and the gap) occurs in the char_order, so nothing
        # has to be ignored. In that case split_degenerates makes no
        # difference: a symbol that is itself in the char_order is counted
        # as-is rather than split.
        # One row per alignment column; row entries follow "TACGRY-".
        expected = array([
            [0.75, 0, 0, 0, 0, .25, 0],
            [0, 0.5, 0.5, 0, 0, 0, 0],
            [0, 0.5, 0, 0.25, 0.25, 0, 0],
            [0, 0, 0, 0.5, 0, 0, 0.5],
        ])
        # split_degenerates = False
        self.assertEqual(profile_rows("TACGRY-", False), expected.tolist())
        # split_degenerates = True
        self.assertEqual(profile_rows("TACGRY-", True), expected.tolist())

        # Only non-degenerate symbols in the char_order. Degenerates are
        # split over the bases they stand for and gaps are ignored.
        # Row entries follow "TACG".
        expected = array([
            [0.875, 0, 0.125, 0],
            [0, 0.5, 0.5, 0],
            [0, 0.625, 0, 0.375],
            [0, 0, 0, 1],
        ])
        self.assertEqual(profile_rows("TACG", True), expected.tolist())

        # An error is raised if all chars in an alignment column are ignored.
        # char_order="AT"; note that degenerates ARE split in this call
        # (split_degenerates=True). Presumably it is the last column
        # (G, '-', '-') that ends up with nothing counted.
        self.assertRaises(
            ValueError, AlnToProfile, aln, DNA,
            char_order="AT", weights=seq_weights, split_degenerates=True)
Ejemplo n.º 2
0
 def test_AlignmentToProfile_basic(self):
     """AlignmentToProfile: should work under basic conditions

     Builds a profile from a two-sequence RNA alignment and compares every
     row with hand-computed frequencies.
     """
     # Conditions exercised by this test:
     #  - the sequences are unweighted (no weights argument is passed)
     #  - the alphabet is passed explicitly (RNA)
     #  - no char_order is passed, so AlnToProfile chooses the column order;
     #    the expected rows below are laid out as U, C, A, G
     #  - degenerate bases are split up (split_degenerates=True)
     #  - gaps are ignored (see the last row: '-' over 'A' gives A = 1)
     #  - every column holds at least one character that is counted
     aln = Alignment({
         'a': RnaSequence('UCAGRYN-'),
         'b': RnaSequence('ACUGAAAA'),
     })
     # One row per alignment column.
     expected = array([
         [.5, 0, .5, 0],
         [0, 1, 0, 0],
         [.5, 0, .5, 0],
         [0, 0, 0, 1],
         [0, 0, .75, .25],
         [.25, .25, .5, 0],
         [.125, .125, .625, .125],
         [0, 0, 1, 0],
     ])
     observed = AlnToProfile(aln, alphabet=RNA, split_degenerates=True)
     self.assertEqual(observed.Data.tolist(), expected.tolist())