예제 #1
0
    def test_DnaSequence(self):
        """DnaSequence should behave as expected"""
        x = DnaSequence('tcag')
        # note: input case is no longer preserved, so the comparison
        # is made against the upper-case string
        self.assertEqual(x, 'TCAG')

        x = DnaSequence('aaa') + DnaSequence('ccc')
        # note: concatenation doesn't preserve case either
        self.assertEqual(x, 'AAACCC')
        # Use a TestCase assertion instead of a bare ``assert`` so the
        # check is still performed when Python runs with -O.
        self.assertTrue(x.MolType is DNA)
        # Adding a string containing 'z' must raise AlphabetError.
        self.assertRaises(AlphabetError, x.__add__, 'z')
        # rc() is presumably the reverse complement; mixed-case input is
        # expected to compare equal to the upper-case result.
        self.assertEqual(DnaSequence('TTTAc').rc(), 'GTAAA')
예제 #2
0
    def test_single_constructor(self):
        """RdbParser should use constructors if supplied"""
        def to_dna(x, Info):
            # Sequence constructor handed to RdbParser: replaces every 'U'
            # with 'T' and forwards the Info object to DnaSequence.
            return DnaSequence(str(x).replace('U', 'T'), Info=Info)

        # Case 1: custom sequence constructor only.
        f = list(RdbParser(self.oneseq, to_dna))
        self.assertEqual(len(f), 1)
        a = f[0]
        self.assertEqual(a, 'AGTCATCTAGATHCATHC')
        self.assertEqual(a.Info, Info({'Species': 'H.Sapiens',
            'OriginalSeq': 'AGUCAUCUAGAUHCAUHC'}))

        def alternativeConstr(header_lines):
            # Info constructor handed to RdbParser: stores each
            # "name:value" header line with both parts upper-cased.
            info = Info()
            for line in header_lines:
                fields = line.strip().split(':', 1)
                #strip out empty lines, lines without name, lines without colon
                if not fields[0] or len(fields) != 2:
                    continue
                name = fields[0].upper()
                value = fields[1].strip().upper()
                info[name] = value
            return info

        # Case 2: custom sequence constructor plus custom Info constructor.
        f = list(RdbParser(self.oneseq, to_dna, alternativeConstr))
        self.assertEqual(len(f), 1)
        a = f[0]
        self.assertEqual(a, 'AGTCATCTAGATHCATHC')
        exp_info = Info({'OriginalSeq': 'AGUCAUCUAGAUHCAUHC',
            'Refs': {}, 'SEQ': 'H.SAPIENS'})
        self.assertEqual(a.Info, exp_info)
예제 #3
0
    def test_SeqToProfile(self):
        """SequenceToProfile: should work with different parameter settings
        """
        seq = DnaSequence("ATCGRYN-")
        n_rows = len(seq)

        def one_hot_rows(hot_columns):
            # Expected profile as nested lists: one row per sequence
            # position, one column per entry in hot_columns, with a single
            # 1 per listed position and zeros everywhere else.
            profile = zeros([n_rows, len(hot_columns)], Float64)
            for row, col in zip(range(n_rows), hot_columns):
                profile[row, col] = 1
            return profile.tolist()

        # Char order holds only the non-degenerate bases; every other
        # character is ignored, so those positions are all-zero rows.
        observed = SeqToProfile(seq, char_order="TCAG",
            split_degenerates=False)
        self.assertEqual(observed.Data.tolist(), one_hot_rows([2, 0, 1, 3]))

        # The same result is expected when no char order is passed in.
        observed = SeqToProfile(seq, split_degenerates=False)
        self.assertEqual(observed.Data.tolist(), one_hot_rows([2, 0, 1, 3]))

        # Every symbol of the sequence is in the char order, so no row is
        # all zeros. Degenerate symbols are not split.
        observed = SeqToProfile(seq, char_order="TCAGRYN-",
            split_degenerates=False)
        self.assertEqual(observed.Data.tolist(),
            one_hot_rows([2, 0, 1, 3, 4, 5, 6, 7]))

        # Split all degenerate symbols; the char order holds only the
        # non-degenerate symbols (and -).
        split_all = array([[0, 0, 1, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 0, 0],
                           [0, 0, 0, 1, 0], [0, 0, .5, .5, 0],
                           [.5, .5, 0, 0, 0], [.25, .25, .25, .25, 0],
                           [0, 0, 0, 0, 1]])
        observed = SeqToProfile(seq, char_order="TCAG-",
            split_degenerates=True)
        self.assertEqual(observed.Data.tolist(), split_all.tolist())

        # Split degenerates while one degenerate symbol is itself in the
        # char order: that symbol is not split.
        split_some = array([[0, 0, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0],
                            [0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0],
                            [0, 0, .5, .5, 0, 0], [.5, .5, 0, 0, 0, 0],
                            [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1]])
        observed = SeqToProfile(seq, char_order="TCAGN-",
            split_degenerates=True)
        self.assertEqual(observed.Data.tolist(), split_some.tolist())
예제 #4
0
 def dnastrict(x, **kwargs):
     """Return DnaSequence(x, **kwargs), or raise RecordError on failure.

     Any Exception raised while constructing the sequence is replaced by
     a RecordError with the message "Could not convert sequence".
     """
     try:
         return DnaSequence(x, **kwargs)
     except Exception:
         # Call-style raise is valid on both Python 2 and Python 3; the
         # old ``raise Cls, "msg"`` form is a SyntaxError on Python 3.
         raise RecordError("Could not convert sequence")
예제 #5
0
def Dna(seq, *args, **kwargs):
    """Build a DnaSequence from seq after replacing u/U with t/T.

    Extra positional and keyword arguments are passed through to
    DnaSequence unchanged.
    """
    converted = seq.replace('u', 't').replace('U', 'T')
    return DnaSequence(converted, *args, **kwargs)
예제 #6
0
 def default_distance_function(first, second):
     """Return the fracDiff of first, wrapped in DnaSequence, against second."""
     # Only the first argument is converted; second is passed through as given.
     return DnaSequence(first).fracDiff(second)