Beispiel #1
0
def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Lines should contain:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that indicates
        that a base is unpaired. In bpseq files from the CRW website, the
        unpaired_symbol is '0'. This parameter should be a string to allow
        other symbols that can't be cast to an integer to indicate
        unpaired bases.

    Checks for double entries both in the sequence and the structure, and
    checks that the structure is valid in the sense that if (up,down) in
    there, that (down,up) is the same.

    Returns a (seq, pairs) tuple. Positions in both are shifted by num_base,
    so they are zero-based when num_base matches the file's numbering.

    Raises BpseqParseError when a line does not split into exactly three
    fields, when a position or partner is not an integer, when a residue
    position occurs twice, or when the pairs report conflicts.
    """
    # position (shifted by num_base) -> residue symbol
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Only tokenising and the int() conversions can raise ValueError,
        # so the try block is limited to them.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            if partner == unpaired_symbol:
                # unpaired base: there is no partner index to adjust
                partner = None
            else:
                partner = int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s" % (line))

        if pos in seq_dict:
            # Report the position as written in the file: undo the num_base
            # shift instead of assuming one-based numbering.
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                % (str(pos), str(pos + num_base)))
        seq_dict[pos] = res
        pairs.append((pos, partner))

    # check for conflicts, remove unpaired bases
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
Beispiel #2
0
def parse_residues(residue_lines, num_base, unpaired_symbol):
    """Return RnaSequence and Pairs object from residue lines.

    residue_lines -- list of lines or anything that behaves like it.
        Lines should contain:
        residue_position, residue_identity, residue_partner.
    num_base -- int, basis of the residue numbering. In bpseq files from
        the CRW website, the numbering starts at 1.
    unpaired_symbol -- string, symbol in the 'partner' column that indicates
        that a base is unpaired. In bpseq files from the CRW website, the
        unpaired_symbol is '0'. This parameter should be a string to allow
        other symbols that can't be cast to an integer to indicate
        unpaired bases.

    Checks for double entries both in the sequence and the structure, and
    checks that the structure is valid in the sense that if (up,down) in
    there, that (down,up) is the same.

    Returns a (seq, pairs) tuple. Positions in both are shifted by num_base,
    so they are zero-based when num_base matches the file's numbering.

    Raises BpseqParseError when a line does not split into exactly three
    fields, when a position or partner is not an integer, when a residue
    position occurs twice, or when the pairs report conflicts.
    """
    # position (shifted by num_base) -> residue symbol
    seq_dict = {}
    pairs = Pairs()

    for line in residue_lines:
        # Keep the try body to the statements that can raise ValueError:
        # the three-way unpacking and the int() conversions.
        try:
            pos, res, partner = line.strip().split()
            pos = int(pos) - num_base
            if partner == unpaired_symbol:
                # unpaired base: there is no partner index to adjust
                partner = None
            else:
                partner = int(partner) - num_base
        except ValueError:
            raise BpseqParseError("Failed to parse line: %s" % (line))

        if pos in seq_dict:
            # The second number is the position as it appears in the file,
            # i.e. the internal index shifted back by num_base (not by a
            # hard-coded 1).
            raise BpseqParseError(
                "Double entry for residue %s (%s in bpseq file)"
                % (str(pos), str(pos + num_base)))
        seq_dict[pos] = res
        pairs.append((pos, partner))

    # check for conflicts, remove unpaired bases
    if pairs.hasConflicts():
        raise BpseqParseError("Conflicts in the list of basepairs")
    pairs = pairs.directed()
    pairs.sort()

    # construct sequence from seq_dict
    seq = RnaSequence(construct_sequence(seq_dict))

    return seq, pairs
Beispiel #3
0
class PairsTests(TestCase):
    """Tests for Pairs object

    Boolean checks use assertTrue/assertFalse rather than bare ``assert``
    statements, so they still run when Python is started with -O.
    """

    def setUp(self):
        """Pairs SetUp method for all tests"""
        self.Empty = Pairs([])
        self.OneList = Pairs([[1, 2]])
        self.OneTuple = Pairs([(1, 2)])
        self.MoreLists = Pairs([[2, 4], [3, 9], [6, 36], [7, 49]])
        self.MoreTuples = Pairs([(2, 4), (3, 9), (6, 36), (7, 49)])
        self.MulNoOverlap = Pairs([(1, 10), (2, 9), (3, 7), (4, 12)])
        self.MulOverlap = Pairs([(1, 2), (2, 3)])
        self.Doubles = Pairs([[1, 2], [1, 2], [2, 3], [1, 3]])
        self.Undirected = Pairs([(2, 1), (6, 4), (1, 7), (8, 3)])
        self.UndirectedNone = Pairs([(5, None), (None, 3)])
        self.UndirectedDouble = Pairs([(2, 1), (1, 2)])

        # structures without pseudoknots
        self.NoPseudo = Pairs([(1, 20), (2, 19), (3, 7), (4, 6), (10, 15),
                               (11, 14)])
        self.NoPseudo2 = Pairs([(1, 3), (4, 6)])

        # Structures with pseudoknots. The bracket strings sketch the pairs
        # with a single bracket type, so crossing pairs are not
        # distinguishable in the sketch itself.
        #((.(.)).)
        self.p0 = Pairs([(0, 6), (1, 5), (3, 8)])
        #(.((..(.).).))
        self.p1 = Pairs([(0, 9), (2, 12), (3, 10), (5, 7)])
        #((.(.(.).)).)
        self.p2 = Pairs([(0, 10), (1, 9), (3, 12), (5, 7)])
        #((.((.(.)).).))
        self.p3 = Pairs([(0, 9), (1, 8), (3, 14), (4, 13), (6, 11)])
        #(.(((.((.))).)).(((.((((..))).)))).)
        self.p4 = Pairs([(0, 35), (2, 11), (3, 10), (4, 9), (6, 14),
                         (7, 13), (16, 28), (17, 27), (18, 26), (20, 33),
                         (21, 32), (22, 31), (23, 30)])
        #(.((.).))
        self.p5 = Pairs([(0, 5), (2, 8), (3, 7)])
        self.p6 = Pairs([(0, 19), (2, 6), (3, 5), (8, 14), (9, 13),
                         (10, 12), (16, 22), (17, 21)])
        self.p7 = Pairs([(0, 20), (2, 6), (3, 5), (8, 14), (9, 10),
                         (11, 16), (12, 15), (17, 23), (18, 22)])

    def test_init(self):
        """Pairs should initialize with both lists and tuples"""
        self.assertEqual(self.Empty, [])
        self.assertEqual(self.OneList, [[1, 2]])
        self.assertEqual(self.OneTuple, [(1, 2)])
        self.assertEqual(self.MulNoOverlap, [(1, 10), (2, 9), (3, 7), (4, 12)])
        self.assertEqual(self.MulOverlap, [(1, 2), (2, 3)])

    def test_toPartners(self):
        """Pairs toPartners() should return a Partners object"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict

        self.assertEqual(a.toPartners(10),
                         [None, 5, None, 4, 3, 1, 9, 8, 7, 6])
        self.assertEqual(
            a.toPartners(13, 3),
            [None, None, None, None, 8, None, 7, 6, 4, 12, 11, 10, 9])
        self.assertTrue(isinstance(a.toPartners(10), Partners))
        self.assertEqual(b.toPartners(7), [4, None, 6, None, 0, None, 2])
        self.assertRaises(ValueError, c.toPartners, 7)
        self.assertEqual(c.toPartners(7, strict=False),
                         [None, None, None, 6, 5, 4, 3])

        #raises an error when trying to insert at non-existing indices
        self.assertRaises(IndexError, c.toPartners, 0)

    def test_toVienna(self):
        """Pairs toVienna() should return a ViennaStructure if possible"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        d = Pairs([(1, 6), (3, None)])
        e = Pairs([(1, 9), (8, 2), (7, 3)])  #not directed
        f = Pairs([(1, 6), (2, 5), (10, 15), (14, 11)])  # not directed

        self.assertEqual(a.toVienna(10), '.(.())(())')
        self.assertEqual(a.toVienna(13, offset=3), '....(.())(())')

        self.assertRaises(PairError, b.toVienna, 7)  #pseudoknot NOT accepted
        self.assertRaises(Exception, b.toVienna, 7)  #old test for exception
        self.assertRaises(ValueError, c.toVienna, 7)

        #pairs containing None are being skipped
        self.assertEqual(d.toVienna(7), '.(....)')

        #raises error when trying to insert at non-existing indices
        self.assertRaises(IndexError, a.toVienna, 3)

        self.assertEqual(Pairs().toVienna(3), '...')

        #test when passing in the sequence instead of a length
        self.assertEqual(a.toVienna('ACGUAGCUAG'), '.(.())(())')
        self.assertEqual(a.toVienna(Rna('AACCGGUUAGCUA'), offset=3),
                         '....(.())(())')

        self.assertEqual(e.toVienna(10), '.(((...)))')
        self.assertEqual(f.toVienna(20), '.((..))...((..))....')

    def test_tuples(self):
        """Pairs tuples() should transform the elements of list to tuples"""
        # The ==/!= expressions are kept as written and only wrapped in
        # assertTrue, so the comparison being exercised does not change.
        x = Pairs([])
        x.tuples()
        self.assertTrue(x == [])

        x = Pairs([[1, 2], [3, 4]])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])

        x = Pairs([(1, 2), (3, 4)])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])
        self.assertTrue(x != [[1, 2], [3, 4]])

    def test_unique(self):
        """Pairs unique() should remove double occurrences of certain tuples"""
        self.assertEqual(self.Empty.unique(), [])
        self.assertEqual(self.MoreTuples.unique(), self.MoreTuples)
        self.assertEqual(self.Doubles.unique(),
                         Pairs([(1, 2), (2, 3), (1, 3)]))

    def test_directed(self):
        """Pairs directed() should change all pairs so that a<b in (a,b)"""
        self.assertEqual(self.Empty.directed(), [])
        res = self.Undirected.directed()
        res.sort()
        self.assertEqual(res, Pairs([(1, 2), (1, 7), (3, 8), (4, 6)]))
        res = self.UndirectedNone.directed()
        self.assertEqual(res, Pairs([]))
        res = self.UndirectedDouble.directed()
        self.assertEqual(res, Pairs([(1, 2)]))

    def test_symmetric(self):
        """Pairs symmetric() should add (down,up) for each (up,down)"""
        self.assertEqual(self.Empty.symmetric(), [])
        self.assertEqualItems(self.OneTuple.symmetric(), [(2, 1), (1, 2)])
        self.assertEqualItems(
            Pairs([(1, 2), (1, 2)]).symmetric(), [(1, 2), (2, 1)])
        self.assertEqualItems(Pairs([(1, 2), (3, 4)]).symmetric(),
                              [(1, 2), (2, 1), (3, 4), (4, 3)])
        self.assertEqualItems(Pairs([(1, None)]).symmetric(), [])

    def test_paired(self):
        """Pairs paired() should omit all pairs containing None"""
        self.assertEqual(self.Empty.paired(), [])
        self.assertEqual(
            Pairs([(1, 2), (2, None), (None, 3), (None, None)]).paired(),
            [(1, 2)])

    def test_hasConflicts(self):
        """Pairs hasConflicts() should return True if there are conflicts"""
        self.assertFalse(self.Empty.hasConflicts())
        self.assertFalse(Pairs([(1, 2), (3, 4)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, 3)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, None)]).hasConflicts())

    def test_mismatches(self):
        """Pairs mismatches() should return #pairs that can't be formed"""
        # with plain string
        self.assertEqual(Pairs([(0, 1)]).mismatches('AC', {}), 1)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'C'): None}), 0)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'G'): None}), 1)
        self.assertEqual(
            Pairs([(0, 1), (2, 3), (3, 1)]).mismatches(
                'ACGU', {('A', 'U'): None}),
            3)

        # using sequence with alphabet
        sequence = Rna('ACGUA')
        self.assertEqual(
            Pairs([(0, 1), (0, 4), (0, 3)]).mismatches(sequence), 2)

    def test_hasPseudoknots(self):
        """Pairs hasPseudoknots() should return True if there's a pseudoknot"""
        self.assertFalse(self.NoPseudo.hasPseudoknots())
        self.assertFalse(self.NoPseudo2.hasPseudoknots())
        #add tests for ((.))() etc
        self.assertTrue(self.p0.hasPseudoknots())
        self.assertTrue(self.p1.hasPseudoknots())
        self.assertTrue(self.p2.hasPseudoknots())
        self.assertTrue(self.p3.hasPseudoknots())
        self.assertTrue(self.p4.hasPseudoknots())
        self.assertTrue(self.p5.hasPseudoknots())
        self.assertTrue(self.p6.hasPseudoknots())
        self.assertTrue(self.p7.hasPseudoknots())
Beispiel #4
0
class PairsTests(TestCase):
    """Tests for Pairs object

    Boolean checks use assertTrue/assertFalse rather than bare ``assert``
    statements, so they still run when Python is started with -O.
    """

    def setUp(self):
        """Pairs SetUp method for all tests"""
        self.Empty = Pairs([])
        self.OneList = Pairs([[1, 2]])
        self.OneTuple = Pairs([(1, 2)])
        self.MoreLists = Pairs([[2, 4], [3, 9], [6, 36], [7, 49]])
        self.MoreTuples = Pairs([(2, 4), (3, 9), (6, 36), (7, 49)])
        self.MulNoOverlap = Pairs([(1, 10), (2, 9), (3, 7), (4, 12)])
        self.MulOverlap = Pairs([(1, 2), (2, 3)])
        self.Doubles = Pairs([[1, 2], [1, 2], [2, 3], [1, 3]])
        self.Undirected = Pairs([(2, 1), (6, 4), (1, 7), (8, 3)])
        self.UndirectedNone = Pairs([(5, None), (None, 3)])
        self.UndirectedDouble = Pairs([(2, 1), (1, 2)])

        # structures without pseudoknots
        self.NoPseudo = Pairs([(1, 20), (2, 19), (3, 7), (4, 6), (10, 15),
                               (11, 14)])
        self.NoPseudo2 = Pairs([(1, 3), (4, 6)])

        # Structures with pseudoknots. The bracket strings sketch the pairs
        # with a single bracket type, so crossing pairs are not
        # distinguishable in the sketch itself.
        #((.(.)).)
        self.p0 = Pairs([(0, 6), (1, 5), (3, 8)])
        #(.((..(.).).))
        self.p1 = Pairs([(0, 9), (2, 12), (3, 10), (5, 7)])
        #((.(.(.).)).)
        self.p2 = Pairs([(0, 10), (1, 9), (3, 12), (5, 7)])
        #((.((.(.)).).))
        self.p3 = Pairs([(0, 9), (1, 8), (3, 14), (4, 13), (6, 11)])
        #(.(((.((.))).)).(((.((((..))).)))).)
        self.p4 = Pairs([(0, 35), (2, 11), (3, 10), (4, 9), (6, 14),
                         (7, 13), (16, 28), (17, 27), (18, 26), (20, 33),
                         (21, 32), (22, 31), (23, 30)])
        #(.((.).))
        self.p5 = Pairs([(0, 5), (2, 8), (3, 7)])
        self.p6 = Pairs([(0, 19), (2, 6), (3, 5), (8, 14), (9, 13),
                         (10, 12), (16, 22), (17, 21)])
        self.p7 = Pairs([(0, 20), (2, 6), (3, 5), (8, 14), (9, 10),
                         (11, 16), (12, 15), (17, 23), (18, 22)])

    def test_init(self):
        """Pairs should initialize with both lists and tuples"""
        self.assertEqual(self.Empty, [])
        self.assertEqual(self.OneList, [[1, 2]])
        self.assertEqual(self.OneTuple, [(1, 2)])
        self.assertEqual(self.MulNoOverlap, [(1, 10), (2, 9), (3, 7), (4, 12)])
        self.assertEqual(self.MulOverlap, [(1, 2), (2, 3)])

    def test_toPartners(self):
        """Pairs toPartners() should return a Partners object"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict

        self.assertEqual(a.toPartners(10),
                         [None, 5, None, 4, 3, 1, 9, 8, 7, 6])
        self.assertEqual(
            a.toPartners(13, 3),
            [None, None, None, None, 8, None, 7, 6, 4, 12, 11, 10, 9])
        self.assertTrue(isinstance(a.toPartners(10), Partners))
        self.assertEqual(b.toPartners(7), [4, None, 6, None, 0, None, 2])
        self.assertRaises(ValueError, c.toPartners, 7)
        self.assertEqual(c.toPartners(7, strict=False),
                         [None, None, None, 6, 5, 4, 3])

        #raises an error when trying to insert at non-existing indices
        self.assertRaises(IndexError, c.toPartners, 0)

    def test_toVienna(self):
        """Pairs toVienna() should return a ViennaStructure if possible"""
        a = Pairs([(1, 5), (3, 4), (6, 9), (7, 8)])  #normal
        b = Pairs([(0, 4), (2, 6)])  #pseudoknot
        c = Pairs([(1, 6), (3, 6), (4, 5)])  #conflict
        d = Pairs([(1, 6), (3, None)])
        e = Pairs([(1, 9), (8, 2), (7, 3)])  #not directed
        f = Pairs([(1, 6), (2, 5), (10, 15), (14, 11)])  # not directed

        self.assertEqual(a.toVienna(10), '.(.())(())')
        self.assertEqual(a.toVienna(13, offset=3), '....(.())(())')

        self.assertRaises(PairError, b.toVienna, 7)  #pseudoknot NOT accepted
        self.assertRaises(Exception, b.toVienna, 7)  #old test for exception
        self.assertRaises(ValueError, c.toVienna, 7)

        #pairs containing None are being skipped
        # assertEqual replaces the deprecated assertEquals alias, which
        # unittest removed in Python 3.12.
        self.assertEqual(d.toVienna(7), '.(....)')

        #raises error when trying to insert at non-existing indices
        self.assertRaises(IndexError, a.toVienna, 3)

        self.assertEqual(Pairs().toVienna(3), '...')

        #test when passing in the sequence instead of a length
        self.assertEqual(a.toVienna('ACGUAGCUAG'), '.(.())(())')
        self.assertEqual(a.toVienna(Rna('AACCGGUUAGCUA'), offset=3),
                         '....(.())(())')

        self.assertEqual(e.toVienna(10), '.(((...)))')
        self.assertEqual(f.toVienna(20), '.((..))...((..))....')

    def test_tuples(self):
        """Pairs tuples() should transform the elements of list to tuples"""
        # The ==/!= expressions are kept as written and only wrapped in
        # assertTrue, so the comparison being exercised does not change.
        x = Pairs([])
        x.tuples()
        self.assertTrue(x == [])

        x = Pairs([[1, 2], [3, 4]])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])

        x = Pairs([(1, 2), (3, 4)])
        x.tuples()
        self.assertTrue(x == [(1, 2), (3, 4)])
        self.assertTrue(x != [[1, 2], [3, 4]])

    def test_unique(self):
        """Pairs unique() should remove double occurrences of certain tuples"""
        self.assertEqual(self.Empty.unique(), [])
        self.assertEqual(self.MoreTuples.unique(), self.MoreTuples)
        self.assertEqual(self.Doubles.unique(),
                         Pairs([(1, 2), (2, 3), (1, 3)]))

    def test_directed(self):
        """Pairs directed() should change all pairs so that a<b in (a,b)"""
        self.assertEqual(self.Empty.directed(), [])
        res = self.Undirected.directed()
        res.sort()
        self.assertEqual(res, Pairs([(1, 2), (1, 7), (3, 8), (4, 6)]))
        res = self.UndirectedNone.directed()
        self.assertEqual(res, Pairs([]))
        res = self.UndirectedDouble.directed()
        self.assertEqual(res, Pairs([(1, 2)]))

    def test_symmetric(self):
        """Pairs symmetric() should add (down,up) for each (up,down)"""
        self.assertEqual(self.Empty.symmetric(), [])
        self.assertEqualItems(self.OneTuple.symmetric(), [(2, 1), (1, 2)])
        self.assertEqualItems(
            Pairs([(1, 2), (1, 2)]).symmetric(), [(1, 2), (2, 1)])
        self.assertEqualItems(Pairs([(1, 2), (3, 4)]).symmetric(),
                              [(1, 2), (2, 1), (3, 4), (4, 3)])
        self.assertEqualItems(Pairs([(1, None)]).symmetric(), [])

    def test_paired(self):
        """Pairs paired() should omit all pairs containing None"""
        self.assertEqual(self.Empty.paired(), [])
        self.assertEqual(
            Pairs([(1, 2), (2, None), (None, 3), (None, None)]).paired(),
            [(1, 2)])

    def test_hasConflicts(self):
        """Pairs hasConflicts() should return True if there are conflicts"""
        self.assertFalse(self.Empty.hasConflicts())
        self.assertFalse(Pairs([(1, 2), (3, 4)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, 3)]).hasConflicts())
        self.assertTrue(Pairs([(1, 2), (2, None)]).hasConflicts())

    def test_mismatches(self):
        """Pairs mismatches() should return #pairs that can't be formed"""
        # with plain string
        self.assertEqual(Pairs([(0, 1)]).mismatches('AC', {}), 1)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'C'): None}), 0)
        self.assertEqual(
            Pairs([(0, 1)]).mismatches('AC', {('A', 'G'): None}), 1)
        self.assertEqual(
            Pairs([(0, 1), (2, 3), (3, 1)]).mismatches(
                'ACGU', {('A', 'U'): None}),
            3)

        # using sequence with alphabet
        sequence = Rna('ACGUA')
        self.assertEqual(
            Pairs([(0, 1), (0, 4), (0, 3)]).mismatches(sequence), 2)

    def test_hasPseudoknots(self):
        """Pairs hasPseudoknots() should return True if there's a pseudoknot"""
        self.assertFalse(self.NoPseudo.hasPseudoknots())
        self.assertFalse(self.NoPseudo2.hasPseudoknots())
        #add tests for ((.))() etc
        self.assertTrue(self.p0.hasPseudoknots())
        self.assertTrue(self.p1.hasPseudoknots())
        self.assertTrue(self.p2.hasPseudoknots())
        self.assertTrue(self.p3.hasPseudoknots())
        self.assertTrue(self.p4.hasPseudoknots())
        self.assertTrue(self.p5.hasPseudoknots())
        self.assertTrue(self.p6.hasPseudoknots())
        self.assertTrue(self.p7.hasPseudoknots())