Beispiel #1
0
    def test_adjust_base_None(self):
        """adjust_base: None entries and duplicates are kept, conflicts ignored"""
        # Input holds a half-open pair, an all-None pair and two pairs that
        # share the same first position (2,8)/(2,10).
        with_nones = Pairs([(2, 8), (3, 7), (6, None), (None, None), (2, 10)])
        shifted_down = Pairs([(1, 7), (2, 6), (5, None), (None, None), (1, 9)])
        self.assertEqual(adjust_base(with_nones, -1), shifted_down)

        # Input holds an exact duplicate (1,2), its mirror (2,1) and a
        # half-open pair; the result is compared against a plain list.
        with_duplicates = Pairs([(1, 2), (2, 1), (1, 2), (2, None)])
        shifted_up = [(2, 3), (3, 2), (2, 3), (3, None)]
        self.assertEqual(adjust_base(with_duplicates, 1), shifted_up)
Beispiel #2
0
def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input.

    The first line, and every later line for which new_struct(line) is
    true, is treated as a header that starts a new structure. If the header
    contains the token 'dG' or 'ENERGY' the energy is read from its fourth
    whitespace-separated field; if it contains 'Structure' the energy is
    read from its third field. Every other line is a base line: field 2 is
    the residue, field 1 its position and field 5 the position of its
    partner (0 when the base is not paired).

    Returns a list with one entry per structure, containing the sequence,
    the structure and, if available, the energy:
    [[seq1,struct1,energy1],[seq2,struct2],...]
    Each struct is the result of Pairs(...).directed(), sorted, with
    positions shifted by -1 (ct files are one based).
    """

    def _finish(seq, raw_pairs, energy):
        # Build the result entry for one completed structure.
        # structs are one(1) based, adjust to zero based
        struct = adjust_base(raw_pairs, -1)
        struct = Pairs(struct).directed()
        struct.sort()
        if energy is not None:
            return [seq, struct, energy]
        return [seq, struct]

    energy = None
    seq_parts = []
    struct = []
    result = []

    for count, line in enumerate(lines, 1):
        sline = line.split(None, 6)  # sline = split line
        # new_struct is only consulted after the first line, which is
        # always a header.
        if count == 1 or new_struct(line):
            if count > 1:
                # A new header closes the structure collected so far.
                result.append(_finish(''.join(seq_parts), struct, energy))
                energy = None
                seq_parts = []
                struct = []
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = float(sline[3])
            if 'Structure' in sline:
                energy = float(sline[2])
        else:
            seq_parts.append(sline[1])
            partner = int(sline[4])
            if partner != 0:  # 0 marks an unpaired base; keep paired ones only
                struct.append((int(sline[0]), partner))

    # The last structure has no following header to close it.
    result.append(_finish(''.join(seq_parts), struct, energy))
    return result
Beispiel #3
0
def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input.

    The first line, and every later line for which new_struct(line) is
    true, is treated as a header that starts a new structure. If the header
    contains the token 'dG' or 'ENERGY' the energy is read from its fourth
    whitespace-separated field; if it contains 'Structure' the energy is
    read from its third field. Every other line is a base line: field 2 is
    the residue, field 1 its position and field 5 the position of its
    partner (0 when the base is not paired).

    Returns a list with one entry per structure, containing the sequence,
    the structure and, if available, the energy:
    [[seq1,struct1,energy1],[seq2,struct2],...]
    Each struct is the result of Pairs(...).directed(), sorted, with
    positions shifted by -1 (ct files are one based).
    """

    def _finish(seq, raw_pairs, energy):
        # Build the result entry for one completed structure.
        # structs are one(1) based, adjust to zero based
        struct = adjust_base(raw_pairs, -1)
        struct = Pairs(struct).directed()
        struct.sort()
        if energy is not None:
            return [seq, struct, energy]
        return [seq, struct]

    energy = None
    seq_parts = []
    struct = []
    result = []

    for count, line in enumerate(lines, 1):
        sline = line.split(None, 6)  # sline = split line
        # new_struct is only consulted after the first line, which is
        # always a header.
        if count == 1 or new_struct(line):
            if count > 1:
                # A new header closes the structure collected so far.
                result.append(_finish(''.join(seq_parts), struct, energy))
                energy = None
                seq_parts = []
                struct = []
            # checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = float(sline[3])
            if 'Structure' in sline:
                energy = float(sline[2])
        else:
            seq_parts.append(sline[1])
            partner = int(sline[4])
            if partner != 0:  # 0 marks an unpaired base; keep paired ones only
                struct.append((int(sline[0]), partner))

    # The last structure has no following header to close it.
    result.append(_finish(''.join(seq_parts), struct, energy))
    return result
Beispiel #4
0
def column_parser(lines):
    """Parse column format lines into a list of [sequence, pairs] entries.

    A line starting with '; ------' opens a structure section and a line
    starting with '; ******' closes it. Inside a section, field 2 of each
    line is appended to the sequence; unless field 5 is '.', fields 4 and 5
    are stored as an (alignpos, align_bp) pair. When a section closes, the
    pairs are shifted by -1, passed through Pairs(...).directed(), sorted
    and appended to the result together with the sequence.
    """
    records = []
    residues = ''
    raw_pairs = []
    in_structure = False

    for line in lines:
        if line.startswith('; ------'):  # structure part begins
            in_structure = True
        elif line.startswith('; ******'):  # structure part ends
            in_structure = False
            shifted = adjust_base(raw_pairs, -1)
            directed = Pairs(shifted).directed()  # remove duplicates
            directed.sort()
            records.append([residues, directed])
            # start collecting the next record from scratch
            raw_pairs = []
            residues = ''
        elif in_structure:
            fields = line.split()
            partner = fields[4]
            residues = residues + fields[1]
            if partner != '.':  # '.' marks a position that is not paired
                raw_pairs.append((int(fields[3]), int(partner)))

    return records
Beispiel #5
0
    def test_adjust_base(self):
        """adjust_base: accepts a Pairs object as well as a list of pairs"""
        # An empty Pairs object compares equal to an empty list afterwards.
        empty = Pairs()
        self.assertEqual(adjust_base(empty, 10), [])

        # Plain list of tuples, every position shifted down by one.
        as_list = [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)]
        shifted_list = [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)]
        self.assertEqual(adjust_base(as_list, -1), shifted_list)

        # Pairs object, shifted with a negative and a positive offset.
        as_pairs = Pairs([(0, 10), (1, 9)])
        cases = ((-1, [(-1, 9), (0, 8)]), (5, [(5, 15), (6, 14)]))
        for offset, shifted in cases:
            self.assertEqual(adjust_base(as_pairs, offset), Pairs(shifted))

        # A non-integer offset must raise PairsAdjustmentError.
        self.assertRaises(PairsAdjustmentError, adjust_base, as_pairs, 3.5)
Beispiel #6
0
def column_parser(lines):
    """Parse column format lines into a list of [sequence, pairs] entries.

    A line starting with '; ------' opens a structure section and a line
    starting with '; ******' closes it. Inside a section, field 2 of each
    line is appended to the sequence; unless field 5 is '.', fields 4 and 5
    are stored as an (alignpos, align_bp) pair. When a section closes, the
    pairs are shifted by -1, passed through Pairs(...).directed(), sorted
    and appended to the result together with the sequence.
    """
    parsed = []
    sequence = ''
    collected = []
    recording = False

    for line in lines:
        if line.startswith('; ------'):  # structure part begins
            recording = True
        elif line.startswith('; ******'):  # structure part ends
            recording = False
            zero_based = adjust_base(collected, -1)
            pairs_out = Pairs(zero_based).directed()  # remove duplicates
            pairs_out.sort()
            parsed.append([sequence, pairs_out])
            # reset the accumulators for the next section
            collected = []
            sequence = ''
        elif recording:
            columns = line.split()
            bp_column = columns[4]
            sequence = sequence + columns[1]
            if bp_column != '.':  # '.' marks a position that is not paired
                collected.append((int(columns[3]), int(bp_column)))

    return parsed
Beispiel #7
0
    def test_adjust_base(self):
        """adjust_base: accepts a Pairs object as well as a list of pairs"""
        # An empty Pairs object compares equal to an empty list afterwards.
        empty = Pairs()
        self.assertEqual(adjust_base(empty, 10), [])

        # Plain list of tuples, every position shifted down by one.
        as_list = [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)]
        shifted_list = [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)]
        self.assertEqual(adjust_base(as_list, -1), shifted_list)

        # Pairs object, shifted with a negative and a positive offset.
        as_pairs = Pairs([(0, 10), (1, 9)])
        cases = ((-1, [(-1, 9), (0, 8)]), (5, [(5, 15), (6, 14)]))
        for offset, shifted in cases:
            self.assertEqual(adjust_base(as_pairs, offset), Pairs(shifted))

        # A non-integer offset must raise PairsAdjustmentError.
        self.assertRaises(PairsAdjustmentError, adjust_base, as_pairs, 3.5)
Beispiel #8
0
def ilm_parser(lines=None, pseudo=True):
    """Ilm format parser

    Takes lines as input and returns a list with a single Pairs object.

    Lines starting with 'Final' and lines of length one are skipped. Every
    other line is split on whitespace into integers; a line whose second
    value is 0 is not a pair and is skipped as well. The collected pairs
    are shifted by -1, passed through Pairs(...).directed() and sorted.

    pseudo - if True returns pairs with possible pseudoknot
             if False removes pseudoknots (via opt_single_random)
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1:  # skip these lines
            continue
        # Build a real list: map() returns a non-indexable iterator on
        # Python 3, which made the [1] lookup below fail.
        fields = [int(field) for field in line.strip('\n').split(None, 2)]
        if fields[1] == 0:
            continue  # Skip this line, not a pair
        pairs.append(fields)

    pairs = adjust_base(pairs, -1)
    tmp = Pairs(pairs).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()

    return [tmp]
Beispiel #9
0
def ilm_parser(lines=None,pseudo=True):
    """Ilm format parser

    Takes lines as input and returns a list with a single Pairs object.

    Lines starting with 'Final' and lines of length one are skipped. Every
    other line is split on whitespace into integers; a line whose second
    value is 0 is not a pair and is skipped as well. The collected pairs
    are shifted by -1, passed through Pairs(...).directed() and sorted.

    pseudo - if True returns pairs with possible pseudoknot
             if False removes pseudoknots (via opt_single_random)
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1:  # skip these lines
            continue
        # Build a real list: map() returns a non-indexable iterator on
        # Python 3, which made the [1] lookup below fail.
        fields = [int(field) for field in line.strip('\n').split(None, 2)]
        if fields[1] == 0:
            continue  # Skip this line, not a pair
        pairs.append(fields)

    pairs = adjust_base(pairs, -1)
    tmp = Pairs(pairs).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()

    return [tmp]