def test_adjust_base_None(self):
    """adjust_base: should keep Nones or duplicates, ignore conflicts"""
    # None entries are expected to stay None while real positions move
    with_nones = Pairs([(2, 8), (3, 7), (6, None), (None, None), (2, 10)])
    shifted_down = Pairs([(1, 7), (2, 6), (5, None), (None, None), (1, 9)])
    self.assertEqual(adjust_base(with_nones, -1), shifted_down)

    # duplicated and conflicting pairs are expected to pass through unfiltered
    with_dups = Pairs([(1, 2), (2, 1), (1, 2), (2, None)])
    shifted_up = [(2, 3), (3, 2), (2, 3), (3, None)]
    self.assertEqual(adjust_base(with_dups, 1), shifted_up)
def ct_parser(lines=None):
    """Ct format parser

    Takes lines from a ct file as input
    Returns a list containing sequence,structure and if available the energy.
    [[seq1,[struct1],energy1],[seq2,[struct2],energy2],...]

    The first line, and every later line for which new_struct(line) is true,
    is treated as a header that starts a new structure. On a header line the
    energy is read from field 3 when the line contains 'dG' or 'ENERGY', and
    from field 2 when it contains 'Structure'.
    Every other line is a base line: field 1 is appended to the sequence and
    (field 0, field 4) is recorded as a pair unless field 4 is 0.
    """
    def finish(seq, struct, energy):
        """Build the result entry for one completed structure."""
        #structs are one(1) based, adjust to zero based
        struct = adjust_base(struct, -1)
        struct = Pairs(struct).directed()
        struct.sort()
        if energy is not None:
            return [seq, struct, energy]
        return [seq, struct]

    count = 0
    energy = None
    seq = ''
    struct = []
    result = []
    for line in lines:
        count += 1
        sline = line.split(None, 6) #sline = split line
        if count == 1 or new_struct(line): #first line or new struct line.
            if count > 1:
                # Close the previous structure before starting the next one.
                # (This path used to append an undefined name instead of
                # struct when no energy was present.)
                result.append(finish(seq, struct, energy))
                energy = None
                struct = []
                seq = ''
            #checks if energy for predicted struct is given
            if 'dG' in sline or 'ENERGY' in sline:
                energy = float(sline[3])
            if 'Structure' in sline:
                energy = float(sline[2])
        else:
            seq = ''.join([seq, sline[1]])
            if int(sline[4]) != 0: #partner 0 means the base is unpaired
                struct.append((int(sline[0]), int(sline[4])))
    # the last structure has no following header to close it
    result.append(finish(seq, struct, energy))
    return result
def column_parser(lines):
    """Parse column format lines into a list of [sequence, pairs] entries.

    A line starting with '; ------' opens a structure block and a line
    starting with '; ******' closes it. Inside a block the second field of
    each line is appended to the sequence; the fourth and fifth fields are
    recorded as a pair unless the fifth field is '.'.
    One [seq, struct] entry is returned per closing line.
    """
    entries = []
    in_block = False
    bases = ''
    found = []
    for row in lines:
        if row.startswith('; ------'):
            #structure part begins
            in_block = True
        elif row.startswith('; ******'):
            #structure part ends: shift positions by -1 and store the entry
            in_block = False
            found = Pairs(adjust_base(found, -1)).directed() #remove duplicates
            found.sort()
            entries.append([bases, found])
            bases = ''
            found = []
        elif in_block:
            fields = row.split()
            partner = fields[4]
            bases += fields[1]
            if partner != '.': #'.' marks a position that is not paired
                found.append((int(fields[3]), int(partner))) #(alignpos,align_bp)
    return entries
def test_adjust_base(self):
    """adjust_base: should work for pairs object or list of pairs"""
    # an empty Pairs object should come back as an empty result
    self.assertEqual(adjust_base(Pairs(), 10), [])

    # plain list of tuples with an offset of -1
    as_list = [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)]
    as_list_shifted = [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)]
    self.assertEqual(adjust_base(as_list, -1), as_list_shifted)

    # Pairs object with a negative and then a positive offset
    stem = Pairs([(0, 10), (1, 9)])
    for offset, shifted in ((-1, [(-1, 9), (0, 8)]), (5, [(5, 15), (6, 14)])):
        self.assertEqual(adjust_base(stem, offset), Pairs(shifted))

    # a non-integer offset should raise
    self.assertRaises(PairsAdjustmentError, adjust_base, stem, 3.5)
def column_parser(lines):
    """Column format parser

    Reads the lines between a '; ------' line and a '; ******' line as one
    structure block. For each line in a block, field 1 contributes to the
    sequence and fields 3 and 4 form a pair, except when field 4 is '.'.
    Returns a list with one [seq, struct] item per '; ******' line seen.
    """
    parsed = []
    residues = []
    partners = []
    recording = False
    for text in lines:
        starts_block = text.startswith('; ------')
        ends_block = not starts_block and text.startswith('; ******')
        if ends_block:
            # block finished: shift positions by -1, then store the entry
            shifted = adjust_base(partners, -1)
            directed = Pairs(shifted).directed() #remove duplicates
            directed.sort()
            parsed.append([''.join(residues), directed])
            residues = []
            partners = []
        if starts_block or ends_block:
            # marker lines only switch recording on or off
            recording = starts_block
            continue
        if not recording:
            continue
        columns = text.split()
        align_bp = columns[4]
        residues.append(columns[1])
        if align_bp == '.': #skip not paired
            continue
        partners.append((int(columns[3]), int(align_bp))) #(alignpos,align_bp)
    return parsed
def test_adjust_base(self):
    """adjust_base: should work for pairs object or list of pairs"""
    # empty Pairs object
    empty = Pairs()
    self.assertEqual(adjust_base(empty, 10), [])

    # list of tuples, offset -1
    observed = adjust_base(
        [(1, 21), (2, 15), (3, 13), (4, 11), (5, 10), (6, 9)], -1)
    self.assertEqual(
        observed, [(0, 20), (1, 14), (2, 12), (3, 10), (4, 9), (5, 8)])

    # Pairs object, offsets -1 and 5
    two_pairs = Pairs([(0, 10), (1, 9)])
    moved_back = adjust_base(two_pairs, -1)
    self.assertEqual(moved_back, Pairs([(-1, 9), (0, 8)]))
    moved_forward = adjust_base(two_pairs, 5)
    self.assertEqual(moved_forward, Pairs([(5, 15), (6, 14)]))

    # a float offset is expected to be rejected
    self.assertRaises(PairsAdjustmentError, adjust_base, two_pairs, 3.5)
def ilm_parser(lines=None, pseudo=True):
    """Ilm format parser

    Takes lines as input and returns a list with Pairs object.

    lines  - iterable of lines; lines starting with 'Final' and lines of
             length 1 are skipped, every other line is split into integers
    pseudo - if True returns pairs with possible pseudoknot
             if False removes pseudoknots

    Lines whose second integer is 0 are not pairs and are dropped. The
    remaining positions are shifted by -1 with adjust_base before the
    directed, sorted Pairs object is built.
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1: #skip these lines
            continue
        # Build a real list: the fields are indexed below, which the lazy
        # iterator returned by map() on Python 3 does not support.
        fields = [int(field) for field in line.strip('\n').split(None, 2)]
        if fields[1] == 0:
            continue #Skip this line, not a pair
        pairs.append(fields)

    pairs = adjust_base(pairs, -1)
    tmp = Pairs(pairs).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()
    return [tmp]
def ilm_parser(lines=None,pseudo=True):
    """Ilm format parser

    Takes lines as input and returns a list with Pairs object.

    Pseudo - if True returns pairs with possible pseudoknot
             if False removes pseudoknots

    Lines starting with 'Final' and lines of length 1 are ignored. Each
    remaining line is split into integers; a line whose second integer is 0
    is not a pair and is ignored as well. Positions are shifted by -1 with
    adjust_base. The result is a one-element list.
    """
    pairs = []
    for line in lines:
        if line.startswith('Final') or len(line) == 1: #skip these lines
            continue
        # list(...) keeps the result indexable on Python 3, where map()
        # returns an iterator instead of a list
        numbers = list(map(int, line.strip('\n').split(None, 2)))
        if numbers[1] != 0: #second value 0 means not a pair
            pairs.append(numbers)

    tmp = Pairs(adjust_base(pairs, -1)).directed()
    tmp.sort()
    if not pseudo:
        tmp = opt_single_random(tmp)
        tmp.sort()
    result = [tmp]
    return result