# NOTE(review): a later definition in this file, fold_exterior_loop(md, seq,
# con, ext_moves), rebinds this name -- confirm this variant is still needed.
def fold_exterior_loop(seq, con, exterior_only=True):
    """ Constrained folding

    The default behavior is "exterior_only", which replaces all constrained
    helices with short 'NNN' stretches at the sequence level. This reduces
    the sequence length (n) and therefore the runtime O(n^3)

    :param seq: RNA sequence
    :param con: constraint
    :param exterior_only: only fold the exterior loop region

    :return: secondary structure
    """
    if exterior_only:
        spacer = 'NNN'
        pt = ril.make_pair_table(con, base=0)

        # Shrink the sequence: keep every unpaired exterior base, collapse
        # each constrained helix (i..j) into the spacer. For every kept base
        # remember where it lands in the shrunken sequence so the folded
        # result can be written back afterwards.
        ext_parts = []
        kept = []  # (index in con, index in shrunken sequence)
        ext_len = 0
        skip = 0
        for i, j in enumerate(pt):
            if i < skip:
                # still inside a helix that has already been collapsed
                continue
            if j == -1:
                kept.append((i, ext_len))
                ext_parts.append(seq[i])
                ext_len += 1
            else:
                ext_parts.append(spacer)
                ext_len += len(spacer)
                skip = j + 1

        css, _ = RNA.fold(''.join(ext_parts))

        # Replace the exterior characters of the constraint with the folded
        # exterior-loop structure (one list write per base, single join).
        chars = list(con)
        for i, c in kept:
            chars[i] = css[c]
        ss = ''.join(chars)
    else:
        # Force copy of string for ViennaRNA swig interface bug
        tmp = (con + '.')[:-1]
        RNA.cvar.fold_constrained = 1
        try:
            ss, _ = RNA.fold(seq, tmp)
        finally:
            # Always clear the global flag, even if folding raised, so later
            # RNA.fold calls are not silently constrained.
            RNA.cvar.fold_constrained = 0
    return ss
def open_breathing_helices(seq, ss, free=6):
    """ Open all breathable helices, i.e. those that share a base-pair
    with an exterior loop region.

    The work is delegated to rec_fill_nbrs, which receives the result set,
    the original structure, a mutable copy of it, the pair table, the
    interval (0, len(ss)) and `free`. Afterwards the (possibly modified)
    mutable copy is itself added to the result set.

    :param seq: RNA sequence (accepted, but not used in this function)
    :param ss: secondary structure in dot-bracket notation
    :param free: forwarded unchanged to rec_fill_nbrs

    :return: set of neighboring secondary structures
    """
    pair_table = ril.make_pair_table(ss, base=0)

    # working copy of the structure that rec_fill_nbrs may edit in place
    opened = list(ss)

    neighbors = set()
    rec_fill_nbrs(neighbors, ss, opened, pair_table, (0, len(ss)), free)
    neighbors.add(''.join(opened))
    return neighbors
# NOTE(review): an identical aptamer_energy definition follows later in this
# file and rebinds the name -- consider keeping only one copy.
def aptamer_energy(
        seq, ss, verb=False,
        # Default Theophylline
        apt='GAUACCAG' + '&' + 'CCCUUGGCAGC',
        poc='(...((((' + '&' + ')...)))...)',
        bfe=-8.86):  # at 25*C; -9.22 at 37*C
    """ Check if a sequence/structure pair contains the ligand binding pocket
    (apt/poc). If so, return the binding free energy (bfe), otherwise return 0.
    Multiple pockets will return bfe multiple times!

    TODO: allow hairpin pockets (e.g. tetracycline)

    :param seq: RNA sequence
    :param ss: secondary structure (dot-bracket) of seq
    :param verb: currently unused
    :param apt: left and right sequence motif of the pocket, joined by '&';
        each half is compiled as a regular expression
    :param poc: left and right structure motif of the pocket, joined by '&'
    :param bfe: binding free energy contributed by one pocket

    :return: bfe multiplied by the number of pockets found
    """
    aptL, aptR = apt.split('&')
    pocL, pocR = poc.split('&')

    patL = re.compile(aptL)
    patR = re.compile(aptR)

    # The pair table depends only on ss: build it at most once, and only
    # when a candidate pocket actually has to be verified.
    ptable = None

    sites = 0
    for mL in patL.finditer(seq):
        l_start, l_end = mL.span()
        if ss[l_start:l_end] != pocL:
            continue
        for mR in patR.finditer(seq):
            r_start, r_end = mR.span()
            # the right half must start after the left half ends
            if r_start < l_end:
                continue
            if ss[r_start:r_end] != pocR:
                continue
            if ptable is None:
                ptable = ril.make_pair_table(ss, base=0)
            # Now make sure that these are really base-pairs: the outer and
            # inner ends of both halves must pair with each other.
            if l_start == ptable[r_end - 1] and \
                    l_end - 1 == ptable[r_start] and \
                    r_start == ptable[l_end - 1] and \
                    r_end - 1 == ptable[l_start]:
                sites += 1
    return bfe * sites
# NOTE(review): an identical aptamer_energy definition precedes this one in
# the file; this later copy is the one that stays bound -- consider keeping
# only one.
def aptamer_energy(seq, ss, verb=False,
                   # Default Theophylline
                   apt='GAUACCAG' + '&' + 'CCCUUGGCAGC',
                   poc='(...((((' + '&' + ')...)))...)',
                   bfe=-8.86):  # at 25*C; -9.22 at 37*C
    """ Check if a sequence/structure pair contains the ligand binding pocket
    (apt/poc). If so, return the binding free energy (bfe), otherwise return 0.
    Multiple pockets will return bfe multiple times!

    TODO: allow hairpin pockets (e.g. tetracycline)

    :param seq: RNA sequence
    :param ss: secondary structure (dot-bracket) of seq
    :param verb: currently unused
    :param apt: left and right sequence motif of the pocket, joined by '&';
        each half is compiled as a regular expression
    :param poc: left and right structure motif of the pocket, joined by '&'
    :param bfe: binding free energy contributed by one pocket

    :return: bfe multiplied by the number of pockets found
    """
    aptL, aptR = apt.split('&')
    pocL, pocR = poc.split('&')

    patL = re.compile(aptL)
    patR = re.compile(aptR)

    # The pair table depends only on ss: build it at most once, and only
    # when a candidate pocket actually has to be verified.
    ptable = None

    sites = 0
    for mL in patL.finditer(seq):
        l_start, l_end = mL.span()
        if ss[l_start:l_end] != pocL:
            continue
        for mR in patR.finditer(seq):
            r_start, r_end = mR.span()
            # the right half must start after the left half ends
            if r_start < l_end:
                continue
            if ss[r_start:r_end] != pocR:
                continue
            if ptable is None:
                ptable = ril.make_pair_table(ss, base=0)
            # Now make sure that these are really base-pairs: the outer and
            # inner ends of both halves must pair with each other.
            if l_start == ptable[r_end - 1] and \
                    l_end - 1 == ptable[r_start] and \
                    r_start == ptable[l_end - 1] and \
                    r_end - 1 == ptable[l_start]:
                sites += 1
    return bfe * sites
def fold_exterior_loop(md, seq, con, ext_moves):
    """ Constrained folding of the exterior loop.

    All constrained helices are replaced with the motif:
        NNNNNNN
        ((xxx))
    for example a helix with the closing-stack CG-UG:
        CG ~ UG -> CGNNNUG
        (( ~ )) -> ((xxx))
    This reduces the sequence length (n) and therefore the runtime O(n^3),
    and it enables the identification of independent structures with the same
    exterior loop features.

    Args:
        md (RNA.md()): ViennaRNA model details (temperature, noLP, etc.)
        seq (str): RNA sequence
        con (str): RNA structure constraint
        ext_moves (dict()): Dictionary storing all mappings from exterior-loop
            constraints (features) to parents. Keyed by the shrunken
            exterior-loop sequence; a missing key is filled in here with
            ``[set(), css]``, where css is the folded exterior-loop structure.

    Returns:
        (str, str): The constraint with its unpaired exterior positions
            replaced by the folded exterior-loop structure, and the shrunken
            exterior-loop sequence (the key used in ext_moves).
    """
    spacer = 'NNN'
    # structure motif standing in for one collapsed helix: '((' + spacer + '))'
    helix_con = '((' + 'x' * len(spacer) + '))'
    pt = ril.make_pair_table(con, base=0)

    # Shrink the sequences: keep every unpaired exterior base, collapse each
    # constrained helix (i..j) to its two closing pairs around the spacer.
    # For every kept base remember where it lands in the shrunken sequence.
    seq_parts = []
    con_parts = []
    kept = []  # (index in con, index in shrunken sequence)
    ext_len = 0
    skip = 0
    for i, j in enumerate(pt):
        if i < skip:
            # still inside a helix that has already been collapsed
            continue
        if j == -1:
            kept.append((i, ext_len))
            seq_parts.append(seq[i])
            con_parts.append('.')
            ext_len += 1
        else:
            seq_parts.append(seq[i] + seq[i + 1] + spacer + seq[j - 1] + seq[j])
            con_parts.append(helix_con)
            ext_len += len(helix_con)
            skip = j + 1
    ext_seq = ''.join(seq_parts)
    ext_con = ''.join(con_parts)

    # If we have seen this exterior loop before, then we don't need to
    # calculate again, and we have to trace back if the parents are connected.
    if ext_seq in ext_moves:
        css = ext_moves[ext_seq][1]
    else:
        fc_tmp = RNA.fold_compound(ext_seq, md)
        fc_tmp.constraints_add(
            ext_con, RNA.CONSTRAINT_DB_DEFAULT | RNA.CONSTRAINT_DB_ENFORCE_BP)
        css, _ = fc_tmp.mfe()
        ext_moves[ext_seq] = [set(), css]
        del fc_tmp

    # Replace the exterior characters of the constraint with the folded
    # exterior-loop structure (one list write per base, single join).
    chars = list(con)
    for i, c in kept:
        chars[i] = css[c]
    ss = ''.join(chars)
    return ss, ext_seq