Ejemplo n.º 1
0
def fold_exterior_loop(seq, con, exterior_only=True):
    """Fold a sequence while keeping the helices given in a constraint.

    The default behavior is "exterior_only", which replaces every constrained
    helix with a short 'NNN' stretch at the sequence level, folds the
    shortened sequence, and copies the result back onto the unpaired
    positions of the constraint. This reduces the sequence length (n) and
    therefore the runtime O(n^3).

    With ``exterior_only=False`` the full sequence is folded with ViennaRNA's
    global ``fold_constrained`` switch turned on.

    :param seq: RNA sequence
    :param con: structure constraint (same length as ``seq``)
    :param exterior_only: only fold the exterior loop region

    :return: secondary structure
    """
    if not exterior_only:
        # Force copy of string for ViennaRNA swig interface bug
        tmp = (con + '.')[:-1]
        RNA.cvar.fold_constrained = 1
        try:
            ss, _ = RNA.fold(seq, tmp)
        finally:
            # Always switch the global flag back off, even if folding raises,
            # so later unconstrained calls are not silently affected.
            RNA.cvar.fold_constrained = 0
        return ss

    spacer = 'NNN'
    # The code below treats an entry of -1 as "unpaired" and any other entry
    # as the index of the pairing partner.
    pt = ril.make_pair_table(con, base=0)

    # Shrink the sequence: keep exterior unpaired bases, collapse every helix
    # into the spacer. Remember where each kept base ends up in the shortened
    # sequence so the folded result can be mapped back in one pass.
    ext_parts = []
    unpaired = []  # (index in con, index in shortened sequence)
    offset = 0
    skip = 0
    for i, j in enumerate(pt):
        if i < skip:
            continue
        if j == -1:
            ext_parts.append(seq[i])
            unpaired.append((i, offset))
            offset += 1
        else:
            ext_parts.append(spacer)
            offset += len(spacer)
            skip = j + 1  # jump past the whole helix

    css, _ = RNA.fold(''.join(ext_parts))

    # Replace the exterior-loop characters of the constraint with the fold.
    ss = list(con)
    for i, k in unpaired:
        ss[i] = css[k]
    return ''.join(ss)
Ejemplo n.º 2
0
def open_breathing_helices(seq, ss, free=6):
    """Return the set of structures produced by opening breathable helices.

    Breathable helices are those that share a base-pair with an exterior
    loop region. The actual work is delegated to ``rec_fill_nbrs``, which
    receives the result set, the original structure, a mutable copy of it,
    the pair table, the interval ``(0, len(ss))`` and ``free``.

    ``seq`` is accepted but not used in this function.
    """
    pair_table = ril.make_pair_table(ss, base=0)

    # rec_fill_nbrs gets a list so it can edit the structure in place
    # (presumably it does -- confirm against its definition).
    working = list(ss)
    neighbors = set()

    rec_fill_nbrs(neighbors, ss, working, pair_table, (0, len(ss)), free)

    # Whatever is left in the working copy is itself part of the result.
    neighbors.add(''.join(working))
    return neighbors
Ejemplo n.º 3
0
def open_breathing_helices(seq, ss, free=6):
    """Return the set of structures produced by opening breathable helices.

    Breathable helices are those that share a base-pair with an exterior
    loop region. The actual work is delegated to ``rec_fill_nbrs``, which
    receives the result set, the original structure, a mutable copy of it,
    the pair table, the interval ``(0, len(ss))`` and ``free``.

    ``seq`` is accepted but not used in this function.
    """
    pair_table = ril.make_pair_table(ss, base=0)

    # rec_fill_nbrs gets a list so it can edit the structure in place
    # (presumably it does -- confirm against its definition).
    working = list(ss)
    neighbors = set()

    rec_fill_nbrs(neighbors, ss, working, pair_table, (0, len(ss)), free)

    # Whatever is left in the working copy is itself part of the result.
    neighbors.add(''.join(working))
    return neighbors
Ejemplo n.º 4
0
def aptamer_energy(
        seq,
        ss,
        verb=False,
        # Default Theophylline
        apt='GAUACCAG' + '&' + 'CCCUUGGCAGC',
        poc='(...((((' + '&' + ')...)))...)',
        bfe=-8.86):  # at 25*C; -9.22 at 37*C
    """Return the ligand binding free energy carried by a structure.

    Check if a sequence/structure pair contains the ligand binding pocket
    (apt/poc). Each occurrence contributes ``bfe`` once, so the result is
    ``bfe`` times the number of pockets found (0 pockets gives a zero
    result).

    Args:
      seq:  RNA sequence.
      ss:   Secondary structure in dot-bracket notation.
      verb: Currently unused.
      apt:  Left and right sequence motif of the pocket, joined by '&'.
            Each half is compiled as a regular expression.
      poc:  Left and right structure motif of the pocket, joined by '&'.
      bfe:  Binding free energy of a single pocket.

    Returns:
      ``bfe`` multiplied by the number of detected pockets.

    Raises:
      ValueError: if ``apt`` or ``poc`` does not contain exactly one '&'.

    TODO: allow hairpin pockets (e.g. tetracycline)
    """
    aptL, aptR = apt.split('&')
    pocL, pocR = poc.split('&')

    patL = re.compile(aptL)
    patR = re.compile(aptR)

    def pocket_spans(pattern, pocket):
        # (start, end) of every motif match whose structure slice is the
        # expected pocket half.
        return [(m.start(), m.end()) for m in pattern.finditer(seq)
                if ss[m.start():m.end()] == pocket]

    # Scan each side once instead of rescanning the right motif for every
    # left match.
    left = pocket_spans(patL, pocL)
    right = pocket_spans(patR, pocR) if left else []

    # The right half has to start at or after the end of the left half.
    candidates = [(l_start, l_end, r_start, r_end)
                  for l_start, l_end in left
                  for r_start, r_end in right
                  if r_start >= l_end]
    if not candidates:
        return bfe * 0

    # Build the pair table once, and only when there is something to check.
    ptable = ril.make_pair_table(ss, base=0)

    # Now make sure that these are really base-pairs: the outermost and the
    # innermost positions of both halves must pair with each other.
    sites = 0
    for l_start, l_end, r_start, r_end in candidates:
        if (l_start == ptable[r_end - 1] and
                l_end - 1 == ptable[r_start] and
                r_start == ptable[l_end - 1] and
                r_end - 1 == ptable[l_start]):
            sites += 1
    return bfe * sites
Ejemplo n.º 5
0
def aptamer_energy(seq, ss, verb=False,
                   # Default Theophylline
                   apt='GAUACCAG' + '&' + 'CCCUUGGCAGC',
                   poc='(...((((' + '&' + ')...)))...)',
                   bfe=-8.86):  # at 25*C; -9.22 at 37*C
    """Return the ligand binding free energy carried by a structure.

    Check if a sequence/structure pair contains the ligand binding pocket
    (apt/poc). Each occurrence contributes ``bfe`` once, so the result is
    ``bfe`` times the number of pockets found (0 pockets gives a zero
    result).

    Args:
      seq:  RNA sequence.
      ss:   Secondary structure in dot-bracket notation.
      verb: Currently unused.
      apt:  Left and right sequence motif of the pocket, joined by '&'.
            Each half is compiled as a regular expression.
      poc:  Left and right structure motif of the pocket, joined by '&'.
      bfe:  Binding free energy of a single pocket.

    Returns:
      ``bfe`` multiplied by the number of detected pockets.

    Raises:
      ValueError: if ``apt`` or ``poc`` does not contain exactly one '&'.

    TODO: allow hairpin pockets (e.g. tetracycline)
    """
    aptL, aptR = apt.split('&')
    pocL, pocR = poc.split('&')

    patL = re.compile(aptL)
    patR = re.compile(aptR)

    def pocket_spans(pattern, pocket):
        # (start, end) of every motif match whose structure slice is the
        # expected pocket half.
        return [(m.start(), m.end()) for m in pattern.finditer(seq)
                if ss[m.start():m.end()] == pocket]

    # Scan each side once instead of rescanning the right motif for every
    # left match.
    left = pocket_spans(patL, pocL)
    right = pocket_spans(patR, pocR) if left else []

    # The right half has to start at or after the end of the left half.
    candidates = [(l_start, l_end, r_start, r_end)
                  for l_start, l_end in left
                  for r_start, r_end in right
                  if r_start >= l_end]
    if not candidates:
        return bfe * 0

    # Build the pair table once, and only when there is something to check.
    ptable = ril.make_pair_table(ss, base=0)

    # Now make sure that these are really base-pairs: the outermost and the
    # innermost positions of both halves must pair with each other.
    sites = 0
    for l_start, l_end, r_start, r_end in candidates:
        if (l_start == ptable[r_end - 1] and
                l_end - 1 == ptable[r_start] and
                r_start == ptable[l_end - 1] and
                r_end - 1 == ptable[l_start]):
            sites += 1
    return bfe * sites
Ejemplo n.º 6
0
def fold_exterior_loop(md, seq, con, ext_moves):
    """ Constrained folding of the exterior loop.

    All constrained helices are replaced with the motif:
      NNNNNNN
      ((xxx))
    for example a helix with the closing-stack CG-UG:
      CG ~ UG -> CGNNNUG
      (( ~ )) -> ((xxx))
    This reduces the sequence length (n) and therefore the runtime O(n^3),
    and it enables the identification of independent structures with the same
    exterior loop features.

    Args:
      md (RNA.md()):      ViennaRNA model details (temperature, noLP, etc.)
      seq (str):          RNA sequence
      con (str):          RNA structure constraint
      ext_moves (dict()): Dictionary storing all mappings from exterior-loop
                          constraints (features) to parents. It is keyed by
                          the shortened exterior-loop sequence; a new key is
                          initialised here as ``[set(), structure]``.

    Returns:
      (str, str): The full-length structure (``con`` with its exterior-loop
        positions replaced by the fold of the shortened sequence) and the
        shortened exterior-loop sequence used as key into ``ext_moves``.
    """

    spacer = 'NNN'
    helix_con = '((' + 'x' * len(spacer) + '))'
    # The code below treats an entry of -1 as "unpaired" and any other entry
    # as the index of the pairing partner.
    pt = ril.make_pair_table(con, base=0)

    # Shrink the sequences. Pieces are collected in lists and joined once;
    # for every exterior unpaired base we also remember its position in the
    # shortened sequence so the result can be mapped back without re-walking
    # the pair table.
    seq_parts = []
    con_parts = []
    unpaired = []  # (index in con, index in shortened sequence)
    offset = 0
    skip = 0
    for i, j in enumerate(pt):
        if i < skip:
            continue
        if j == -1:
            seq_parts.append(seq[i])
            con_parts.append('.')
            unpaired.append((i, offset))
            offset += 1
        else:
            # Keep the closing stack of the helix, collapse its interior.
            seq_parts.append(seq[i] + seq[i + 1] + spacer + seq[j - 1] + seq[j])
            con_parts.append(helix_con)
            offset += len(helix_con)
            skip = j + 1  # jump past the whole helix
    ext_seq = ''.join(seq_parts)
    ext_con = ''.join(con_parts)

    # If we have seen this exterior loop before, then we don't need to
    # calculate again, and we have to trace back if the parents are connected.
    if ext_seq in ext_moves:
        css = ext_moves[ext_seq][1]
    else:
        fc_tmp = RNA.fold_compound(ext_seq, md)
        fc_tmp.constraints_add(
            ext_con, RNA.CONSTRAINT_DB_DEFAULT | RNA.CONSTRAINT_DB_ENFORCE_BP)
        css, _ = fc_tmp.mfe()
        ext_moves[ext_seq] = [set(), css]

    # Replace the exterior-loop characters of the constraint with the fold.
    ss = list(con)
    for i, k in unpaired:
        ss[i] = css[k]

    return ''.join(ss), ext_seq
Ejemplo n.º 7
0
def fold_exterior_loop(md, seq, con, ext_moves):
    """ Constrained folding of the exterior loop.

    All constrained helices are replaced with the motif:
      NNNNNNN
      ((xxx))
    for example a helix with the closing-stack CG-UG:
      CG ~ UG -> CGNNNUG
      (( ~ )) -> ((xxx))
    This reduces the sequence length (n) and therefore the runtime O(n^3),
    and it enables the identification of independent structures with the same
    exterior loop features.

    Args:
      md (RNA.md()):      ViennaRNA model details (temperature, noLP, etc.)
      seq (str):          RNA sequence
      con (str):          RNA structure constraint
      ext_moves (dict()): Dictionary storing all mappings from exterior-loop
                          constraints (features) to parents. It is keyed by
                          the shortened exterior-loop sequence; a new key is
                          initialised here as ``[set(), structure]``.

    Returns:
      (str, str): The full-length structure (``con`` with its exterior-loop
        positions replaced by the fold of the shortened sequence) and the
        shortened exterior-loop sequence used as key into ``ext_moves``.
    """

    spacer = 'NNN'
    helix_con = '((' + 'x' * len(spacer) + '))'
    # The code below treats an entry of -1 as "unpaired" and any other entry
    # as the index of the pairing partner.
    pt = ril.make_pair_table(con, base=0)

    # Shrink the sequences. Pieces are collected in lists and joined once;
    # for every exterior unpaired base we also remember its position in the
    # shortened sequence so the result can be mapped back without re-walking
    # the pair table.
    seq_parts = []
    con_parts = []
    unpaired = []  # (index in con, index in shortened sequence)
    offset = 0
    skip = 0
    for i, j in enumerate(pt):
        if i < skip:
            continue
        if j == -1:
            seq_parts.append(seq[i])
            con_parts.append('.')
            unpaired.append((i, offset))
            offset += 1
        else:
            # Keep the closing stack of the helix, collapse its interior.
            seq_parts.append(seq[i] + seq[i + 1] + spacer + seq[j - 1] + seq[j])
            con_parts.append(helix_con)
            offset += len(helix_con)
            skip = j + 1  # jump past the whole helix
    ext_seq = ''.join(seq_parts)
    ext_con = ''.join(con_parts)

    # If we have seen this exterior loop before, then we don't need to
    # calculate again, and we have to trace back if the parents are connected.
    if ext_seq in ext_moves:
        css = ext_moves[ext_seq][1]
    else:
        fc_tmp = RNA.fold_compound(ext_seq, md)
        fc_tmp.constraints_add(
            ext_con, RNA.CONSTRAINT_DB_DEFAULT | RNA.CONSTRAINT_DB_ENFORCE_BP)
        css, _ = fc_tmp.mfe()
        ext_moves[ext_seq] = [set(), css]

    # Replace the exterior-loop characters of the constraint with the fold.
    ss = list(con)
    for i, k in unpaired:
        ss[i] = css[k]

    return ''.join(ss), ext_seq