Beispiel #1
0
def ValidateSEQRESAlignment(aln, chain=None):
    """
    Checks if sequence in alignment has same connectivity as residues in chain.
    This looks for connected stretches in both the sequence and the chain and
    returns False if they don't match. This uses the connectivity of the
    protein backbone.

    The second sequence of the alignment is walked left to right; every
    non-gap character after the first one advances to the next residue of
    *chain*. Neighbouring residues that are adjacent in the alignment must be
    connected, residues separated by at least one gap must not be connected.
    Residue pairs where either one-letter-code is '?' are not checked.

    :param aln: Alignment of two sequences with second one expected to map to
                residues in *chain*.
    :type aln: :class:`~ost.seq.AlignmentHandle`
    :param chain: Source of the sequence. If None, the view attached to the
                  second sequence of *aln* is used.
    :type chain: :class:`~ost.mol.ChainHandle`

    :returns: True if the connectivity of the chain matches the gap pattern
              of the sequence (also for an empty or gap-only sequence), False
              otherwise.
    :raises: :class:`ValueError` if *aln* does not hold exactly two sequences
             or if *chain* is None and no view is attached to the sequence.
    """
    from ost import LogWarning
    from ost import mol
    if aln.GetCount() != 2:
        raise ValueError('Alignment contains more than 2 sequences!')
    sequence = aln.GetSequence(1)
    if len(sequence) == 0:
        return True
    if chain is None:
        if not sequence.HasAttachedView():
            raise ValueError("Alignment is missing an attached chain view.")
        chain = sequence.GetAttachedView()
    residues = chain.residues
    # eat up all beginning gaps; j ends up one past the first non-gap column
    j = 1
    for s in sequence:
        if s != '-':
            break
        j += 1
    else:
        # Only gaps: no residue is aligned, so there is no connectivity to
        # contradict. Without this guard sequence[j - 1] would index one
        # position past the end of the sequence.
        return True
    prev = sequence[j - 1]
    i = 0
    # run over sequence & alignment
    for s in sequence[j:]:
        if s != '-':
            i += 1
            r1 = residues[i - 1]
            r2 = residues[i]
            if r1.one_letter_code == '?' or r2.one_letter_code == '?':
                # NOTE(review): this also skips the update of 'prev' below,
                # exactly as the original code did.
                continue
            if prev != '-':
                # directly adjacent in the alignment: must be bonded
                if not mol.InSequence(r1.handle, r2.handle):
                    LogWarning('%s and %s are not connected by peptide bond' %
                               (str(r1), str(r2)))
                    return False
            else:
                # separated by a gap: must not be bonded
                if mol.InSequence(r1.handle, r2.handle):
                    LogWarning('%s and %s are connected by peptide bond' %
                               (str(r1), str(r2)))
                    return False
        prev = s
    return True
Beispiel #2
0
def _RunACCALL(command, temp_dir, query):
    """
    Launch the accall binary directly and feed it *query* on stdin.

    Calling accall directly is faster than going through the "naccess"
    script, which has a constant overhead of roughly 1.3s per call.

    :param command:  Command to execute
    :param temp_dir: Working directory the command is executed in
    :param query:    Text that is encoded and piped into *command*
    :returns:        Decoded stdout of the command
    :exception:      CalledProcessError for non-zero return value
    """
    pipe = subprocess.PIPE
    accall = subprocess.Popen(command,
                              cwd=temp_dir,
                              stdin=pipe,
                              stdout=pipe,
                              stderr=pipe)
    raw_out, raw_err = accall.communicate(query.encode())

    # a zero exit status means naccess finished successfully
    if accall.returncode == 0:
        return raw_out.decode()

    LogWarning("WARNING: naccess error\n%s\n%s" %
               (raw_out.decode(), raw_err.decode()))
    raise subprocess.CalledProcessError(accall.returncode, command)
Beispiel #3
0
def Transform(tf=None):
    """
    Deprecated entry point that forwards to ``_geom.Transform``.

    A deprecation warning is logged while the function attribute
    ``mol_transform_warning_flag`` is true; the flag is cleared right after
    logging. The attribute is expected to be initialised outside this
    function.

    :param tf: Optional argument handed on to ``_geom.Transform``. A falsy
               value results in a call without arguments.
    :returns: The object created by ``_geom.Transform``.
    """
    from ost import LogWarning
    warn_pending = Transform.mol_transform_warning_flag
    if warn_pending:
        LogWarning(
            "mol.Transform is deprecated, please use geom.Transform instead")
        Transform.mol_transform_warning_flag = False
    return _geom.Transform(tf) if tf else _geom.Transform()
Beispiel #4
0
def _CheckNaccessRoot(naccess_root):
    """
    Check whether *naccess_root* can be used to launch accall directly.

    A warning is logged if the check fails.

    :param naccess_root: Path to naccess folder to check.
    :return: True, if the folder contains an executable "accall" binary and
             the files "vdw.radii" and "standard.data".
    """
    accall_path = os.path.join(naccess_root, "accall")
    has_binary = os.path.exists(accall_path) \
                 and os.access(accall_path, os.X_OK)
    # the data files are only looked up once the binary was found
    root_ok = has_binary and all(
        os.path.exists(os.path.join(naccess_root, data_file))
        for data_file in ("vdw.radii", "standard.data"))
    if root_ok:
        return root_ok
    LogWarning("NACCESS: Could not find required files to launch accall " \
               "directly in %s." % naccess_root)
    return root_ok
Beispiel #5
0
def _ParseAsaFile(entity, file, asa_atom):
    """
    Reads Area file (.asa) and attaches asa per atom to an entity

    Only lines starting with "ATOM" are evaluated. Atom name, chain id,
    residue number (with optional insertion code) and the area value are
    taken from fixed columns of the line. Lines whose residue number cannot
    be parsed and atoms that cannot be found in *entity* are reported with a
    warning and skipped.

    :param entity:   EntityHandle or EntityView for attaching sasa on atom level
    :param file:     Filename of area file
    :param asa_atom: Name of the float property for SASA
    """
    # residue number: optionally signed integer plus optional insertion code
    resnum_pattern = re.compile(r'(?P<num>-?\d+)(?P<ins>\w)?')

    # the context manager closes the file even if reading fails
    with open(file) as asa_fh:
        asa_lines = asa_fh.readlines()

    for l in asa_lines:
        if not l.startswith("ATOM"):
            continue
        # get res_number, chain_id and atom name from their fixed columns
        atom_name = l[12:16].strip()
        chain_id = l[21]
        res_number = l[22:27].strip()
        asa = l[54:63].strip()

        m = resnum_pattern.match(res_number)
        if m is None:
            # previously this crashed with an AttributeError on m.groupdict()
            LogWarning("NACCESS: invalid residue number '%s' in asa entry %s %s" \
                       % (res_number, chain_id, atom_name))
            continue
        di = m.groupdict()

        if di["ins"] is None:
            resNum = mol.ResNum(int(di["num"]))
        else:
            resNum = mol.ResNum(int(di["num"]), di["ins"])

        a = entity.FindAtom(chain_id, resNum, atom_name)
        if a.IsValid():
            a.SetFloatProp(asa_atom, float(asa))
        else:
            LogWarning("NACCESS: invalid asa entry %s %s %s" \
                       % (chain_id, resNum, atom_name))
Beispiel #6
0
def _RunNACCESS(command, temp_dir):
    """
    Run the external Naccess executable through the shell.

    Only stdout is captured; stderr of the child process is not redirected.

    :param command:  Command line to execute (passed to the shell)
    :param temp_dir: Working directory the command is executed in
    :returns:        Decoded stdout of the command
    :exception:      CalledProcessError for non-zero return value
    """
    naccess = subprocess.Popen(command,
                               shell=True,
                               cwd=temp_dir,
                               stdout=subprocess.PIPE)
    # second element is None since stderr is not piped
    raw_out, _ = naccess.communicate()

    # a zero exit status means naccess finished successfully
    if naccess.returncode == 0:
        return raw_out.decode()

    LogWarning("WARNING: naccess error\n%s" % raw_out.decode())
    raise subprocess.CalledProcessError(naccess.returncode, command)
Beispiel #7
0
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Align the residues of *chain* to the SEQRES sequence, inserting gaps
    where needed.

    By default the chain is split into consecutive peptide fragments using
    the connectivity of the protein backbone. Each fragment is then searched
    in SEQRES, left to right, starting behind the previous fragment. A
    fragment that cannot be found raises a ValueError, so all residues of the
    chain must be listed in SEQRES.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`
    :param seqres: SEQRES sequence
    :type seqres: :class:`str`
    :param try_resnum_first: If set to True, an alignment is first built by
                             placing each residue at the position given by
                             its residue number. Residues with numbers
                             outside of SEQRES are left out. On the first
                             one-letter-code mismatch ('X' and '?' match
                             anything) a warning is logged and the
                             connectivity-based alignment is used instead.
    :type try_resnum_first: :class:`bool`
    :param validate: If set to True, the alignment is additionally checked by
                     :func:`~ost.seq.alg.ValidateSEQRESAlignment` and a
                     ValueError is raised if the validation failed.
    :type validate: :class:`bool`

    :returns: The alignment of the SEQRES entries (first sequence, named
              'SEQRES') and the residues in the chain (second sequence, named
              'atoms'). An empty alignment if the chain has no residues.
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    from ost import mol
    from ost import LogWarning

    wildcards = ('X', '?')

    def _olc_match(olc1, olc2):
        # wildcard codes are compatible with everything
        return olc1 == olc2 or olc1 in wildcards or olc2 in wildcards

    view = chain
    residues = view.residues
    if len(residues) == 0:
        return seq.CreateAlignment()

    if try_resnum_first:
        seqres_len = len(seqres)
        aln_seq = seq.CreateSequence('atoms', '-' * seqres_len)
        for res in residues:
            idx = res.number.num - 1
            if not 0 <= idx < seqres_len:
                # residue number does not point into SEQRES: leave it out
                continue
            olc = res.one_letter_code
            if not _olc_match(seqres[idx], olc):
                LogWarning('Sequence mismatch: chain has "' +
                           olc + '", while SEQRES is "' +
                           seqres[idx] +
                           '" at the corresponding position.')
                # fall back to the connectivity-based alignment below
                try_resnum_first = False
                break
            aln_seq[idx] = olc

    if not try_resnum_first:
        # collect one-letter-codes of stretches connected along the backbone
        fragments = []
        current = residues[0].one_letter_code
        for prev_res, next_res in zip(residues[:-1], residues[1:]):
            if not mol.InSequence(prev_res.handle, next_res.handle):
                fragments.append(current)
                current = ''
            current += next_res.one_letter_code
        fragments.append(current)

        # place the fragments in SEQRES, never moving backwards
        ss = str(seqres)
        cursor = 0
        pieces = []
        for frag in fragments:
            hit = ss.find(frag, cursor)
            if hit == -1:
                raise ValueError('"%s" is not a substring of "%s"' %
                                 (frag, ss))
            pieces.append('-' * (hit - cursor))
            pieces.append(frag)
            cursor = hit + len(frag)
        # fill up with gaps to the length of SEQRES
        gapped = ''.join(pieces).ljust(len(seqres), '-')
        aln_seq = seq.CreateSequence('atoms', gapped)

    seqres_seq = seq.CreateSequence('SEQRES', str(seqres))
    alignment = seq.CreateAlignment(seqres_seq, aln_seq)
    if validate:
        if not ValidateSEQRESAlignment(alignment, view):
            raise ValueError(
                "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
Beispiel #8
0
def CalculateSurfaceArea(entity,
                         radius=1.4,
                         include_hydrogens=False,
                         include_hetatm=False,
                         include_water=False,
                         selection="",
                         naccess_exe=None,
                         naccess_root=None,
                         keep_files=False,
                         asa_abs="asaAbs",
                         asa_rel="asaRel",
                         asa_atom="asaAtom",
                         scratch_dir=None,
                         max_number_of_atoms=50000):
    """
    Calculates the solvent accessible surface area with the external naccess
    program.

    If *naccess_root* (or the folder of the naccess executable) contains the
    files needed to start the "accall" binary directly, accall is called and
    the input is piped in. Otherwise the naccess executable is run through
    its command line. The input files are prepared in a temporary directory,
    the resulting .asa and .rsa files are parsed and, unless *keep_files* is
    set, the temporary directory is removed again, also if an error occurs.

    :param entity:              OST entity to calculate surface
    :param radius:              Surface probe radius
    :param include_hydrogens:   Calculate surface including hydrogens
    :param include_hetatm:      Calculate surface including hetatms
    :param include_water:       Calculate surface including water
    :param selection:           Calculate surface for subset of entity
    :param naccess_exe:         naccess executable (full path to executable)
    :param naccess_root:        Path to folder containing "accall" binary and
                                files "vdw.radii" and "standard.data". This is
                                the fastest way to call naccess!
    :param keep_files:          If True, do not delete temporary files
    :param asa_abs:             Attaches per residue absolute SASA to specified
                                FloatProp on residue level
    :param asa_rel:             Attaches per residue relative SASA to specified
                                FloatProp on residue level
    :param asa_atom:            Attaches per atom SASA to specified FloatProp at
                                atom level
    :param scratch_dir:         Scratch directory. A subfolder for temporary
                                files is created in there. If not specified, a
                                default directory is used (see
                                :func:`tempfile.mkdtemp`).
    :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is
                                limited in the default NACCESS version to
                                50 000)

    :returns:                   absolute SASA calculated using asa_atom
    """

    # decide between direct accall call and the naccess executable
    if naccess_root and _CheckNaccessRoot(naccess_root):
        # use faster, direct call to accall binary
        fast_mode = True
    else:
        # get naccess executable
        naccess_executable = _GetExecutable(naccess_exe)
        # see if we can extract naccess_root from there (fallback to old mode)
        naccess_root = os.path.dirname(naccess_executable)
        fast_mode = _CheckNaccessRoot(naccess_root)

    # setup files for naccess
    work_dir, pdb_file, base_name = _SetupFiles(entity, selection,
                                                scratch_dir,
                                                max_number_of_atoms)

    try:
        if fast_mode:
            # cook up stdin query (same logic as naccess script)
            query_lines = [
                "PDBFILE %s" % pdb_file,
                "VDWFILE %s" % os.path.join(naccess_root, "vdw.radii"),
                "STDFILE %s" % os.path.join(naccess_root, "standard.data"),
                "PROBE %f" % radius,
                "ZSLICE 0.05",
            ]
            if include_hydrogens:
                query_lines.append("HYDROGENS")
            if include_water:
                query_lines.append("WATERS")
            if include_hetatm:
                query_lines.append("HETATOMS")
            query = "".join(line + "\n" for line in query_lines)
            # call it
            command = os.path.join(naccess_root, "accall")
            _RunACCALL(command, work_dir, query)
        else:
            LogWarning("NACCESS: Falling back to slower call to %s." \
                       % naccess_executable)
            # set command line; each switch is appended behind a blank
            switches = []
            if include_hydrogens:
                switches.append("-y")
            if include_water:
                switches.append("-w")
            if include_hetatm:
                switches.append("-h")
            command = "%s %s -p %f " % (naccess_executable, pdb_file, radius)
            command += "".join(" " + switch for switch in switches)
            # execute naccess
            _RunNACCESS(command, work_dir)

        # parse output: per atom values first, then per residue values
        asa_path = os.path.join(work_dir, "%s.asa" % base_name)
        _ParseAsaFile(entity, asa_path, asa_atom)

        rsa_path = os.path.join(work_dir, "%s.rsa" % base_name)
        _ParseRsaFile(entity, rsa_path, asa_abs, asa_rel)

    finally:
        # clean up
        if not keep_files:
            __CleanupFiles(work_dir)

    # sum up Asa for all atoms; atoms without the property count as 0.0
    total_sasa = 0.0
    for atom in entity.atoms:
        total_sasa += atom.GetFloatProp(asa_atom, 0.0)

    return total_sasa