def ValidateSEQRESAlignment(aln, chain=None):
    """
    Checks if sequence in alignment has same connectivity as residues in chain.

    This looks for connected stretches in both the sequence and the chain and
    returns False if they don't match. This uses the connectivity of the
    protein backbone as reported by ``mol.InSequence``: two consecutive
    residues that are adjacent in the aligned sequence must be connected,
    two consecutive residues separated by at least one gap must not be.
    Pairs in which either residue has the one-letter code '?' are not checked.

    :param aln: Alignment of two sequences with second one expected to map to
                residues in *chain*.
    :type aln: :class:`~ost.seq.AlignmentHandle`
    :param chain: Source of the sequence. If None, the view attached to the
                  second sequence of *aln* is used.
    :type chain: :class:`~ost.mol.ChainHandle`

    :returns: True if the connectivity of all checked residue pairs matches
              the gap pattern of the sequence (or the sequence is empty),
              False otherwise.
    :raises: :class:`ValueError` if *aln* does not contain exactly two
             sequences, or if *chain* is None and the second sequence has no
             attached view.
    """
    from ost import LogWarning
    from ost import mol

    if aln.GetCount() != 2:
        raise ValueError('Alignment must contain exactly 2 sequences!')
    sequence = aln.GetSequence(1)
    if len(sequence) == 0:
        return True
    if chain is None:
        if not sequence.HasAttachedView():
            raise ValueError("Alignment is missing an attached chain view.")
        chain = sequence.GetAttachedView()
    residues = chain.residues

    # Skip leading gaps: 'first' is the index of the first non-gap symbol,
    # which corresponds to residues[0].
    # NOTE(review): a sequence consisting only of gaps is not handled
    # specially; sequence[first] then indexes one past the end - confirm the
    # desired behaviour for that input.
    first = 0
    for symbol in sequence:
        if symbol != '-':
            break
        first += 1
    previous = sequence[first]

    res_index = 0
    for symbol in sequence[first + 1:]:
        if symbol != '-':
            res_index += 1
            r1 = residues[res_index - 1]
            r2 = residues[res_index]
            # Pairs involving an unknown residue ('?') are not checked.
            if r1.one_letter_code != '?' and r2.one_letter_code != '?':
                connected = mol.InSequence(r1.handle, r2.handle)
                if previous != '-' and not connected:
                    LogWarning('%s and %s are not connected by peptide bond'
                               % (str(r1), str(r2)))
                    return False
                if previous == '-' and connected:
                    LogWarning('%s and %s are connected by peptide bond'
                               % (str(r1), str(r2)))
                    return False
        # Always remember the symbol just processed, also when the pair was
        # skipped because of a '?' residue. Otherwise a gap seen before the
        # unknown residue would still be "remembered" when the next pair of
        # known residues is checked.
        previous = symbol
    return True
def _RunACCALL(command, temp_dir, query):
    """
    Fast method to run the Naccess surface calculation.

    Starts the accall binary directly and pipes *query* into its stdin.
    This is faster than calling the "naccess" script since the script has a
    constant overhead of roughly 1.3s in each call.

    :param command: Command to execute
    :param temp_dir: Command is executed with this working directory
    :param query: User input to pipe into *command* (encoded before sending)

    :returns: decoded stdout of command
    :exception: CalledProcessError for non-zero return value
    """
    process = subprocess.Popen(command,
                               cwd=temp_dir,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    raw_out, raw_err = process.communicate(query.encode())
    output = raw_out.decode()

    # a non-zero exit status means accall did not complete successfully
    if process.returncode != 0:
        LogWarning("WARNING: naccess error\n%s\n%s"
                   % (output, raw_err.decode()))
        raise subprocess.CalledProcessError(process.returncode, command)

    return output
def Transform(tf=None):
    """
    Deprecated way of creating a ``_geom.Transform`` object.

    While the function attribute ``Transform.mol_transform_warning_flag`` is
    set, a deprecation warning is logged and the flag is cleared, so the
    warning is not repeated by later calls.

    :param tf: Optional argument forwarded to ``_geom.Transform``. It is only
               forwarded if it evaluates to True.

    :returns: ``_geom.Transform(tf)`` if *tf* is truthy, ``_geom.Transform()``
              otherwise.
    """
    from ost import LogWarning

    if Transform.mol_transform_warning_flag:
        LogWarning(
            "mol.Transform is deprecated, please use geom.Transform instead")
        Transform.mol_transform_warning_flag = False

    # forward tf only when it is truthy, otherwise default-construct
    ctor_args = (tf,) if tf else ()
    return _geom.Transform(*ctor_args)
def _CheckNaccessRoot(naccess_root):
    """
    Check whether a folder can be used to launch "accall" directly.

    :param naccess_root: Path to naccess folder to check.

    :return: True, if given directory contains an executable "accall" binary
             and the files "vdw.radii" and "standard.data". Otherwise a
             warning is logged and False is returned.
    """
    binary = os.path.join(naccess_root, "accall")

    # the binary must exist and be executable ...
    usable = os.path.exists(binary) and os.access(binary, os.X_OK)
    # ... and both data files must sit next to it
    if usable:
        usable = all(os.path.exists(os.path.join(naccess_root, data_file))
                     for data_file in ("vdw.radii", "standard.data"))

    if not usable:
        LogWarning("NACCESS: Could not find required files to launch accall "
                   "directly in %s." % naccess_root)
    return usable
def _ParseAsaFile(entity, file, asa_atom):
    """
    Reads Area file (.asa) and attaches the asa per atom to an entity.

    Only lines starting with "ATOM" are considered. The fields are read from
    fixed columns: atom name (12-16), chain id (21), residue number including
    an optional insertion code (22-27) and the accessible area (54-63).
    Entries whose residue number cannot be parsed or whose atom cannot be
    found in *entity* are skipped with a warning.

    :param entity: EntityHandle or EntityView for attaching sasa on atom level
    :param file: Filename of area file
    :param asa_atom: Name of the float property for SASA
    """
    # residue number with optional sign, followed by optional insertion code
    resnum_pattern = re.compile(r'(?P<num>-?\d+)(?P<ins>\w)?')

    # the context manager closes the file even if reading or parsing raises
    with open(file) as asa_fh:
        for line in asa_fh:
            if not line.startswith("ATOM"):
                continue

            # get res_number, chain_id and atom name
            atom_name = line[12:16].strip()
            chain_id = line[21]
            res_number = line[22:27].strip()
            asa = line[54:63].strip()

            match = resnum_pattern.match(res_number)
            if match is None:
                # malformed residue number field: skip instead of failing
                # with an AttributeError on None
                LogWarning("NACCESS: could not parse residue number '%s' "
                           "(chain %s, atom %s)"
                           % (res_number, chain_id, atom_name))
                continue

            ins_code = match.group("ins")
            if ins_code is None:
                res_num = mol.ResNum(int(match.group("num")))
            else:
                res_num = mol.ResNum(int(match.group("num")), ins_code)

            atom = entity.FindAtom(chain_id, res_num, atom_name)
            if atom.IsValid():
                atom.SetFloatProp(asa_atom, float(asa))
            else:
                LogWarning("NACCESS: invalid asa entry %s %s %s"
                           % (chain_id, res_num, atom_name))
def _RunNACCESS(command, temp_dir):
    """
    Method to run the Naccess surface calculation.

    Starts the external Naccess executable through the shell and returns
    what it wrote to stdout. stderr is not captured.

    :param command: Command to execute
    :param temp_dir: Command is executed with this working directory

    :returns: decoded stdout of command
    :exception: CalledProcessError for non-zero return value
    """
    process = subprocess.Popen(command,
                               shell=True,
                               cwd=temp_dir,
                               stdout=subprocess.PIPE)
    # only stdout is piped, so the second element of the result is unused
    captured = process.communicate()[0]

    # a non-zero exit status means naccess did not complete successfully
    if process.returncode != 0:
        LogWarning("WARNING: naccess error\n%s" % captured.decode())
        raise subprocess.CalledProcessError(process.returncode, command)

    return captured.decode()
def AlignToSEQRES(chain, seqres, try_resnum_first=False, validate=True):
    """
    Aligns the residues of chain to the SEQRES sequence, inserting gaps where
    needed.

    The function uses the connectivity of the protein backbone to find
    consecutive peptide fragments. These fragments are then aligned to the
    SEQRES sequence by searching them, in order, as substrings of SEQRES.

    All the non-ligand, peptide-linking residues of the chain must be listed
    in SEQRES. If there are any additional residues in the chain, the function
    raises a ValueError.

    :param chain: Source of the sequence
    :type chain: :class:`~ost.mol.ChainHandle`
    :param seqres: SEQRES sequence
    :type seqres: :class:`str`
    :param try_resnum_first: If set to True, this first builds an alignment
                             using residue numbers and checks if the
                             one-letter-codes match ('X' and '?' match
                             anything). If they all match, this alignment is
                             used (and possibly validated). Otherwise, it
                             displays a warning and falls back to the
                             connectivity-based alignment.
    :type try_resnum_first: :class:`bool`
    :param validate: If set to True, the alignment is additionally checked by
                     :func:`~ost.seq.alg.ValidateSEQRESAlignment` and raises a
                     ValueError if the validation failed.
    :type validate: :class:`bool`

    :returns: The alignment of the residues in the chain and the SEQRES
              entries. An empty alignment if the chain has no residues.
    :rtype: :class:`~ost.seq.AlignmentHandle`
    """
    from ost import seq
    from ost import mol
    from ost import LogWarning

    def IsEqual(olc1, olc2):
        # 'X' and '?' act as wildcards on either side
        wildcards = ('X', '?')
        return olc1 in wildcards or olc2 in wildcards or olc1 == olc2

    view = chain
    residues = view.residues
    if len(residues) == 0:
        return seq.CreateAlignment()

    # first attempt (optional): place residues by their residue number
    resnum_ok = try_resnum_first
    if resnum_ok:
        aln_seq = seq.CreateSequence('atoms', '-' * len(seqres))
        for res in residues:
            # residues numbered outside of the SEQRES range are not placed
            if res.number.num > len(seqres) or res.number.num <= 0:
                continue
            slot = res.number.num - 1
            if IsEqual(seqres[slot], res.one_letter_code):
                aln_seq[slot] = res.one_letter_code
                continue
            LogWarning('Sequence mismatch: chain has "' + res.one_letter_code +
                       '", while SEQRES is "' + seqres[slot] +
                       '" at the corresponding position.')
            resnum_ok = False
            break

    # fallback / default: align connected fragments as substrings of SEQRES
    if not resnum_ok:
        # split the chain into stretches of backbone-connected residues
        fragments = [residues[0].one_letter_code]
        for prev_res, next_res in zip(residues[:-1], residues[1:]):
            if mol.InSequence(prev_res.handle, next_res.handle):
                fragments[-1] += next_res.one_letter_code
            else:
                fragments.append(next_res.one_letter_code)

        seqres_str = str(seqres)
        cursor = 0
        pieces = []
        for frag in fragments:
            hit = seqres_str.find(frag, cursor)
            if hit == -1:
                raise ValueError('"%s" is not a substring of "%s"'
                                 % (frag, seqres_str))
            # gaps up to the match, then the fragment itself
            pieces.append('-' * (hit - cursor))
            pieces.append(frag)
            cursor = hit + len(frag)
        aligned = ''.join(pieces)
        # pad with trailing gaps up to the length of SEQRES
        trailing = '-' * (len(seqres) - len(aligned))
        aln_seq = seq.CreateSequence('atoms', aligned + trailing)

    seqres_seq = seq.CreateSequence('SEQRES', str(seqres))
    alignment = seq.CreateAlignment(seqres_seq, aln_seq)
    if validate and not ValidateSEQRESAlignment(alignment, view):
        raise ValueError(
            "SEQRES cannot be aligned with its corresponding chain.")
    return alignment
def CalculateSurfaceArea(entity, radius=1.4,
                         include_hydrogens=False, include_hetatm=False,
                         include_water=False, selection="", naccess_exe=None,
                         naccess_root=None, keep_files=False, asa_abs="asaAbs",
                         asa_rel="asaRel", asa_atom="asaAtom", scratch_dir=None,
                         max_number_of_atoms=50000):
    """
    Calculates the solvent accessible surface area analytically by using the
    external naccess program.

    This method calculates the molecular surface areas by invoking the
    external program naccess. First, it is checked if the naccess executable
    is present, then, the necessary files are prepared in a temporary
    directory and naccess is executed. The results are parsed from the
    generated .asa and .rsa files and attached to *entity*. The last step is
    to remove the temporary directory.

    :param entity: OST entity to calculate surface
    :param radius: Surface probe radius
    :param include_hydrogens: Calculate surface including hydrogens
    :param include_hetatm: Calculate surface including hetatms
    :param include_water: Calculate surface including water
    :param selection: Calculate surface for subset of entity
    :param naccess_exe: naccess executable (full path to executable)
    :param naccess_root: Path to folder containing "accall" binary and files
                         "vdw.radii" and "standard.data". This is the fastest
                         way to call naccess! If it is not given or not
                         usable, the folder of the naccess executable is
                         tried instead.
    :param keep_files: If True, do not delete temporary files
    :param asa_abs: Attaches per residue absolute SASA to specified FloatProp
                    on residue level
    :param asa_rel: Attaches per residue relative SASA to specified FloatProp
                    on residue level
    :param asa_atom: Attaches per atom SASA to specified FloatProp at atom
                     level
    :param scratch_dir: Scratch directory. A subfolder for temporary files is
                        created in there. If not specified, a default
                        directory is used (see :func:`tempfile.mkdtemp`).
    :param max_number_of_atoms: Max Number of atoms in the entity (i.e. is
                                limited in the default NACCESS version to
                                50 000)

    :returns: absolute SASA calculated using asa_atom (sum of that property
              over all atoms of *entity*, atoms without it count as 0.0)
    """
    # prefer the direct call to the accall binary if a usable root is given
    fast_mode = bool(naccess_root) and _CheckNaccessRoot(naccess_root)
    if not fast_mode:
        # get naccess executable
        naccess_executable = _GetExecutable(naccess_exe)
        # see if we can extract naccess_root from there (fallback to old mode)
        naccess_root = os.path.dirname(naccess_executable)
        fast_mode = _CheckNaccessRoot(naccess_root)

    # setup files for naccess
    work_dir, data_file, data_base = _SetupFiles(entity, selection,
                                                 scratch_dir,
                                                 max_number_of_atoms)

    # per option: (enabled, keyword for the accall query, flag for the script)
    options = (
        (include_hydrogens, "HYDROGENS\n", "%s -y"),
        (include_water, "WATERS\n", "%s -w"),
        (include_hetatm, "HETATOMS\n", "%s -h"),
    )

    try:
        if fast_mode:
            # cook up stdin query (same logic as naccess script)
            query = "PDBFILE %s\n" \
                    "VDWFILE %s\n" \
                    "STDFILE %s\n" \
                    "PROBE %f\n" \
                    "ZSLICE 0.05\n" \
                    % (data_file,
                       os.path.join(naccess_root, "vdw.radii"),
                       os.path.join(naccess_root, "standard.data"),
                       radius)
            for enabled, keyword, _ in options:
                if enabled:
                    query += keyword
            # call it
            command = os.path.join(naccess_root, "accall")
            _RunACCALL(command, work_dir, query)
        else:
            LogWarning("NACCESS: Falling back to slower call to %s."
                       % naccess_executable)
            # set command line
            command = "%s %s -p %f " % (naccess_executable, data_file, radius)
            for enabled, _, flag_format in options:
                if enabled:
                    command = flag_format % command
            # execute naccess
            _RunNACCESS(command, work_dir)

        # parse output: per-atom values first, then per-residue values
        asa_path = os.path.join(work_dir, "%s.asa" % data_base)
        _ParseAsaFile(entity, asa_path, asa_atom)

        rsa_path = os.path.join(work_dir, "%s.rsa" % data_base)
        _ParseRsaFile(entity, rsa_path, asa_abs, asa_rel)
    finally:
        # clean up, also if running or parsing failed
        if not keep_files:
            __CleanupFiles(work_dir)

    # sum up Asa for all atoms
    total_asa = 0.0
    for atom in entity.atoms:
        total_asa += atom.GetFloatProp(asa_atom, 0.0)
    return total_asa