def formalCharge(molecule):
    """Compute the formal charge on a molecule. This function requires that
    the molecule has explicit hydrogen atoms.

    Parameters
    ----------

    molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        A molecule object.

    Returns
    -------

    formal_charge : :class:`Charge <BioSimSpace.Types.Charge>`
        The total formal charge on the molecule.

    Raises
    ------

    TypeError
        If 'molecule' is not a BioSimSpace molecule.

    RuntimeError
        If RDKit is unable to build a molecule from the intermediate PDB file.
    """

    if type(molecule) is not _Molecule:
        raise TypeError("'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    # Function-local imports, as in the original implementation.
    import contextlib as _contextlib
    from rdkit import Chem as _Chem

    # The temporary working directory is removed when the outer 'with' exits.
    with _tempfile.TemporaryDirectory() as work_dir, \
            _contextlib.ExitStack() as stack:
        # Run in the working directory.
        stack.enter_context(_Utils.cd(work_dir))

        # Stdout/stderr redirection doesn't work from within Jupyter, so
        # RDKit's stderr is only redirected outside of a notebook.
        if not _is_notebook:
            stack.enter_context(_Utils.stderr_redirected())

        # Save the molecule to a PDB file.
        _IO.saveMolecules("tmp", molecule, "PDB")

        # Read the ligand PDB into an RDKit molecule.
        mol = _Chem.MolFromPDBFile("tmp.pdb")

        # RDKit signals a parse failure by returning None rather than raising.
        if mol is None:
            raise RuntimeError("RDKit was unable to read the molecule "
                               "from the intermediate PDB file.")

        # Compute the formal charge.
        formal_charge = _Chem.rdmolops.GetFormalCharge(mol)

    return formal_charge * _electron_charge
def _clear_output(self):
    """Reset stdout and stderr, then remove stale simulation output.

    After delegating to the base class, any restart, system, and trajectory
    files left in the working directory are deleted. For free energy
    protocols the gradient and simulation data files are removed as well.
    """

    def discard(path):
        # Only regular files that actually exist are removed.
        if _os.path.isfile(path):
            _os.remove(path)

    # Call the base class method.
    super()._clear_output()

    # Restart and system files.
    for name in ("sim_restart.s3", "SYSTEM.s3"):
        discard("%s/%s" % (self._work_dir, name))

    # Trajectory files.
    for path in _IO.glob("%s/traj*.dcd" % self._work_dir):
        discard(path)

    # Additional files for free energy simulations.
    if type(self._protocol) is _Protocol.FreeEnergy:
        for name in ("gradients.dat", "simfile.dat"):
            discard("%s/%s" % (self._work_dir, name))
def _find_force_field(forcefield):
    """Internal function to search LEaP compatible force field files.

    The command directory is searched first. If nothing matches, the
    'oldff' sub-directory is tried: first for an exact 'leaprc.<name>'
    file, then for any file with a matching extension.

    Parameters
    ----------

    forcefield : str
        The name of the force field.

    Returns
    -------

    file : str
        The name of the matching force field file, prefixed with 'oldff/'
        when it was found among the old force fields.

    Raises
    ------

    ValueError
        If no file, or more than one file, matches the force field name.
    """

    # Look in the main force field directory first.
    matches = _IO.glob("%s/*.%s" % (_cmd_dir, forcefield))

    # Anything found from here on lives in the old force field directory.
    is_old = not matches

    if is_old:
        # Try a specific match.
        matches = _IO.glob("%s/oldff/leaprc.%s" % (_cmd_dir, forcefield))

        # Fall back to any file with a matching extension.
        if not matches:
            matches = _IO.glob("%s/oldff/*.%s" % (_cmd_dir, forcefield))

    # No force field found!
    if not matches:
        raise ValueError("No force field file found for '%s'" % forcefield)

    # Multiple force fields found.
    if len(matches) > 1:
        raise ValueError("Multiple force fields found for '%s': %s" % (forcefield, matches))

    # Create the force field name.
    name = _os.path.basename(matches[0])

    return "oldff/" + name if is_old else name
def formalCharge(molecule):
    """Compute the formal charge on a molecule. This function requires that
    the molecule has explicit hydrogen atoms.

    Parameters
    ----------

    molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        A molecule object.

    Returns
    -------

    formal_charge : :class:`Charge <BioSimSpace.Types.Charge>`
        The total formal charge on the molecule.

    Raises
    ------

    TypeError
        If 'molecule' is not a BioSimSpace molecule.

    RuntimeError
        If RDKit is unable to build a molecule from the intermediate PDB file.
    """

    if type(molecule) is not _Molecule:
        raise TypeError("'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    from rdkit import Chem as _Chem
    from rdkit import RDLogger as _RDLogger

    # Disable RDKit warnings.
    # NOTE(review): logging is not re-enabled afterwards; this looks like it
    # affects the whole process -- confirm that is intended.
    _RDLogger.DisableLog('rdApp.*')

    # The temporary working directory is removed when the 'with' block exits.
    with _tempfile.TemporaryDirectory() as work_dir:
        # Run in the working directory.
        with _Utils.cd(work_dir):
            # Save the molecule to a PDB file.
            _IO.saveMolecules("tmp", molecule, "PDB")

            # Read the ligand PDB into an RDKit molecule.
            mol = _Chem.MolFromPDBFile("tmp.pdb")

            # RDKit signals a parse failure by returning None rather than
            # raising.
            if mol is None:
                raise RuntimeError("RDKit was unable to read the molecule "
                                   "from the intermediate PDB file.")

            # Compute the formal charge.
            formal_charge = _Chem.rdmolops.GetFormalCharge(mol)

    return formal_charge * _electron_charge
def getSystem(self, block="AUTO"):
    """Get the latest molecular system.

    Parameters
    ----------

    block : bool, str
        Whether to block until the process has finished running. With the
        default value, "AUTO", the call only waits when the process is
        flagged as blocked (self._is_blocked).

    Returns
    -------

    system : :class:`System <BioSimSpace._SireWrappers.System>`
        The latest molecular system, or None if it could not be read.
    """

    # Wait for the process to finish.
    if block is True:
        self.wait()
    elif block == "AUTO" and self._is_blocked:
        self.wait()

    # Try to grab the latest coordinates from the binary restart file.
    try:
        new_system = _IO.readMolecules([self._restart_file, self._top_file])

        # Since SOMD requires specific residue and water naming we copy the
        # coordinates back into the original system.
        old_system = self._system.copy()
        old_system._updateCoordinates(new_system)

        # Update the periodic box information in the original system.
        if "space" in new_system._sire_object.propertyKeys():
            box = new_system._sire_object.property("space")
            old_system._sire_object.setProperty(
                self._property_map.get("space", "space"), box)

        return old_system

    # Reading is best-effort: any ordinary failure yields None. Catching
    # Exception (rather than everything) lets KeyboardInterrupt and
    # SystemExit propagate.
    except Exception:
        return None
def run(self, molecule, work_dir=None, queue=None):
    """Run the parameterisation protocol.

    The molecule is written to PDB, typed and charged with Antechamber,
    checked for missing parameters with ParmChk, and finally assembled by
    tLEaP. The commands that were run are recorded in 'README.txt' inside
    the working directory.

    Parameters
    ----------

    molecule : BioSimSpace._SireWrappers.Molecule
        The molecule to apply the parameterisation protocol to.

    work_dir : str
        The working directory. The current directory is used when None.

    queue : queue.Queue
        The thread queue in which this method has been run. When given,
        the parameterised molecule is also put on the queue.

    Returns
    -------

    molecule : BioSimSpace._SireWrappers.Molecule
        The parameterised molecule.

    Raises
    ------

    TypeError
        If an argument is of the wrong type.

    IOError
        If the input PDB cannot be written, or the tLEaP output cannot
        be read back.

    _ParameterisationError
        If no charge information is available, or if Antechamber, ParmChk,
        or tLEaP fail to produce their expected output.
    """

    if type(molecule) is not _Molecule:
        raise TypeError("'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    # None is a valid value for the optional arguments, so only validate the
    # type when a value was actually passed.
    if work_dir is not None and type(work_dir) is not str:
        raise TypeError("'work_dir' must be of type 'str'")

    if queue is not None and type(queue) is not _queue.Queue:
        raise TypeError("'queue' must be of type 'queue.Queue'")

    # Set work_dir to the current directory.
    if work_dir is None:
        work_dir = _os.getcwd()

    # Create the file prefix.
    prefix = work_dir + "/"

    def execute(label, stem, command, append=True):
        """Log 'command' to the README and run it in the working directory,
        sending stdout/stderr to '<stem>.out' and '<stem>.err'."""
        with open(prefix + "README.txt", "a" if append else "w") as file:
            # Later entries are separated from the previous one by a blank line.
            if append:
                file.write("\n")
            file.write("# %s was run with the following command:\n" % label)
            file.write("%s\n" % command)

        # The context manager closes both log files even if the run raises.
        with open(prefix + stem + ".out", "w") as stdout, \
                open(prefix + stem + ".err", "w") as stderr:
            _subprocess.run(command, cwd=work_dir, shell=True,
                            stdout=stdout, stderr=stderr)

    # Create a copy of the molecule.
    new_mol = molecule.copy()

    # Use the net molecular charge passed as an option.
    if self._net_charge is not None:
        charge = self._net_charge
    else:
        # The user will likely have passed a bare PDB or Mol2 file.
        # Antechamber expects the molecule to be uncharged, or integer
        # charged (where the charge, or number of electrons, is passed with
        # the -nc flag).

        # Get the total charge on the molecule.
        prop = self._property_map.get("charge", "charge")
        _property_map = {"charge": prop}

        # The molecule has a charge property.
        if new_mol._getSireObject().hasProperty(prop):
            charge = new_mol.charge(property_map=_property_map).magnitude()

            # Charge is non-integer, try to fix it.
            if abs(round(charge) - charge) > 0:
                new_mol._fixCharge(property_map=_property_map)
                charge = round(charge)
        else:
            charge = None

        # Only try "formal_charge" when "charge" is missing. Unlikely to have
        # both if this is a bare molecule, but the user could be
        # re-parameterising an existing molecule.
        if charge is None:
            # Get the total formal charge on the molecule, honouring any
            # user mapping of the "formal_charge" property.
            prop = self._property_map.get("formal_charge", "formal_charge")
            _property_map = {"charge": prop}

            if not new_mol._getSireObject().hasProperty(prop):
                msg = ("The molecule has no 'charge' or 'formal_charge' information, and "
                       "no 'net_charge' option has been passed. You can use the "
                       "'BioSimSpace.Parameters.formalCharge' function to compute the "
                       "formal charge")
                raise _ParameterisationError(msg)

            charge = new_mol.charge(property_map=_property_map).magnitude()

            # Compute the formal charge ourselves to check that it is consistent.
            formal_charge = _formalCharge(molecule).magnitude()

            if charge != formal_charge:
                _warnings.warn("The formal charge on the molecule is %d "
                               "but we estimate it to be %d" % (charge, formal_charge))

    # Create a new system and molecule group.
    s = _SireSystem.System("BioSimSpace System")
    m = _SireMol.MoleculeGroup("all")

    # Add the molecule.
    m.add(new_mol._getSireObject())
    s.add(m)

    # Write the system to a PDB file.
    try:
        pdb = _SireIO.PDB2(s)
        pdb.writeToFile(prefix + "antechamber.pdb")
    except Exception as e:
        msg = "Failed to write system to 'PDB' format."
        if _isVerbose():
            raise IOError(msg) from e
        else:
            raise IOError(msg) from None

    # Generate and run the Antechamber command.
    command = ("%s -at %d -i antechamber.pdb -fi pdb "
               "-o antechamber.mol2 -fo mol2 -c %s -s 2 -nc %d") % (
                   _protocol._antechamber_exe, self._version,
                   self._charge_method.lower(), charge)
    execute("Antechamber", "antechamber", command, append=False)

    # Antechamber doesn't return sensible error codes, so we need to check
    # that the expected output was generated.
    if not _os.path.isfile(prefix + "antechamber.mol2"):
        raise _ParameterisationError("Antechamber failed!")

    # Run parmchk to check for missing parameters.
    command = ("%s -s %d -i antechamber.mol2 -f mol2 "
               "-o antechamber.frcmod") % (_protocol._parmchk_exe, self._version)
    execute("ParmChk", "parmchk", command)

    # The frcmod file wasn't created.
    if not _os.path.isfile(prefix + "antechamber.frcmod"):
        raise _ParameterisationError("Parmchk failed!")

    # Now call tLEaP using the partially parameterised molecule and the
    # frcmod file. tLEaP will run in the same working directory, using the
    # Mol2 file generated by Antechamber.

    # Try to find a force field file.
    if self._version == 1:
        ff = _protocol._find_force_field("gaff")
    else:
        ff = _protocol._find_force_field("gaff2")

    # Write the LEaP input file.
    with open(prefix + "leap.txt", "w") as file:
        file.write("source %s\n" % ff)
        file.write("mol = loadMol2 antechamber.mol2\n")
        file.write("loadAmberParams antechamber.frcmod\n")
        file.write("saveAmberParm mol leap.top leap.crd\n")
        file.write("quit")

    # Generate and run the tLEaP command.
    command = "%s -f leap.txt" % _protocol._tleap_exe
    execute("tLEaP", "tleap", command)

    # tLEaP doesn't return sensible error codes, so we need to check that
    # the expected output was generated.
    if not (_os.path.isfile(prefix + "leap.top")
            and _os.path.isfile(prefix + "leap.crd")):
        raise _ParameterisationError("tLEaP failed!")

    # Load the parameterised molecule.
    try:
        par_mol = _Molecule(
            _IO.readMolecules([prefix + "leap.top", prefix + "leap.crd"])
            ._getSireObject()[_SireMol.MolIdx(0)])
    except Exception as e:
        msg = "Failed to read molecule from: 'leap.top', 'leap.crd'"
        if _isVerbose():
            raise IOError(msg) from e
        else:
            raise IOError(msg) from None

    # Make the molecule 'mol' compatible with 'par_mol'. This will create
    # a mapping between atom indices in the two molecules and add all of
    # the new properties from 'par_mol' to 'mol'.
    new_mol._makeCompatibleWith(par_mol, property_map=self._property_map,
                                overwrite=True, verbose=False)

    # Record the forcefield used to parameterise the molecule.
    new_mol._forcefield = ff

    if queue is not None:
        queue.put(new_mol)

    return new_mol
def __init__(self, handle):
    """Constructor.

    Parameters
    ----------

    handle : :class:`Process <BioSimSpace.Process>`, \
             :class:`System <BioSimSpace._SireWrappers.System>` \
             :class:`Molecule <BioSimSpace._SireWrappers.Molecule>` \
             :class:`Molecules <BioSimSpace._SireWrappers.Molecules>` \
             str, [str]
        A handle to a process, system, molecule, or molecule container,
        or the path to molecular input file(s).

    Raises
    ------

    TypeError
        If the handle is not one of the supported types.
    """

    # Make sure we're running inside a Jupyter notebook. Outside of one the
    # constructor only warns and returns without setting any attributes.
    if not _is_notebook:
        _warnings.warn("You can only use BioSimSpace.Notebook.View from within a Jupyter notebook.")
        return None

    # Check the handle.

    # Convert tuple to list.
    if isinstance(handle, tuple):
        handle = list(handle)

    # Convert single string to list.
    if isinstance(handle, str):
        handle = [handle]

    # List of strings (file paths).
    if isinstance(handle, list) and all(isinstance(x, str) for x in handle):
        system = _IO.readMolecules(handle)
        self._handle = system._getSireObject()
        self._is_process = False

    # BioSimSpace process.
    elif isinstance(handle, _Process):
        self._handle = handle
        self._is_process = True

    # BioSimSpace system.
    elif type(handle) is _System:
        self._handle = handle._getSireObject()
        self._is_process = False

    # Anything else must be convertible to a system.
    else:
        try:
            handle = handle.toSystem()
            self._handle = handle._getSireObject()
            self._is_process = False
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit are not converted into a TypeError.
        except Exception:
            raise TypeError("The handle must be of type 'BioSimSpace.Process', "
                            "'BioSimSpace._SireWrappers.System', "
                            "'BioSimSpace._SireWrappers.Molecule', "
                            "'BioSimSpace._SireWrappers.Molecules', "
                            "'str', or a list of 'str' types.")

    # Create a temporary workspace for the view object.
    self._tmp_dir = _tempfile.TemporaryDirectory()
    self._work_dir = self._tmp_dir.name

    # Zero the number of views.
    self._num_views = 0
def matchAtoms(molecule0,
               molecule1,
               scoring_function="rmsd_align",
               matches=1,
               return_scores=False,
               prematch={},
               timeout=5 * _Units.Time.second,
               property_map0={},
               property_map1={}):
    """Find mappings between atom indices in molecule0 to those in molecule1.
    Molecules are aligned using a Maximum Common Substructure (MCS) search.
    When requesting more than one match, the mappings will be sorted using
    a scoring function and returned in order of best to worst score. (Note
    that, depending on the scoring function the "best" score may have the
    lowest value.)

    Parameters
    ----------

    molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        The molecule of interest.

    molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        The reference molecule.

    scoring_function : str
        The scoring function used to match atoms. Available options are:
          - "rmsd"
              Calculate the root mean squared distance between the
              coordinates of atoms in molecule0 to those that they
              map to in molecule1.
          - "rmsd_align"
              Align molecule0 to molecule1 based on the mapping before
              computing the above RMSD score.
          - "rmsd_flex_align"
              Flexibly align molecule0 to molecule1 based on the mapping
              before computing the above RMSD score. (Requires the
              'fkcombu' package: http://strcomp.protein.osaka-u.ac.jp/kcombu)

    matches : int
        The maximum number of matches to return. (Sorted in order of score).

    return_scores : bool
        Whether to return a list containing the scores for each mapping.

    prematch : dict
        A dictionary of atom mappings that must be included in the match.

    timeout : BioSimSpace.Types.Time
        The timeout for the maximum common substructure search.

    property_map0 : dict
        A dictionary that maps "properties" in molecule0 to their user
        defined values. This allows the user to refer to properties
        with their own naming scheme, e.g. { "charge" : "my-charge" }

    property_map1 : dict
        A dictionary that maps "properties" in molecule1 to their user
        defined values.

    Returns
    -------

    matches : dict, [dict], (dict, score), ([dict], list)
        The best atom mapping, a list containing a user specified number of
        the best mappings ranked by their score, or a tuple pairing either
        of those with the corresponding score(s) when 'return_scores' is
        True.

    Raises
    ------

    TypeError
        If an argument is of the wrong type.

    ValueError
        If the scoring function is unsupported, or 'matches' is negative.

    _MissingSoftwareError
        If "rmsd_flex_align" is requested but 'fkcombu' was not found.

    RuntimeError
        If the RDKit maximum common substructure search fails.

    Examples
    --------

    Find the best maximum common substructure mapping between two molecules.

    >>> import BioSimSpace as BSS
    >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1)

    Find the 5 best mappings.

    >>> import BioSimSpace as BSS
    >>> mappings = BSS.Align.matchAtoms(molecule0, molecule1, matches=5)

    Find the 5 best mappings along with their ranking scores.

    >>> import BioSimSpace as BSS
    >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True)

    Find the 5 best mappings along with their ranking scores. Score
    by flexibly aligning molecule0 to molecule1 based on each mapping
    and computing the root mean squared displacement of the matched
    atoms.

    >>> import BioSimSpace as BSS
    >>> mappings, scores = BSS.Align.matchAtoms(molecule0, molecule1, matches=5, return_scores=True, scoring_function="rmsd_flex_align")

    Find the best mapping that contains a prematch (this is a dictionary mapping
    atom indices in molecule0 to those in molecule1).

    >>> import BioSimSpace as BSS
    >>> mapping = BSS.Align.matchAtoms(molecule0, molecule1, prematch={0 : 10, 3 : 7})
    """

    # A list of supported scoring functions.
    scoring_functions = ["RMSD", "RMSDALIGN", "RMSDFLEXALIGN"]

    # Validate input.

    if type(molecule0) is not _Molecule:
        raise TypeError("'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError("'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(scoring_function) is not str:
        raise TypeError("'scoring_function' must be of type 'str'")

    # Strip underscores and whitespace, then convert to upper case.
    _scoring_function = scoring_function.replace("_", "").replace(" ", "").upper()
    if _scoring_function not in scoring_functions:
        raise ValueError("Unsupported scoring function '%s'. Options are: %s"
                         % (scoring_function, scoring_functions))

    if _scoring_function == "RMSDFLEXALIGN" and _fkcombu_exe is None:
        raise _MissingSoftwareError("'rmsd_flex_align' option requires the 'fkcombu' program: "
                                    "http://strcomp.protein.osaka-u.ac.jp/kcombu")

    if type(matches) is not int:
        raise TypeError("'matches' must be of type 'int'")

    # Note that zero is accepted here; it results in an empty list of
    # mappings being returned.
    if matches < 0:
        raise ValueError("'matches' must be positive!")

    if type(return_scores) is not bool:
        raise TypeError("'return_scores' must be of type 'bool'")

    if type(prematch) is not dict:
        raise TypeError("'prematch' must be of type 'dict'")
    _validate_mapping(molecule0, molecule1, prematch, "prematch")

    if type(timeout) is not _Units.Time._Time:
        raise TypeError("'timeout' must be of type 'BioSimSpace.Types.Time'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # Extract the Sire molecule from each BioSimSpace molecule.
    mol0 = molecule0._getSireObject()
    mol1 = molecule1._getSireObject()

    # Convert the timeout to seconds and take the magnitude as an integer.
    timeout = int(timeout.seconds().magnitude())

    # Create a temporary working directory.
    tmp_dir = _tempfile.TemporaryDirectory()
    work_dir = tmp_dir.name

    # Use RDKit to find the maximum common substructure.

    try:
        # Run inside a temporary directory.
        with _Utils.cd(work_dir):
            # Write both molecules to PDB files.
            _IO.saveMolecules("tmp0", molecule0, "PDB", property_map=property_map0)
            _IO.saveMolecules("tmp1", molecule1, "PDB", property_map=property_map1)

            # Load the molecules with RDKit.
            # Note that the C++ function overloading seems to be broken, so we
            # need to pass all arguments by position, rather than keyword.
            # The arguments are: "filename", "sanitize", "removeHs", "flavor"
            mols = [_Chem.MolFromPDBFile("tmp0.pdb", False, False, 0),
                    _Chem.MolFromPDBFile("tmp1.pdb", False, False, 0)]

            # Generate the MCS match.
            mcs = _rdFMCS.FindMCS(mols,
                                  atomCompare=_rdFMCS.AtomCompare.CompareAny,
                                  bondCompare=_rdFMCS.BondCompare.CompareAny,
                                  completeRingsOnly=True,
                                  ringMatchesRingOnly=True,
                                  matchChiralTag=False,
                                  matchValences=False,
                                  maximizeBonds=False,
                                  timeout=timeout)

            # Get the common substructure as a SMARTS string.
            mcs_smarts = _Chem.MolFromSmarts(mcs.smartsString)

    # Catch Exception rather than everything, so that KeyboardInterrupt and
    # SystemExit are not reported as an MCS failure.
    except Exception as e:
        raise RuntimeError("RDKIT MCS mapping failed!") from e

    # Score the mappings and return them in sorted order (best to worst).
    mappings, scores = _score_rdkit_mappings(mol0, mol1, mols[0], mols[1],
                                             mcs_smarts, prematch, _scoring_function,
                                             property_map0, property_map1)

    # Sometimes RDKit fails to generate a mapping that includes the prematch.
    # If so, then try generating a mapping using the MCS routine from Sire.
    if len(mappings) == 1 and mappings[0] == prematch:

        # Convert timeout to a Sire Unit.
        timeout = timeout * _SireUnits.second

        # Regular match. Include light atoms, but don't allow matches between
        # heavy and light atoms.
        m0 = mol0.evaluate().findMCSmatches(mol1,
                                            _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
                                            timeout, True, property_map0, property_map1, 6, False)

        # Include light atoms, and allow matches between heavy and light atoms.
        # This captures mappings such as O --> H in methane to methanol.
        m1 = mol0.evaluate().findMCSmatches(mol1,
                                            _SireMol.AtomResultMatcher(_to_sire_mapping(prematch)),
                                            timeout, True, property_map0, property_map1, 0, False)

        # Take the mapping with the larger number of matches.
        if len(m1) > 0:
            if len(m0) > 0:
                if len(m1[0]) > len(m0[0]):
                    mappings = m1
                else:
                    mappings = m0
            else:
                mappings = m1
        else:
            mappings = m0

        # Score the mappings and return them in sorted order (best to worst).
        mappings, scores = _score_sire_mappings(mol0, mol1, mappings, prematch,
                                                _scoring_function, property_map0,
                                                property_map1)

    if matches == 1:
        # Return the single best mapping, optionally paired with its score.
        if return_scores:
            return (mappings[0], scores[0])
        else:
            return mappings[0]
    else:
        # Return a tuple containing the list of matches from best to
        # worst along with the list of scores.
        if return_scores:
            return (mappings[0:matches], scores[0:matches])
        # Return a list of matches from best to worst.
        else:
            return mappings[0:matches]
def flexAlign(molecule0, molecule1, mapping=None, fkcombu_exe=None,
              property_map0={}, property_map1={}):
    """Flexibly align atoms in molecule0 to those in molecule1 using the
    mapping between matched atom indices.

    Note that the aligned coordinates are also written back into the
    molecule0 object that was passed in.

    Parameters
    ----------

    molecule0 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        The molecule to align.

    molecule1 : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        The reference molecule.

    mapping : dict
        A dictionary mapping atoms in molecule0 to those in molecule1.

    fkcombu_exe : str
        Path to the fkcombu executable. If None is passed, then BioSimSpace
        will attempt to find fkcombu by searching your PATH.

    property_map0 : dict
        A dictionary that maps "properties" in molecule0 to their user
        defined values. This allows the user to refer to properties
        with their own naming scheme, e.g. { "charge" : "my-charge" }

    property_map1 : dict
        A dictionary that maps "properties" in molecule1 to their user
        defined values.

    Returns
    -------

    molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`
        The aligned molecule.

    Raises
    ------

    _MissingSoftwareError
        If no executable was passed and 'fkcombu' was not found.

    IOError
        If the user supplied executable does not exist.

    TypeError
        If an argument is of the wrong type.

    _AlignmentError
        If 'fkcombu' could not be run or produced no aligned structure.

    Examples
    --------

    Align molecule0 to molecule1 based on a precomputed mapping.

    >>> import BioSimSpace as BSS
    >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1, mapping)

    Align molecule0 to molecule1. Since no mapping is passed one will be
    autogenerated using :class:`matchAtoms <BioSimSpace.Align.matchAtoms>`
    with default options.

    >>> import BioSimSpace as BSS
    >>> molecule0 = BSS.Align.flexAlign(molecule0, molecule1)
    """

    # Check that we found fkcombu in the PATH.
    if fkcombu_exe is None:
        if _fkcombu_exe is None:
            raise _MissingSoftwareError("'BioSimSpace.Align.flexAlign' requires the 'fkcombu' program: "
                                        "http://strcomp.protein.osaka-u.ac.jp/kcombu")
        else:
            fkcombu_exe = _fkcombu_exe
    # Check that the user supplied executable exists.
    else:
        if not _os.path.isfile(fkcombu_exe):
            raise IOError("'fkcombu' executable doesn't exist: '%s'" % fkcombu_exe)

    if type(molecule0) is not _Molecule:
        raise TypeError("'molecule0' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(molecule1) is not _Molecule:
        raise TypeError("'molecule1' must be of type 'BioSimSpace._SireWrappers.Molecule'")

    if type(property_map0) is not dict:
        raise TypeError("'property_map0' must be of type 'dict'")

    if type(property_map1) is not dict:
        raise TypeError("'property_map1' must be of type 'dict'")

    # The user has passed an atom mapping.
    if mapping is not None:
        if type(mapping) is not dict:
            raise TypeError("'mapping' must be of type 'dict'.")
        else:
            _validate_mapping(molecule0, molecule1, mapping, "mapping")

    # Get the best match atom mapping.
    else:
        mapping = matchAtoms(molecule0, molecule1,
                             property_map0=property_map0,
                             property_map1=property_map1)

    # Convert the mapping to AtomIdx key:value pairs.
    sire_mapping = _to_sire_mapping(mapping)

    # Create a temporary working directory.
    tmp_dir = _tempfile.TemporaryDirectory()
    work_dir = tmp_dir.name

    # Execute in the working directory.
    with _Utils.cd(work_dir):

        # Write the two molecules to PDB files.
        _IO.saveMolecules("molecule0", molecule0, "PDB", property_map=property_map0)
        _IO.saveMolecules("molecule1", molecule1, "PDB", property_map=property_map1)

        # Write the mapping to text. (Increment indices by one).
        with open("mapping.txt", "w") as file:
            for idx0, idx1 in sire_mapping.items():
                file.write("%d %d\n" % (idx0.value() + 1, idx1.value() + 1))

        # Create the fkcombu command as an argument list. Passing a list
        # (rather than a shell string) keeps executable paths containing
        # spaces or shell metacharacters intact.
        command = [fkcombu_exe,
                   "-T", "molecule0.pdb",
                   "-R", "molecule1.pdb",
                   "-alg", "F",
                   "-iam", "mapping.txt",
                   "-opdbT", "aligned.pdb"]

        # Run the command as a subprocess. A failure to launch the executable
        # is reported in the same way as a failed alignment.
        try:
            _subprocess.run(command, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE)
        except OSError as e:
            raise _AlignmentError("Failed to align molecules based on mapping: %r" % mapping) from e

        # Check that the output file exists.
        if not _os.path.isfile("aligned.pdb"):
            raise _AlignmentError("Failed to align molecules based on mapping: %r" % mapping) from None

        # Load the aligned molecule.
        aligned = _IO.readMolecules("aligned.pdb")[0]

        # Get the "coordinates" property for molecule0.
        prop = property_map0.get("coordinates", "coordinates")

        # Copy the coordinates back into the original molecule.
        molecule0._sire_object = molecule0._sire_object.edit() \
            .setProperty(prop, aligned._sire_object.property("coordinates")).commit()

    # Return the aligned molecule.
    return _Molecule(molecule0)
def _solvate(molecule, box, shell, model, num_point, ion_conc, is_neutral,
             work_dir=None, property_map={}):
    """Internal function to add solvent using 'gmx solvate'.

    The solvent box is generated with 'gmx solvate'. When ions are requested
    (``ion_conc > 0`` or ``is_neutral``) the solvated system is additionally
    passed through 'gmx grompp' and 'gmx genion'. All intermediate files, the
    commands that were run (README.txt), and the stdout/stderr of each
    command are written to the working directory.

    Parameters
    ----------

    molecule : :class:`Molecule <BioSimSpace._SireWrappers.Molecule>`, \
               :class:`System <BioSimSpace._SireWrappers.System>`
        A molecule, or system of molecules. If None, a pure water box is
        created.

    box : [:class:`Length <BioSimSpace.Types.Length>`]
        A list containing the box size in each dimension.

    shell : :class:`Length <BioSimSpace.Types.Length>`
        Thickness of the water shell around the solute.

    model : str
        The name of the water model.

    num_point : int
        The number of atoms in the water model.

    ion_conc : float
        The ion concentration in (mol per litre).

    is_neutral : bool
        Whether to neutralise the system.

    work_dir : str
        The working directory for the process. A temporary directory is
        used when this is None.

    property_map : dict
        A dictionary that maps system "properties" to their user defined
        values. This allows the user to refer to properties with their
        own naming scheme, e.g. { "charge" : "my-charge" }

    Returns
    -------

    system : :class:`System <BioSimSpace._SireWrappers.System>`
        The solvated system.

    Raises
    ------

    RuntimeError
        If 'gmx solvate' produces no output, or if 'gmx grompp' or
        'gmx genion' fail when no 'shell' was specified.

    ValueError
        If no water molecules were generated.
    """

    if molecule is not None:
        # Store the centre of the molecule.
        center = molecule._getAABox(property_map).center()

        # Work out the vector from the centre of the molecule to the centre
        # of the water box, converting the distance in each direction to
        # Angstroms.
        # NOTE(review): 'box' is iterated here, so it must be set whenever a
        # molecule is passed, even though the command construction below
        # tolerates box=None. Confirm against the callers.
        vec = [0.5 * x.angstroms().magnitude() - y
               for x, y in zip(box, center)]

        # Translate the molecule. This allows us to create a water box
        # around the molecule. (The translation is undone further down.)
        molecule.translate(vec, property_map)

        if type(molecule) is _System:
            # Reformat all of the water molecules so that they match the
            # expected GROMACS topology template.
            waters = _SireIO.setGromacsWater(
                molecule._sire_object.search("water"), model)

            # Convert to a BioSimSpace molecules container.
            waters = _Molecules(waters.toMolecules())

            # Remove the old water molecules then add those with the
            # updated topology.
            molecule.removeWaterMolecules()
            molecule.addMolecules(waters)

    # Create a temporary working directory and store the directory name.
    # The TemporaryDirectory object is held in a local so that the directory
    # persists until this function returns.
    if work_dir is None:
        tmp_dir = _tempfile.TemporaryDirectory()
        work_dir = tmp_dir.name

    # Run the solvation in the working directory.
    with _Utils.cd(work_dir):

        # Create the gmx command. Three-point models use the 'spc216'
        # solvent configuration.
        if num_point == 3:
            mod = "spc216"
        else:
            mod = model
        command = "%s solvate -cs %s" % (_gmx_exe, mod)

        if molecule is not None:
            # Write the molecule/system to a GRO file.
            _IO.saveMolecules("input", molecule, "gro87")
            _os.rename("input.gro87", "input.gro")

            # Update the command.
            command += " -cp input.gro"

            # Add the box information.
            if box is not None:
                command += " -box %f %f %f" % (box[0].nanometers().magnitude(),
                                               box[1].nanometers().magnitude(),
                                               box[2].nanometers().magnitude())

            # Add the shell information.
            if shell is not None:
                command += " -shell %f" % shell.nanometers().magnitude()

        # Just add box information.
        else:
            command += " -box %f %f %f" % (box[0].nanometers().magnitude(),
                                           box[1].nanometers().magnitude(),
                                           box[2].nanometers().magnitude())

        # Add the output file.
        command += " -o output.gro"

        with open("README.txt", "w") as file:
            # Write the command to file.
            file.write("# gmx solvate was run with the following command:\n")
            file.write("%s\n" % command)

        # Run gmx solvate as a subprocess.
        _run_gmx_command(command, "solvate")

        # gmx doesn't return sensible error codes, so we need to check that
        # the expected output was generated.
        if not _os.path.isfile("output.gro"):
            raise RuntimeError("'gmx solvate failed to generate output!")

        # Extract the water lines from the GRO file.
        water_lines = []
        with open("output.gro", "r") as file:
            for line in file:
                if _re.search("SOL", line):
                    # Store the SOL atom record.
                    water_lines.append(line)

            # Add any box information. This is the last line in the GRO file.
            water_lines.append(line)

        # The final entry is the box record, so it doesn't count as an atom.
        num_water_atoms = len(water_lines) - 1

        # Write a GRO file that contains only the water atoms.
        if num_water_atoms > 0:
            with open("water.gro", "w") as file:
                file.write("BioSimSpace %s water box\n" % model.upper())
                file.write("%d\n" % num_water_atoms)
                for line in water_lines:
                    file.write("%s" % line)
        else:
            raise ValueError(
                "No water molecules were generated. Try increasing "
                "the 'box' size or 'shell' thickness.")

        # Create a TOP file for the water model. By default we use the Amber03
        # force field to generate a dummy topology for the water model.
        _write_water_topology(model, num_water_atoms // num_point)

        # Load the water box.
        water = _IO.readMolecules(["water.gro", "water_ions.top"])

        # Create a new system by adding the water to the original molecule.
        if molecule is not None:
            # Translate the molecule and water back to the original position.
            vec = [-x for x in vec]
            molecule.translate(vec, property_map)
            water.translate(vec)

            if type(molecule) is _System:
                # Extract the non-water molecules from the original system.
                non_waters = _Molecules(
                    molecule.search("not water")._sire_object.toMolecules())

                # Create a system by adding these to the water molecules from
                # gmx solvate, which will include the original waters.
                system = non_waters.toSystem() + water
            else:
                system = molecule.toSystem() + water

            # Add all of the water box properties to the new system.
            for prop in water._sire_object.propertyKeys():
                prop = property_map.get(prop, prop)

                # Add the space property from the water system.
                system._sire_object.setProperty(
                    prop, water._sire_object.property(prop))
        else:
            system = water

        # Now we add ions to the system and neutralise the charge.
        if ion_conc > 0 or is_neutral:

            # Write the molecule + water system to file.
            _IO.saveMolecules("solvated", system, "gro87")
            _IO.saveMolecules("solvated", system, "grotop")
            _os.rename("solvated.gro87", "solvated.gro")
            _os.rename("solvated.grotop", "solvated.top")

            # First write an mdp file.
            with open("ions.mdp", "w") as file:
                file.write("; Neighbour searching\n")
                file.write("cutoff-scheme = Verlet\n")
                file.write("rlist = 1.1\n")
                file.write("pbc = xyz\n")
                file.write("verlet-buffer-tolerance = -1\n")
                file.write("\n; Electrostatics\n")
                file.write("coulombtype = cut-off\n")
                file.write("\n; VdW\n")
                file.write("rvdw = 1.0\n")

            # Create the grompp command.
            command = "%s grompp -f ions.mdp -po ions.out.mdp -c solvated.gro -p solvated.top -o ions.tpr" % _gmx_exe

            with open("README.txt", "a") as file:
                # Write the command to file.
                file.write(
                    "\n# gmx grompp was run with the following command:\n")
                file.write("%s\n" % command)

            # Run grompp as a subprocess.
            _run_gmx_command(command, "grommp")

            # Flag whether to break out of the ion adding stage.
            is_break = False

            # Check for the tpr output file.
            if not _os.path.isfile("ions.tpr"):
                if shell is None:
                    raise RuntimeError(
                        "'gmx grommp' failed to generate output! "
                        "Perhaps your box is too small?")
                else:
                    is_break = True
                    _warnings.warn(
                        "Unable to achieve target ion concentration, try using "
                        "'box' option instead of 'shell'.")

            # Only continue if grompp was successful. This allows us to skip
            # the remainder of the code if the ion addition failed when the
            # 'shell' option was chosen, i.e. because the estimated simulation
            # box was too small.
            if not is_break:

                # The ion concentration is unset.
                if ion_conc == 0:
                    # Get the current molecular charge.
                    charge = system.charge()

                    # Round to the nearest integer value.
                    charge = round(charge.magnitude())

                    # Create the genion command.
                    command = "echo SOL | %s genion -s ions.tpr -o solvated_ions.gro -p solvated.top -neutral" % _gmx_exe

                    # Add enough counter ions to neutralise the charge.
                    if charge > 0:
                        command += " -nn %d" % abs(charge)
                    else:
                        command += " -np %d" % abs(charge)
                else:
                    # Create the genion command.
                    command = "echo SOL | %s genion -s ions.tpr -o solvated_ions.gro -p solvated.top -%s -conc %f" \
                        % (_gmx_exe, "neutral" if is_neutral else "noneutral", ion_conc)

                with open("README.txt", "a") as file:
                    # Write the command to file.
                    file.write(
                        "\n# gmx genion was run with the following command:\n")
                    file.write("%s\n" % command)

                # Run genion as a subprocess.
                _run_gmx_command(command, "genion")

                # Check for the output GRO file.
                if not _os.path.isfile("solvated_ions.gro"):
                    if shell is None:
                        raise RuntimeError(
                            "'gmx genion' failed to add ions! Perhaps your box is too small?"
                        )
                    else:
                        is_break = True
                        _warnings.warn(
                            "Unable to achieve target ion concentration, try using "
                            "'box' option instead of 'shell'.")

                if not is_break:

                    # Counters for the number of SOL, NA, and CL atoms.
                    num_sol = 0
                    num_na = 0
                    num_cl = 0

                    # We now need to loop through the GRO file to extract the
                    # lines corresponding to water or ion atoms.
                    water_ion_lines = []

                    with open("solvated_ions.gro", "r") as file:
                        for line in file:
                            # The tests are mutually exclusive so that each
                            # record is stored, and counted, exactly once.

                            # This is a Sodium atom.
                            if _re.search("NA", line):
                                water_ion_lines.append(line)
                                num_na += 1

                            # This is a Chlorine atom.
                            elif _re.search("CL", line):
                                water_ion_lines.append(line)
                                num_cl += 1

                            # This is a water atom.
                            elif _re.search("SOL", line):
                                water_ion_lines.append(line)
                                num_sol += 1

                        # Add any box information. This is the last line in
                        # the GRO file.
                        water_ion_lines.append(line)

                    # Write a GRO file that contains only the water and ion
                    # atoms.
                    if len(water_ion_lines) - 1 > 0:
                        with open("water_ions.gro", "w") as file:
                            file.write("BioSimSpace %s water box\n" % model.upper())
                            file.write("%d\n" % (len(water_ion_lines) - 1))
                            for line in water_ion_lines:
                                file.write("%s" % line)

                    # Ions have been added. Update the TOP file for the water
                    # model with the new atom counts.
                    if num_na > 0 or num_cl > 0:
                        _write_water_topology(model, num_sol // num_point,
                                              num_na, num_cl)

                    # Load the water/ion box.
                    water_ions = _IO.readMolecules(
                        ["water_ions.gro", "water_ions.top"])

                    # Create a new system by adding the water to the original
                    # molecule.
                    if molecule is not None:

                        if type(molecule) is _System:
                            # Extract the non-water molecules from the
                            # original system.
                            non_waters = _Molecules(
                                molecule.search(
                                    "not water")._sire_object.toMolecules())

                            # Create a system by adding these to the water and
                            # ion molecules from gmx solvate, which will
                            # include the original waters.
                            system = non_waters.toSystem() + water_ions
                        else:
                            system = molecule.toSystem() + water_ions

                        # Add all of the water molecules' properties to the
                        # new system.
                        for prop in water_ions._sire_object.propertyKeys():
                            prop = property_map.get(prop, prop)

                            # Add the space property from the water system.
                            system._sire_object.setProperty(
                                prop, water_ions._sire_object.property(prop))
                    else:
                        system = water_ions

    # Store the name of the water model as a system property.
    system._sire_object.setProperty("water_model", _SireBase.wrap(model))

    return system


def _run_gmx_command(command, name):
    """Run a shell command, logging its output in the current directory.

    Parameters
    ----------

    command : str
        The command string, which is executed through the shell.

    name : str
        The prefix of the log files. Standard output is written to
        '<name>.out' and standard error to '<name>.err'.
    """
    # The context manager closes both log files even if launching the
    # subprocess raises. The exit code is deliberately not checked; the
    # caller validates success by looking for the expected output files.
    with open("%s.out" % name, "w") as stdout, \
            open("%s.err" % name, "w") as stderr:
        _subprocess.run(command, shell=True, stdout=stdout, stderr=stderr)


def _write_water_topology(model, num_sol, num_na=0, num_cl=0):
    """Write 'water_ions.top', an Amber03 based dummy topology for the solvent.

    Parameters
    ----------

    model : str
        The name of the water model.

    num_sol : int
        The number of water molecules.

    num_na : int
        The number of sodium ions. No record is written when zero.

    num_cl : int
        The number of chlorine ions. No record is written when zero.
    """
    with open("water_ions.top", "w") as file:
        file.write("#define FLEXIBLE 1\n\n")
        file.write("; Include AmberO3 force field\n")
        file.write('#include "amber03.ff/forcefield.itp"\n\n')
        file.write("; Include %s water topology\n" % model.upper())
        file.write('#include "amber03.ff/%s.itp"\n\n' % model)
        file.write("; Include ions\n")
        file.write('#include "amber03.ff/ions.itp"\n\n')
        file.write("[ system ] \n")
        file.write("BioSimSpace %s water box\n\n" % model.upper())
        file.write("[ molecules ] \n")
        file.write(";molecule name nr.\n")
        file.write("SOL %d\n" % num_sol)
        if num_na > 0:
            file.write("NA %d\n" % num_na)
        if num_cl > 0:
            file.write("CL %d\n" % num_cl)
def getFrame(trajectory, topology, index):
    """Extract a single frame from a trajectory file.

    The frame is first loaded with MDTraj using the topology file as given.
    If that fails, the topology is copied to a hidden file in the current
    directory whose extension matches its detected format, and the load is
    retried. The frame is then written to a temporary coordinate file and
    read back, together with the topology, as a System.

    Parameters
    ----------

    trajectory : str
        A trajectory file.

    topology : str
        A topology file.

    index : int
        The index of the frame.

    Returns
    -------

    frame : :class:`System <BioSimSpace._SireWrappers.System>`
        The System object of the corresponding frame.

    Raises
    ------

    TypeError
        If any argument is of the wrong type.

    IOError
        If the frame cannot be loaded by MDTraj, or if the extracted frame
        cannot be read back as a System.
    """

    if type(trajectory) is not str:
        raise TypeError("'trajectory' must be of type 'str'")

    if type(topology) is not str:
        raise TypeError("'topology' must be of type 'str'")

    if type(index) is not int:
        raise TypeError("'index' must be of type 'int'")

    # Try to load the frame.
    try:
        frame = _mdtraj.load_frame(trajectory, index, top=topology)
    except Exception:
        # The path to the temporary topology file. This stays unset until
        # the file format of the topology is known, so that the clean-up
        # below never refers to an undefined name.
        top_file = None

        try:
            # Load the topology file to determine the file format.
            file_format = _IO.readMolecules(topology).fileFormat()

            # Set the extension.
            extension = _extensions.get(file_format, file_format.lower())

            # Set the path to the temporary topology file.
            top_file = _os.getcwd() + "/.topology." + extension

            # Copy the topology to a file with the correct extension.
            _shutil.copyfile(topology, top_file)

            frame = _mdtraj.load_frame(trajectory, index, top=top_file)
        except Exception:
            raise IOError(
                "MDTraj failed to read frame %d from: traj=%s, top=%s" %
                (index, trajectory, topology))
        finally:
            # Remove the temporary topology file, if it was created.
            if top_file is not None and _os.path.isfile(top_file):
                _os.remove(top_file)

    # The name of the frame coordinate file.
    frame_file = ".frame.nc"

    # Save the coordinates to file.
    frame.save(frame_file)

    # Load the frame into a System object.
    try:
        system = _System(_SireIO.MoleculeParser.read([topology, frame_file]))
    except Exception as e:
        msg = "Failed to read trajectory frame: '%s'" % frame_file
        # Only expose the underlying exception in verbose mode.
        if _isVerbose():
            raise IOError(msg) from e
        else:
            raise IOError(msg) from None
    finally:
        # Remove the temporary frame coordinate file.
        if _os.path.isfile(frame_file):
            _os.remove(frame_file)

    # Return the system.
    return system
def getTrajectory(self, format="mdtraj"):
    """Get the current trajectory object.

    Parameters
    ----------

    format : str
        Whether to return an 'MDTraj' or 'MDAnalysis' object. The value is
        matched case-insensitively. An unrecognised value triggers a
        warning and the default (mdtraj) is used instead.

    Returns
    -------

    trajectory : mdtraj.core.trajectory.Trajectory, MDAnalysis.core.universe.Universe
        The trajectory in MDTraj or MDAnalysis format, or None if the
        chosen package failed to read the files (a warning is issued).

    Raises
    ------

    TypeError
        If 'format' is not a string.

    IOError
        If the trajectory or topology file doesn't exist.
    """

    if type(format) is not str:
        raise TypeError("'format' must be of type 'str'")

    # Normalise the case once, so that the validation and the dispatch
    # below agree on what, e.g., 'MDTraj' means.
    format = format.lower()

    if format not in ("mdtraj", "mdanalysis"):
        _warnings.warn(
            "Invalid trajectory format. Using default (mdtraj).")
        format = "mdtraj"

    # Set the location of the trajectory and topology files.
    if self._process is not None:
        traj_file = self._process._traj_file

        # Weirdly, the GRO file is used as the topology.
        if self._process_name.upper() == "GROMACS":
            top_file = self._process._gro_file
        else:
            top_file = self._process._top_file
    else:
        traj_file = self._traj_file
        top_file = self._top_file

    # Check that the trajectory and topology files exist.
    if not _os.path.isfile(traj_file):
        raise IOError("Trajectory file doesn't exist: '%s'" % traj_file)

    if not _os.path.isfile(top_file):
        raise IOError("Topology file doesn't exist: '%s'" % top_file)

    # Load the topology file to determine the file format.
    file_format = _IO.readMolecules(top_file).fileFormat()

    # Set the extension.
    extension = _extensions.get(file_format, file_format.lower())

    # Set the path to the temporary topology file.
    new_top_file = _os.getcwd() + "/.topology." + extension

    # Copy the topology to a file with the correct extension.
    _shutil.copyfile(top_file, new_top_file)

    try:
        # Return an MDTraj object.
        if format == "mdtraj":
            try:
                return _mdtraj.load(traj_file, top=new_top_file)
            except Exception:
                _warnings.warn("MDTraj failed to read: traj=%s, top=%s" %
                               (traj_file, top_file))
                return None

        # Return an MDAnalysis Universe.
        try:
            return _mdanalysis.Universe(new_top_file, traj_file)
        except Exception:
            _warnings.warn("MDAnalysis failed to read: traj=%s, top=%s" %
                           (traj_file, top_file))
            return None
    finally:
        # Remove the temporary topology file, whatever the outcome.
        if _os.path.isfile(new_top_file):
            _os.remove(new_top_file)
def run(self, molecule, work_dir=None, queue=None):
    """Run the parameterisation protocol.

    Parameters
    ----------

    molecule : BioSimSpace._SireWrappers.Molecule
        The molecule to apply the parameterisation protocol to.

    work_dir : str
        The working directory. The current directory is used when None.

    queue : queue.Queue
        The thread queue is which this method has been run. When set, the
        parameterised molecule is also put onto the queue.

    Returns
    -------

    molecule : BioSimSpace._SireWrappers.Molecule
        The parameterised molecule.

    Raises
    ------

    TypeError
        If an argument is of the wrong type.

    _MissingSoftwareError
        If none of the programs supported by the protocol is installed.

    RuntimeError
        If the protocol supports neither tLEaP nor pdb2gmx.

    IOError
        If the parameterised molecule cannot be read back.
    """

    if type(molecule) is not _Molecule:
        raise TypeError(
            "'molecule' must be of type 'BioSimSpace._SireWrappers.Molecule'"
        )

    # None is the default for both optional arguments, so test the value
    # itself: 'type(x) is not None' is always True.
    if work_dir is not None and type(work_dir) is not str:
        raise TypeError("'work_dir' must be of type 'str'")

    if queue is not None and type(queue) is not _queue.Queue:
        raise TypeError("'queue' must be of type 'queue.Queue'")

    # Set work_dir to the current directory.
    if work_dir is None:
        work_dir = _os.getcwd()

    # Create the file prefix.
    prefix = work_dir + "/"

    # Create a new molecule using a deep copy of the internal Sire Molecule.
    new_mol = _Molecule(molecule._getSireMolecule().__deepcopy__())

    # Choose the program to run with depending on the force field
    # compatibility and on which executables are available. tLEaP is
    # preferred; pdb2gmx is used when tLEaP isn't supported or isn't
    # installed.
    use_tleap = self._tleap and _tleap_exe is not None
    use_pdb2gmx = self._pdb2gmx and _gmx_exe is not None

    if use_tleap:
        output = self._run_tleap(molecule, work_dir)
    elif use_pdb2gmx:
        output = self._run_pdb2gmx(molecule, work_dir)
    elif self._tleap:
        # tLEaP is missing and pdb2gmx is either unsupported or missing.
        raise _MissingSoftwareError(
            "Cannot parameterise. Missing AmberTools and GROMACS.")
    elif self._pdb2gmx:
        raise _MissingSoftwareError(
            "Cannot use pdb2gmx since GROMACS is not installed!")
    else:
        # Fail with a clear message rather than on an undefined 'output'.
        raise RuntimeError(
            "Cannot parameterise: the protocol supports neither "
            "tLEaP nor pdb2gmx.")

    # Prepend the working directory to the output file names.
    output = [prefix + output[0], prefix + output[1]]

    try:
        # Load the parameterised molecule.
        par_mol = _Molecule(
            _IO.readMolecules(output)._getSireSystem()[_Sire.Mol.MolIdx(0)])
    except Exception:
        raise IOError("Failed to read molecule from: '%s', '%s'" %
                      (output[0], output[1])) from None

    # Make the molecule 'mol' compatible with 'par_mol'. This will create
    # a mapping between atom indices in the two molecules and add all of
    # the new properties from 'par_mol' to 'mol'.
    new_mol._makeCompatibleWith(par_mol,
                                property_map=self._property_map,
                                overwrite=True,
                                verbose=False)

    # Record the forcefield used to parameterise the molecule.
    new_mol._forcefield = self._forcefield

    if queue is not None:
        queue.put(new_mol)

    return new_mol