def _prune_conformers(self, molecule: Molecule) -> None:
    """
    Remove near-duplicate conformers from ``molecule`` in place.

    Conformers are compared pairwise using the value returned by
    ``AlignMol`` on the RDKit form of the molecule. Walking the conformers in
    order, every conformer that is still considered unique acts as a
    reference, and any later conformer whose RMSD to that reference is
    strictly below ``self.cutoff`` is dropped. The first conformer is
    therefore always kept.

    No comparison is made when the molecule has fewer than two conformers or
    when ``self.cutoff`` is negative; ``molecule._conformers`` is still
    replaced by a fresh list in those cases.

    Args:
        molecule: The molecule to prune. Its ``_conformers`` attribute is
            overwritten with the list of surviving conformers.
    """
    no_conformers: int = molecule.n_conformers

    # uniq[i] turns False as soon as conformer i is found too close to an
    # earlier conformer. One hit is enough: the conformer is then neither
    # kept nor used as a reference or probe again.
    uniq: List[bool] = [True] * no_conformers

    if no_conformers > 1 and self.cutoff >= 0.0:
        # Needed to get the aligned best-fit RMSD
        rdmol = molecule.to_rdkit()

        # Only the upper triangle (reference < probe) is visited.
        for ref_index in range(no_conformers - 1):
            if not uniq[ref_index]:
                continue

            for probe_index in range(ref_index + 1, no_conformers):
                # Already pruned by an earlier reference: aligning it again
                # could not change the outcome, so skip the call.
                if not uniq[probe_index]:
                    continue

                if AlignMol(rdmol, rdmol, probe_index, ref_index) < self.cutoff:
                    uniq[probe_index] = False

    # Index-based on purpose: when there are no conformers the comprehension
    # never touches ``conformers``, which may not be a list in that case.
    conformers = molecule.conformers
    molecule._conformers = [
        conformers[index] for index, keep in enumerate(uniq) if keep
    ]
def add_molecule(self, molecule: off.Molecule) -> bool:
    """
    Add a molecule to the molecule list after checking that it is not
    present already. If it is, de-duplicate the record and condense the
    conformers and metadata onto the stored molecule.

    Molecules are keyed by their fixed-hydrogen InChIKey. The ``atom_map``
    entry is always removed from ``molecule.properties``. When
    ``self.skip_unique_check`` is set, the molecule is stored under its key
    without any comparison, replacing whatever was stored there.

    Args:
        molecule:
            The molecule and its conformers which we should try and add to
            the result.

    Returns:
        `True` if the molecule is already present and `False` if not.

    Raises:
        AssertionError: If a stored molecule shares the InChIKey but is not
            reported as isomorphic to ``molecule``.
    """
    # always strip the atom map as it is not preserved in a workflow
    molecule.properties.pop("atom_map", None)

    # make a unique molecule hash independent of atom order or conformers
    molecule_hash = molecule.to_inchikey(fixed_hydrogens=True)

    if self.skip_unique_check or molecule_hash not in self._molecules:
        if molecule.n_conformers == 0:
            # make sure this is a list to avoid errors
            molecule._conformers = []
        self._molecules[molecule_hash] = molecule
        return False

    stored = self._molecules[molecule_hash]

    # we need to align the molecules and transfer the coords and properties
    # get the mapping, drop some comparisons to match inchikey
    isomorphic, mapping = off.Molecule.are_isomorphic(
        molecule,
        stored,
        return_atom_map=True,
        formal_charge_matching=False,
        bond_order_matching=False,
    )
    # Raised explicitly so the check survives ``python -O``.
    if isomorphic is not True:
        raise AssertionError(
            f"molecules sharing the hash {molecule_hash} are not isomorphic"
        )

    # transfer any torsion indexes for similar fragments
    if "dihedrals" in molecule.properties:
        # get the stored molecule's dihedrals indexer, creating a new one if
        # it is missing, then merge in the new molecule's torsions
        current_indexer = stored.properties.get("dihedrals", TorsionIndexer())
        current_indexer.update(
            torsion_indexer=molecule.properties["dihedrals"],
            reorder_mapping=mapping,
        )
        # store it back
        stored.properties["dihedrals"] = current_indexer

    if molecule.n_conformers == 0:
        # molecule already in list and coords not present so just return
        return True

    # transfer the coordinates
    for conformer in molecule.conformers:
        # NOTE(review): row i of the reordered conformer is taken from row
        # mapping[i] of the incoming one; confirm this matches the direction
        # of the atom map returned by are_isomorphic.
        new_conformer = np.zeros((molecule.n_atoms, 3))
        for i in range(molecule.n_atoms):
            new_conformer[i] = conformer[mapping[i]].value_in_unit(unit.angstrom)

        new_conf = unit.Quantity(value=new_conformer, unit=unit.angstrom)
        new_coords = new_conf.tolist()

        # only add the conformer if it is not already on the molecule; the
        # stored list is re-read each time so conformers added in this call
        # are also checked
        if not any(old.tolist() == new_coords for old in stored.conformers):
            stored.add_conformer(new_conf)

    # The molecule was already present: report it as a duplicate even when
    # conformers were merged (this path previously returned None).
    return True
def compute_conformer_energies_from_file(filename):
    """
    Minimize every conformer found in ``filename`` and report the energies.

    The file is loaded through the RDKit toolkit wrapper, consecutive records
    that compare equal are merged into one multi-conformer molecule, and each
    molecule is parametrized with ``openff_unconstrained-1.1.0.offxml``. For
    every conformer the potential energy is evaluated before and after an
    OpenMM minimization on the ``Reference`` platform.

    Side effects, per molecule:

    * progress and a results table are printed to stdout;
    * each minimized conformer is written to
      ``<name>_conf<N>_minimized.sdf`` (``N`` starts at 1);
    * the results table is written to ``<name>.csv``.

    A molecule with an empty name is given its Hill formula as name, which
    mutates the loaded molecule. An input that yields no molecules prints the
    zero counts and produces no files.

    Args:
        filename: Path of the molecule file handed to ``Molecule.from_file``.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)

    # The logic below only works for lists of molecules, so if a
    # single molecule was loaded, cast it to list
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    # Only adjacent records are merged; starting from an empty list keeps an
    # empty load from raising IndexError.
    molecules = []
    for molecule in loaded_molecules:
        if molecules and molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openff.toolkit.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        print(f'{molecule.name} : {molecule.n_conformers:d} conformers')

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)

        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)

        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer Initial PE Minimized PE RMS between initial and minimized conformer'
        )
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_index, conformer in enumerate(molecule.conformers):
            conformer_number = conformer_index + 1

            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Convert once; both the printed row and the CSV row use these.
            orig_kcal = orig_potential / unit.kilocalories_per_mole
            min_kcal = min_potential / unit.kilocalories_per_mole

            # Calculate the RMSD between the initial and minimized conformer
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            # The scratch copy holds exactly two conformers here: the
            # starting geometry and the minimized one.
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_number}_minimized.sdf',
                file_format='sdf')
            print(
                f'{conformer_number:5d} / {molecule.n_conformers:5d} : '
                f'{orig_kcal:8.3f} kcal/mol {min_kcal:8.3f} kcal/mol '
                f'{minimization_rms:8.3f} Angstroms'
            )
            output.append([
                str(conformer_number),
                f'{orig_kcal:.3f}',
                f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            of.writelines(','.join(line) + '\n' for line in output)

        # Clean up OpenMM Simulation
        del simulation, integrator