def _get_rdkit_mcs_mapping(fragment: Molecule, parent: Molecule) -> Dict[int, int]:
    """
    Map fragment atoms onto parent atoms using RDKit's maximum common
    substructure (MCS) search.

    Parameters
    ----------
    fragment: Molecule
        The fragment molecule; its matched atom indices become the keys.
    parent: Molecule
        The parent molecule; its matched atom indices become the values.

    Returns
    -------
    Dict[int, int]
        Fragment atom index -> parent atom index for each atom of the MCS
        substructure match found in both molecules.
    """
    from rdkit import Chem
    from rdkit.Chem import rdFMCS

    rd_parent = parent.to_rdkit()
    rd_fragment = fragment.to_rdkit()

    # Atoms are compared by element, bonds of any type may pair up, and ring
    # atoms may only be matched as part of complete rings.
    search_settings = dict(
        atomCompare=rdFMCS.AtomCompare.CompareElements,
        bondCompare=rdFMCS.BondCompare.CompareAny,
        ringMatchesRingOnly=True,
        completeRingsOnly=True,
    )
    mcs_result = rdFMCS.FindMCS([rd_parent, rd_fragment], **search_settings)

    # Turn the MCS SMARTS into a query and locate it in both molecules; the
    # two hit tuples are in query-atom order, so zipping them pairs the atoms.
    query = Chem.MolFromSmarts(mcs_result.smartsString)
    parent_hits = rd_parent.GetSubstructMatch(query)
    fragment_hits = rd_fragment.GetSubstructMatch(query)

    return dict(zip(fragment_hits, parent_hits))
def _prune_conformers(self, molecule: Molecule) -> None:
    """
    Drop conformers that are closer than ``self.cutoff`` to an earlier one.

    Conformers are compared pairwise using the best-fit RMSD returned by
    ``AlignMol``. Walking through the conformers in order, each conformer
    that is still kept acts as a reference; any later conformer whose RMSD
    to that reference is below ``self.cutoff`` is flagged for removal and is
    never used as a reference itself. The surviving conformers are written
    back to ``molecule._conformers`` in their original order.

    No comparison is made when the molecule has fewer than two conformers or
    when ``self.cutoff`` is negative; all conformers are then kept.

    Parameters
    ----------
    molecule: Molecule
        The molecule whose conformers are pruned in place.
    """
    n_conformers: int = molecule.n_conformers

    # keep[i] turns False once conformer i is found too close to a reference.
    # Being flagged once is enough to drop it from all later processing.
    keep = [True] * n_conformers

    if n_conformers > 1 and self.cutoff >= 0.0:
        # The RDKit copy is only needed for the alignment, so it is built
        # only when comparisons will actually be made.
        rdmol = molecule.to_rdkit()

        for ref in range(n_conformers - 1):
            # Already pruned conformers never serve as a reference.
            if not keep[ref]:
                continue

            # Only the upper triangle (ref < probe) has to be visited.
            for probe in range(ref + 1, n_conformers):
                # Already flagged by an earlier reference: aligning it again
                # cannot change the outcome, so skip the costly call.
                if not keep[probe]:
                    continue

                if AlignMol(rdmol, rdmol, probe, ref) < self.cutoff:
                    keep[probe] = False

    # Index into molecule.conformers only for kept entries so that nothing is
    # touched when there are no conformers at all.
    molecule._conformers = [
        molecule.conformers[index] for index, kept in enumerate(keep) if kept
    ]
def _get_new_single_graph_smirks(
    self,
    atoms: Tuple[int, ...],
    molecule: Molecule,
) -> str:
    """
    Build a smirks pattern describing the selected atoms of a molecule.

    Parameters
    ----------
    atoms: Tuple[int]
        The indices of the atoms that need a new smirks pattern.
    molecule: off.Molecule
        The molecule the pattern should be generated from.

    Returns
    -------
    str
        One uncompressed smirks string covering the requested atoms, built
        with ``self.smirks_layers`` layers.
    """
    rdkit_molecule = molecule.to_rdkit()
    smirks_graph = SingleGraph(
        mol=rdkit_molecule,
        smirks_atoms=atoms,
        layers=self.smirks_layers,
    )
    return smirks_graph.as_smirks(compress=False)
def compute_conformer_energies_from_file(filename):
    """
    Minimize every conformer stored in a molecule file and report the energies.

    The records in ``filename`` are loaded through the RDKit toolkit wrapper,
    consecutive records that compare equal are merged into one
    multi-conformer molecule, and each conformer is energy minimized with
    OpenMM using the ``openff_unconstrained-1.1.0.offxml`` force field.

    For each molecule the function prints a table to stdout and writes, in the
    current working directory:

    * ``<name>_conf<i>_minimized.sdf`` for every minimized conformer, and
    * ``<name>.csv`` with the initial energy, minimized energy (both in
      kcal/mol) and the RMS between the initial and minimized conformer.

    Parameters
    ----------
    filename
        The file handed to ``Molecule.from_file``.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)

    # from_file hands back a bare Molecule instead of a list when the file
    # holds a single record; the logic below needs a list.
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    molecules = []
    for molecule in loaded_molecules:
        if molecules and molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openforcefield.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    kcal_per_mol = unit.kilocalories_per_mole

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        print('%s : %d conformers' % (molecule.name, molecule.n_conformers))

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)

        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)

        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer Initial PE Minimized PE RMS between initial and minimized conformer'
        )
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_number, conformer in enumerate(molecule.conformers,
                                                     start=1):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer.
            # The copy temporarily holds exactly two conformers (initial,
            # minimized) so the aligner reports a single RMS value.
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_number}_minimized.sdf',
                file_format='sdf')

            # Convert once; the values feed both the console and the CSV.
            orig_kcal = orig_potential / kcal_per_mol
            min_kcal = min_potential / kcal_per_mol
            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol %8.3f Angstroms' %
                (conformer_number, molecule.n_conformers, orig_kcal, min_kcal,
                 minimization_rms))
            output.append([
                str(conformer_number), f'{orig_kcal:.3f}', f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            of.writelines(','.join(line) + '\n' for line in output)

        # Clean up OpenMM Simulation
        del simulation, integrator