def _prune_conformers(self, molecule: Molecule) -> None:
    """Drop conformers that are near-duplicates of an earlier conformer.

    Conformers are compared pairwise using the best-fit RMSD returned by
    ``AlignMol`` on the RDKit representation of ``molecule``. A conformer
    whose RMSD to an earlier, retained conformer is below ``self.cutoff``
    is removed. The surviving conformers keep their original relative
    order and are written back to ``molecule._conformers``.

    No comparisons are made when the molecule has fewer than two
    conformers or when ``self.cutoff`` is negative; in that case every
    conformer is retained.

    Parameters:
        molecule: The molecule whose conformers are pruned in place.
    """
    n_conformers: int = molecule.n_conformers

    # keep[i] turns False as soon as conformer i is found to be too close
    # to a retained conformer; one hit is enough to exclude it from all
    # later comparisons.
    keep = [True] * n_conformers

    if n_conformers > 1 and self.cutoff >= 0.0:
        # The aligned best-fit RMSD is computed on the RDKit molecule.
        rdmol = molecule.to_rdkit()

        for ref_idx in range(n_conformers - 1):
            # An already-pruned conformer is never used as a reference.
            if not keep[ref_idx]:
                continue

            # Only the upper triangle (ref_idx < probe_idx) is visited.
            for probe_idx in range(ref_idx + 1, n_conformers):
                # Already pruned: another alignment cannot change the
                # outcome, so skip the needless work.
                if not keep[probe_idx]:
                    continue

                # Aligns conformer probe_idx onto ref_idx, returns the RMSD.
                if AlignMol(rdmol, rdmol, probe_idx, ref_idx) < self.cutoff:
                    keep[probe_idx] = False

    molecule._conformers = [
        molecule.conformers[idx] for idx, retained in enumerate(keep) if retained
    ]
def _remap_single_result(
    self,
    mapping: Dict[int, int],
    new_molecule: off.Molecule,
    result: SingleResult,
    extras: Optional[Dict[str, Any]] = None,
) -> SingleResult:
    """
    Given a single result and a mapping remap the result ordering to match
    the fitting schema order.

    The geometry and the gradient are reordered with the same convention so
    that row ``i`` of both arrays describes atom ``i`` of ``new_molecule``:
    ``mapping[i]`` is used as the index of that atom in ``result``.
    NOTE(review): this is the direction the gradient remap already used;
    confirm against the caller that builds ``mapping``.

    Any conformers already on ``new_molecule`` are discarded and replaced by
    the single remapped geometry.

    Parameters:
        mapping: The mapping between the old and new molecule.
        new_molecule: The new molecule in the correct order.
        result: The single result which should be remapped.
        extras: Any extras that should be added to the result.

    Returns:
        A new ``SingleResult`` holding the remapped molecule and gradient,
        the original id and energy, and no hessian.
    """
    n_atoms = new_molecule.n_atoms

    # Start from a clean slate so only the remapped geometry is attached.
    new_molecule._conformers = []

    new_conformer = np.zeros((n_atoms, 3))
    new_gradient = np.zeros((n_atoms, 3))
    for i in range(n_atoms):
        old_index = mapping[i]
        # Geometry and gradient must be permuted identically, otherwise the
        # gradient rows no longer belong to the atoms they are listed with.
        new_conformer[i] = result.molecule.geometry[old_index]
        new_gradient[i] = result.gradient[old_index]

    # The geometry is attached as a quantity in bohr.
    new_molecule.add_conformer(unit.Quantity(new_conformer, unit.bohr))

    # NOTE: the hessian is not remapped; it is dropped from the new result.
    return SingleResult(
        molecule=new_molecule.to_qcschema(),
        id=result.id,
        energy=result.energy,
        gradient=new_gradient,
        hessian=None,
        extras=extras,
    )
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer found in ``filename`` and report energies.

    For each distinct molecule in the file the function parameterizes it
    with the ``openff_unconstrained-1.1.0.offxml`` force field, then for
    every conformer computes the initial and minimized potential energy
    (reported in kcal/mol) and the RMS between the initial and minimized
    coordinates.

    Side effects, per molecule:
      * progress and a results table are printed to stdout;
      * each minimized conformer is written to
        ``<name>_conf<index>_minimized.sdf``;
      * the results table is written to ``<name>.csv``.

    Parameters:
        filename: Path of the molecule file, passed to ``Molecule.from_file``.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)

    # from_file hands back a bare Molecule (not a list) when the file holds
    # a single record; normalise so the collation below works either way.
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    molecules = []
    for molecule in loaded_molecules:
        if molecules and molecule == molecules[-1]:
            # Same molecule as the previous record: fold its conformers in.
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openforcefield.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        print('%s : %d conformers' % (molecule.name, molecule.n_conformers))

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)

        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)

        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer Initial PE Minimized PE RMS between initial and minimized conformer'
        )
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]

        for conformer_index, conformer in enumerate(molecule.conformers):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            # Reset the scratch copy so it holds exactly two conformers:
            # the starting geometry followed by the minimized one.
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')
            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol %8.3f Angstroms' %
                (conformer_index + 1, molecule.n_conformers,
                 orig_potential / unit.kilocalories_per_mole,
                 min_potential / unit.kilocalories_per_mole,
                 minimization_rms))
            output.append([
                str(conformer_index + 1),
                f'{orig_potential/unit.kilocalories_per_mole:.3f}',
                f'{min_potential/unit.kilocalories_per_mole:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            for line in output:
                of.write(','.join(line) + '\n')

        # Clean up OpenMM Simulation
        del simulation, integrator