Ejemplo n.º 1
0
    def _prune_conformers(self, molecule: Molecule) -> None:
        """Drop conformers that are near-duplicates of an earlier conformer.

        Conformers are compared pairwise using the value returned by
        ``AlignMol`` (the best-fit RMSD after alignment). Walking the
        conformers in order, every conformer that has not itself been
        pruned acts as a reference, and each later conformer whose RMSD to
        that reference is strictly below ``self.cutoff`` is discarded.

        The surviving conformers, in their original order, are written back
        to ``molecule._conformers``. Nothing is modified when the molecule
        has fewer than two conformers or when ``self.cutoff`` is negative.

        Parameters
        ----------
        molecule
            The molecule whose conformer list is pruned in place.
        """
        no_conformers: int = molecule.n_conformers

        # Pruning needs at least one pair of conformers and a non-negative
        # cutoff; otherwise leave the molecule untouched (and skip the
        # RDKit conversion altogether).
        if not (no_conformers > 1 and self.cutoff >= 0.0):
            return

        # uniq[i] flips to False the first time conformer i is found to be
        # within the cutoff of an earlier, kept conformer. One hit is
        # enough: a flagged conformer is neither used as a reference nor
        # compared again.
        uniq: List[bool] = [True] * no_conformers

        # Needed to get the aligned best-fit RMSD
        rdmol = molecule.to_rdkit()

        # j is the reference conformer for the RMSD calculation
        for j in range(no_conformers - 1):

            # A previous pass has determined this conformer is too close
            # to another, so it cannot serve as a reference
            if not uniq[j]:
                continue

            # since k starts from j+1, only the upper triangle of the
            # comparisons (j < k) is visited
            for k in range(j + 1, no_conformers):

                # Already flagged against an earlier reference: aligning
                # it again cannot change the outcome.
                if not uniq[k]:
                    continue

                # Conformer k is the probe, conformer j the reference.
                if AlignMol(rdmol, rdmol, k, j) < self.cutoff:
                    uniq[k] = False

        molecule._conformers = [
            conformer
            for conformer, keep in zip(molecule.conformers, uniq)
            if keep
        ]
Ejemplo n.º 2
0
    def _remap_single_result(
        self,
        mapping: Dict[int, int],
        new_molecule: off.Molecule,
        result: SingleResult,
        extras: Optional[Dict[str, Any]] = None,
    ) -> SingleResult:
        """
        Build a new SingleResult whose atom ordering follows ``new_molecule``.

        The geometry of ``result`` is reordered through ``mapping`` and becomes
        the only conformer of ``new_molecule``; the gradient is reordered as
        well. The id and energy are carried over unchanged and the hessian of
        the returned result is always ``None``.

        Parameters:
            mapping: The mapping between the old and new molecule.
            new_molecule: The new molecule in the correct order. Its existing
                conformers are discarded and replaced by the remapped geometry.
            result: The single result which should be remapped.
            extras: Any extras that should be added to the result.

        Returns:
            A SingleResult built from the remapped molecule and gradient.
        """
        # Start from a clean slate so the remapped geometry is the only conformer.
        new_molecule._conformers = []

        # Reorder the coordinates: row ``atom`` of the old geometry lands in
        # row ``mapping[atom]`` of the new one.
        remapped_xyz = np.zeros((new_molecule.n_atoms, 3))
        for atom in range(new_molecule.n_atoms):
            target_row = mapping[atom]
            remapped_xyz[target_row] = result.molecule.geometry[atom]
        new_molecule.add_conformer(unit.Quantity(remapped_xyz, unit.bohr))

        # Reorder the gradient: row ``atom`` of the new gradient is taken from
        # row ``mapping[atom]`` of the old one.
        # NOTE(review): this applies ``mapping`` in the opposite direction to the
        # geometry loop above; the two only agree when the mapping is its own
        # inverse. Confirm which direction is intended.
        remapped_gradient = np.zeros((new_molecule.n_atoms, 3))
        for atom in range(new_molecule.n_atoms):
            source_row = mapping[atom]
            remapped_gradient[atom] = result.gradient[source_row]

        # The hessian is not remapped here; the new result carries none.
        return SingleResult(
            molecule=new_molecule.to_qcschema(),
            id=result.id,
            energy=result.energy,
            gradient=remapped_gradient,
            hessian=None,
            extras=extras,
        )
Ejemplo n.º 3
0
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer found in a molecule file and report energies.

    The file is loaded through the RDKit toolkit wrapper, consecutive records
    that compare equal are merged into one multi-conformer molecule, and each
    molecule is parameterized with the ``openff_unconstrained-1.1.0`` force
    field. For every conformer the potential energy is evaluated before and
    after an OpenMM energy minimization, and the RMS between the initial and
    minimized coordinates is computed with RDKit.

    Side effects, all relative to the current working directory:

    * ``<name>_conf<N>_minimized.sdf`` is written for every conformer;
    * ``<name>.csv`` is written for every molecule, one row per conformer;
    * progress and a results table are printed to stdout.

    Molecules without a name are given their Hill formula as name.

    Parameters
    ----------
    filename
        Path of the molecule file, passed straight to ``Molecule.from_file``.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)
    # ``Molecule.from_file`` returns a bare Molecule rather than a list when
    # the file holds a single record; the collation below needs a list.
    if not isinstance(loaded_molecules, list):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    molecules = [loaded_molecules[0]]
    for molecule in loaded_molecules[1:]:
        if molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openforcefield.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        # NOTE(review): the conformer count is only printed for molecules
        # that arrived without a name -- confirm this is intended.
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
            print('%s : %d conformers' %
                  (molecule.name, molecule.n_conformers))

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)
        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)
        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer         Initial PE         Minimized PE       RMS between initial and minimized conformer'
        )
        # Rows destined for the CSV file; the first row is the column header.
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]
        for conformer_index, conformer in enumerate(molecule.conformers):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer:
            # load both into the scratch copy and let RDKit align them.
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')

            # Strip the units once; both the console line and the CSV row
            # report the energies in kcal/mol.
            orig_kcal = orig_potential / unit.kilocalories_per_mole
            min_kcal = min_potential / unit.kilocalories_per_mole
            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol  %8.3f Angstroms' %
                (conformer_index + 1, molecule.n_conformers, orig_kcal,
                 min_kcal, minimization_rms))
            output.append([
                str(conformer_index + 1),
                f'{orig_kcal:.3f}',
                f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as of:
            for line in output:
                of.write(','.join(line) + '\n')

        # Clean up OpenMM Simulation before moving on to the next molecule
        del simulation, integrator