Beispiel #1
0
    def _get_rdkit_mcs_mapping(fragment: Molecule, parent: Molecule) -> Dict[int, int]:
        """
        Map fragment atom indices onto parent atom indices via the RDKit MCS search.

        The maximum common substructure is searched with atoms compared by
        element, any bond allowed to match any other bond, ring atoms only
        matching ring atoms, and only complete rings being matched.

        Parameters
        ----------
        fragment: Molecule
            The fragment molecule; its atom indices become the mapping keys.
        parent: Molecule
            The parent molecule; its atom indices become the mapping values.

        Returns
        -------
        Dict[int, int]
            Fragment atom index -> parent atom index for the atoms matched by
            the common substructure query.
        """

        from rdkit import Chem
        from rdkit.Chem import rdFMCS

        rd_parent = parent.to_rdkit()
        rd_fragment = fragment.to_rdkit()

        search_options = {
            "atomCompare": rdFMCS.AtomCompare.CompareElements,
            "bondCompare": rdFMCS.BondCompare.CompareAny,
            "ringMatchesRingOnly": True,
            "completeRingsOnly": True,
        }
        mcs_result = rdFMCS.FindMCS([rd_parent, rd_fragment], **search_options)

        # Turn the common substructure into a query and locate it in both molecules.
        query = Chem.MolFromSmarts(mcs_result.smartsString)
        parent_hits = rd_parent.GetSubstructMatch(query)
        fragment_hits = rd_fragment.GetSubstructMatch(query)

        # Both matches follow the query atom order, so pairing them position by
        # position links each fragment atom to its parent counterpart.
        return {
            fragment_index: parent_index
            for fragment_index, parent_index in zip(fragment_hits, parent_hits)
        }
Beispiel #2
0
    def _prune_conformers(self, molecule: Molecule) -> None:
        """
        Drop conformers that are too similar to an earlier conformer.

        Conformers are compared pairwise using the best-fit RMSD returned by
        ``AlignMol``. Whenever the RMSD between a retained reference conformer
        and a later conformer is below ``self.cutoff``, the later conformer is
        discarded. The molecule is updated in place: ``molecule._conformers``
        is replaced by the retained conformers in their original order.

        Nothing is changed when the molecule has fewer than two conformers or
        when ``self.cutoff`` is not ``>= 0.0``.

        Parameters
        ----------
        molecule: Molecule
            The molecule whose conformers should be pruned in place.
        """
        n_conformers: int = molecule.n_conformers

        # Nothing to compare, or pruning is switched off by the cutoff.
        if not (n_conformers > 1 and self.cutoff >= 0.0):
            return

        cutoff = self.cutoff

        # keep[i] turns False as soon as conformer i is found to be too close
        # to an earlier retained conformer; one hit is enough to prune it.
        keep = [True] * n_conformers

        # The RDKit molecule is needed to get the aligned best-fit RMSD.
        rdmol = molecule.to_rdkit()

        # Only the upper triangle of the comparison matrix (ref < probe) is visited.
        for ref in range(n_conformers - 1):

            # A pruned conformer is never used as a reference.
            if not keep[ref]:
                continue

            for probe in range(ref + 1, n_conformers):

                # Already pruned: its flag cannot change again, so another
                # alignment would be wasted work.
                if not keep[probe]:
                    continue

                if AlignMol(rdmol, rdmol, probe, ref) < cutoff:
                    keep[probe] = False

        molecule._conformers = [
            conformer
            for conformer, is_unique in zip(molecule.conformers, keep)
            if is_unique
        ]
Beispiel #3
0
    def _get_new_single_graph_smirks(
        self,
        atoms: Tuple[int, ...],
        molecule: Molecule,
    ) -> str:
        """
        Build a smirks pattern describing the chosen atoms of one molecule.

        Parameters
        ----------
        atoms: Tuple[int]
            Indices of the atoms the new smirks pattern must cover.
        molecule: off.Molecule
            The molecule the pattern is generated from.

        Returns
        -------
        str
            The smirks string produced by a ``SingleGraph`` built from the
            molecule, the requested atoms and ``self.smirks_layers``.
        """
        rdkit_molecule = molecule.to_rdkit()
        layer_count = self.smirks_layers
        smirks_graph = SingleGraph(
            mol=rdkit_molecule,
            smirks_atoms=atoms,
            layers=layer_count,
        )
        smirks_pattern = smirks_graph.as_smirks(compress=False)
        return smirks_pattern
Beispiel #4
0
def compute_conformer_energies_from_file(filename):
    """
    Minimize every conformer found in a molecule file and report the energies.

    The file is read through the RDKit toolkit wrapper, consecutive records
    describing the same molecule are merged into one multi-conformer molecule,
    and each molecule is parametrized with the
    ``openff_unconstrained-1.1.0.offxml`` force field. Every conformer is then
    energy-minimized with OpenMM on the ``Reference`` platform.

    For each conformer the initial energy, the minimized energy and the RMS
    between the initial and minimized coordinates are printed, and the
    minimized conformer is written to
    ``<molecule name>_conf<index>_minimized.sdf``. For each molecule the same
    numbers are also written to ``<molecule name>.csv``.

    Parameters
    ----------
    filename
        Path of the molecule file, passed straight to ``Molecule.from_file``.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)
    # from_file may hand back a single Molecule instead of a list (see the
    # toolkit documentation); the grouping below needs a list.
    if isinstance(loaded_molecules, Molecule):
        loaded_molecules = [loaded_molecules]

    # Collate all conformers of the same molecule
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    molecules = []
    for molecule in loaded_molecules:
        if molecules and molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum(mol.n_conformers for mol in molecules)
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openforcefield.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')

    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
        # Announce every molecule, not only those that had to be named above.
        print('%s : %d conformers' % (molecule.name, molecule.n_conformers))

        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)
        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)
        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer         Initial PE         Minimized PE       RMS between initial and minimized conformer'
        )
        # Rows of the per-molecule CSV report, starting with the column titles
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]
        for conformer_index, conformer in enumerate(molecule.conformers):
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer:
            # the copy temporarily holds exactly these two conformers.
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')

            # Convert once and reuse for both the console line and the CSV row
            orig_kcal = orig_potential / unit.kilocalories_per_mole
            min_kcal = min_potential / unit.kilocalories_per_mole
            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol  %8.3f Angstroms' %
                (conformer_index + 1, molecule.n_conformers, orig_kcal,
                 min_kcal, minimization_rms))
            output.append([
                str(conformer_index + 1),
                f'{orig_kcal:.3f}',
                f'{min_kcal:.3f}',
                f'{minimization_rms:.3f}'
            ])

        # Write the results out to CSV
        with open(f'{molecule.name}.csv', 'w') as csv_file:
            csv_file.writelines(','.join(row) + '\n' for row in output)

        # Clean up this molecule's OpenMM Simulation before moving on
        del simulation, integrator