예제 #1
0
    def make_probe(self, smiles: str) -> Chem.Mol:
        """
        Convert a SMILES string into an RDKit molecule carrying aligned conformers.

        Explicit hydrogens are added, up to 100 conformers are embedded
        (``forceTol=0.2``, ``pruneRmsThresh=0.1``), every conformer is
        optimised with UFF and the conformers are finally aligned to each other.

        Oddly, using the argument ``params=ETKDG()`` for
        ``AllChem.EmbedMultipleConfs`` gave results that were problematic,
        in that it was very strict in terms of free energy and did not
        consider minor torsions for O-phenylacetate. Whereas openbabel was fine with it.

        .. code-block:: bash

             obabel -:"CC(=O)Oc1ccccc1" --gen3d -p -osdf -O temp.sdf
             obabel temp.sdf -osdf -O conf.sdf --confab --conf 100 --original --rcutoff 0.05

        :param smiles: SMILES string of the probe molecule.
        :return: the hydrogenated molecule with its optimised, aligned conformers.
        :raises AssertionError: if the SMILES cannot be parsed or if no
            conformer could be embedded.
        """
        probe = Chem.MolFromSmiles(smiles)
        assert probe is not None, f'{smiles} is dodgy.'
        probe = Chem.AddHs(probe)
        idx = AllChem.EmbedMultipleConfs(probe,
                                         numConfs=100,
                                         forceTol=0.2,
                                         pruneRmsThresh=0.1)
        # Fail fast: there is no point optimising/aligning when embedding
        # produced nothing.
        assert len(idx) > 0, 'No conformers generated!'
        AllChem.UFFOptimizeMoleculeConfs(probe)
        rdMolAlign.AlignMolConformers(probe)
        return probe
예제 #2
0
    def test4AlignConfs(self):
        """Check that aligning conformers on a fixed atom subset superimposes those atoms.

        Ten conformers are embedded and UFF-minimised, aligned on the atoms
        listed in ``aids``, and each of those atoms is then required to sit
        at (nearly) the same position in every conformer as in the first one.
        """
        mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')

        cids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)

        for cid in cids:
            ff = ChemicalForceFields.UFFGetMoleculeForceField(mol, confId=cid)
            ff.Initialize()
            more = 1
            while more:
                more = ff.Minimize()
            # FIX: this should not be necessary but somehow more comes out to be 0
            # even with the structure still being crappy
            ff.Minimize()
        aids = [12, 13, 14, 15, 16, 17, 18]
        rdMolAlign.AlignMolConformers(mol, aids)

        # now test that the atom location of these atom are consistent
        confs = mol.GetConformers()
        for aid in aids:
            # Position of this atom in the first conformer, used as reference.
            mpos = None
            for conf in confs:
                pos = list(conf.GetAtomPosition(aid))
                if mpos is None:
                    mpos = pos
                    continue
                # ``failUnless`` is a deprecated alias that was removed in
                # Python 3.12; ``assertTrue`` is the supported spelling.
                self.assertTrue(lstFeq(mpos, pos, .5))
예제 #3
0
def confgen(input, output, prunermsthresh, numconf, add_ref):
    """Generate MMFF94s-minimised conformers for a MOL file and write them to an SD file.

    Conformers are embedded with ETKDGv2, minimised with MMFF94s, aligned to
    each other and written in order of increasing MMFF energy. Every record
    carries a ``CID`` (conformer id) and an ``Energy`` property.

    Args:
        input: path of the input MOL file.
        output: path of the SD file to write.
        prunermsthresh: RMS pruning threshold handed to the embedding parameters.
        numconf: number of conformers requested from the embedding.
        add_ref: when truthy, the input structure is written first with
            ``CID`` set to ``-1`` and an empty ``Energy``.
    """
    mol = Chem.AddHs(Chem.MolFromMolFile(input), addCoords=True)
    # Keep a copy of the input geometry before embedding modifies ``mol``.
    refmol = Chem.AddHs(Chem.Mol(mol))
    param = rdDistGeom.ETKDGv2()
    param.pruneRmsThresh = prunermsthresh
    cids = rdDistGeom.EmbedMultipleConfs(mol, numconf, param)
    mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant='MMFF94s')
    AllChem.MMFFOptimizeMoleculeConfs(mol, numThreads=0, mmffVariant='MMFF94s')
    w = Chem.SDWriter(output)
    # try/finally guarantees the writer is flushed and closed even when an
    # energy evaluation or a write fails part-way through.
    try:
        if add_ref:
            refmol.SetProp('CID', '-1')
            refmol.SetProp('Energy', '')
            w.write(refmol)

        res = []
        for cid in cids:
            ff = AllChem.MMFFGetMoleculeForceField(mol, mp, confId=cid)
            res.append((cid, ff.CalcEnergy()))
        sorted_res = sorted(res, key=lambda x: x[1])

        rdMolAlign.AlignMolConformers(mol)
        for cid, e in sorted_res:
            mol.SetProp('CID', str(cid))
            mol.SetProp('Energy', str(e))
            w.write(mol, confId=cid)
    finally:
        w.close()
예제 #4
0
    def test4AlignConfs(self):
        """Exercise ``AlignMolConformers`` with an atom subset and with ``RMSlist``.

        Covers three things: the aligned atoms end up at consistent
        positions across conformers, an RMS list with one entry per
        non-reference conformer is filled in, and a non-list ``RMSlist``
        argument raises ``AttributeError``.
        """
        mol = Chem.MolFromSmiles('C1CC1CNc(n2)nc(C)cc2Nc(cc34)ccc3[nH]nc4')

        conf_ids = rdDistGeom.EmbedMultipleConfs(mol, 10, 30, 100)

        for conf_id in conf_ids:
            field = ChemicalForceFields.UFFGetMoleculeForceField(mol,
                                                                 confId=conf_id)
            field.Initialize()
            # Keep minimising for as long as Minimize() reports a truthy value.
            while field.Minimize():
                pass
            # FIX: this should not be necessary but somehow the return value
            # comes out to be 0 even with the structure still being crappy
            field.Minimize()

        core_atoms = [12, 13, 14, 15, 16, 17, 18]
        rdMolAlign.AlignMolConformers(mol, core_atoms)

        # The aligned atoms must sit at consistent locations in all conformers.
        all_confs = mol.GetConformers()
        for atom_id in core_atoms:
            reference = 0
            for position, conf in enumerate(all_confs):
                coords = list(conf.GetAtomPosition(atom_id))
                if position == 0:
                    reference = coords
                else:
                    self.assertTrue(lstFeq(reference, coords, .5))

        # A list passed as RMSlist receives the RMS values.
        rms_values = []
        rdMolAlign.AlignMolConformers(mol, core_atoms, RMSlist=rms_values)
        self.assertTrue((len(rms_values) == mol.GetNumConformers() - 1))

        # Something sensible must happen when a stupid argument is provided.
        not_a_list = 4
        with self.assertRaises(AttributeError):
            rdMolAlign.AlignMolConformers(mol,
                                          atomIds=core_atoms,
                                          RMSlist=not_a_list)
예제 #5
0
 def _confsToAlignedMolsList(multiConfMol):
     """Split a multi-conformer RDKit mol into a list of aligned single-structure mols.

     The conformers of ``multiConfMol`` are aligned in place first; each one
     is then serialised to a mol block and parsed back (hydrogens kept) to
     obtain an independent molecule.
     """
     rdMolAlign.AlignMolConformers(multiConfMol)
     conf_ids = [conf.GetId() for conf in multiConfMol.GetConformers()]
     mol_blocks = (Chem.MolToMolBlock(multiConfMol, confId=conf_id)
                   for conf_id in conf_ids)
     return [Chem.MolFromMolBlock(block, removeHs=False)
             for block in mol_blocks]
예제 #6
0
 def _confsToAlignedMolsList(multiConfMol):
     """Split a multi-conformer RDKit mol into a list of aligned single-conformer mols.

     The conformers of ``multiConfMol`` are aligned in place first; each
     returned molecule is a copy of the input from which every conformer but
     one has been removed.
     """
     rdMolAlign.AlignMolConformers(multiConfMol)
     conf_ids = [conf.GetId() for conf in multiConfMol.GetConformers()]
     aligned = []
     for keep_id in conf_ids:
         single = Chem.Mol(multiConfMol)
         # Strip every conformer except the one this copy should retain.
         for drop_id in conf_ids:
             if drop_id != keep_id:
                 single.RemoveConformer(drop_id)
         aligned.append(single)
     return aligned
예제 #7
0
def align_offmol_conformers(offmol):
    """Return a copy of ``offmol`` with aligned conformers, plus the RMS list.

    The alignment is done by RDKit on an RDKit conversion of ``offmol``;
    the input molecule itself is not modified.

    Returns:
        A ``(molecule, rmslist)`` tuple: a deep copy of ``offmol`` whose
        conformers were replaced by the aligned ones, and the list RDKit
        filled in through its ``RMSlist`` argument.
    """
    from rdkit.Chem import rdMolAlign

    rd_version = offmol.to_rdkit()
    rms_values = []
    rdMolAlign.AlignMolConformers(rd_version, RMSlist=rms_values)
    aligned_conformers = Molecule.from_rdkit(rd_version).conformers

    # The RDKit roundtrip above may have messed with the properties dict,
    # so the aligned conformers are moved onto a copy of the original mol.
    result = copy.deepcopy(offmol)
    result._conformers = []
    for coordinates in aligned_conformers:
        result.add_conformer(coordinates)
    return result, rms_values
예제 #8
0
def _get_rms_two_conformers(mol: Molecule, positions1: unit.Quantity,
                            positions2: unit.Quantity) -> float:
    """Return the RMS between two sets of coordinates of ``mol``, computed by RDKit.

    A copy of ``mol`` receives the two coordinate sets as its only
    conformers; RDKit aligns them and the first reported RMS value is
    returned. ``mol`` itself is left untouched.
    """
    # TODO: Is it worth making Molecule.get_rmsd(), which operates
    # through ToolkitWrapper methods?
    from rdkit.Chem import rdMolAlign

    scratch = Molecule(mol)
    scratch._conformers = None
    for coordinates in (positions1, positions2):
        scratch.add_conformer(coordinates)

    rms_values: List = []
    rdMolAlign.AlignMolConformers(scratch.to_rdkit(), RMSlist=rms_values)
    return rms_values[0]
예제 #9
0
def conformers(
    mol: Chem.rdchem.Mol,
    conf_id: int = -1,
    n_confs: Union[int, List[int]] = None,
    align_conf: bool = True,
    n_cols: int = 3,
    sync_views: bool = True,
    remove_hs: bool = True,
    width: str = "auto",
):
    """Visualize the conformer(s) of a molecule.

    Args:
        mol: a molecule.
        conf_id: The ID of the conformer to show. -1 shows
            the first conformer. Only works if `n_confs` is None.
        n_confs: Can be a number of conformers
            to show or a list of conformer IDs. When None, only the
            conformer selected by `conf_id` is displayed. When -1, show all
            conformers. When a positive integer, the first `n_confs`
            conformers of the molecule are shown.
        align_conf: Whether to align conformers together.
        n_cols: Number of columns. Defaults to 3.
        sync_views: Whether to sync the multiple views.
        remove_hs: Whether to remove the hydrogens of the conformers. When
            False, hydrogens are added instead.
        width: The width of the returned view. Defaults to "auto".

    Returns:
        A single view when `n_confs` is None, otherwise a grid of views
        (one per selected conformer).

    Raises:
        ValueError: if the molecule has no conformers.
    """

    widgets = _get_ipywidgets()
    nv = _get_nglview()

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Clone the molecule so the caller's object is never modified.
    mol = copy.deepcopy(mol)

    if remove_hs:
        mol = Chem.RemoveHs(mol)  # type: ignore
    else:
        mol = Chem.AddHs(mol)  # type: ignore

    if n_confs is None:
        return nv.show_rdkit(mol, conf_id=conf_id)

    # If n_confs is an int, convert it to a list of conformer IDs. The IDs
    # are read from the molecule rather than assumed to be 0..n-1, because
    # conformer IDs are not necessarily contiguous (e.g. after conformers
    # have been removed).
    if n_confs == -1:
        n_confs = [conf.GetId() for conf in mol.GetConformers()]
    elif isinstance(n_confs, int):
        available_ids = [conf.GetId() for conf in mol.GetConformers()]
        # max(..., 0) keeps the previous behaviour (empty selection) for
        # negative values other than -1.
        n_confs = available_ids[:max(n_confs, 0)]

    if align_conf:
        rdMolAlign.AlignMolConformers(mol, confIds=n_confs)

    # Number of rows needed to place every view (ceiling division).
    n_rows, leftover = divmod(len(n_confs), n_cols)
    if leftover > 0:
        n_rows += 1

    # Create a grid
    grid = widgets.GridspecLayout(n_rows, n_cols)  # type: ignore

    # Create and add views to the grid, filling it row by row.
    widget_coords = itertools.product(range(n_rows), range(n_cols))
    views = []
    for selected_id, (x, y) in zip(n_confs, widget_coords):
        view = nv.show_rdkit(mol, conf_id=selected_id)
        view.layout.width = width
        view.layout.align_self = "stretch"
        grid[x, y] = view
        views.append(view)

    # Sync views
    if sync_views:
        for view in views:
            view._set_sync_camera(views)

    return grid
예제 #10
0
def generate(
    mol: Chem.rdchem.Mol,
    n_confs: int = None,
    rms_cutoff: Optional[float] = None,
    clear_existing: bool = True,
    align_conformers: bool = True,
    minimize_energy: bool = False,
    method: str = None,
    energy_iterations: int = 500,
    warning_not_converged: int = 10,
    random_seed: int = 19,
    add_hs: bool = True,
    verbose: bool = False,
) -> Chem.rdchem.Mol:
    """Compute conformers of a molecule.

    Example:

    ```python
    import datamol as dm
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Get all conformers as a list
    conformers = mol.GetConformers()

    # Get the 3D atom positions of the first conformer
    positions = mol.GetConformer(0).GetPositions()

    # If minimization has been enabled (`minimize_energy=True`, it is
    # disabled by default) you can access the computed energy.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props)
    # {'rdkit_uff_energy': 1.7649408317784008}
    ```

    Args:
        mol: a molecule
        n_confs: Number of conformers to generate. Depends on the
            number of rotatable bonds by default.
        rms_cutoff: The minimum RMS value in Angstrom at which two conformers
            are considered redundant and one is deleted. If None, all conformers
            are kept. This step is done after an eventual minimization step.
        clear_existing: Whether to overwrite existing conformers for the molecule.
        align_conformers: Whether to align conformers.
        minimize_energy: Whether to minimize the conformers' energies using UFF.
            Disable to generate conformers much faster.
        method: RDKit method to use for embedding. Choose among
            ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]. If None, "ETKDGv3" is used.
        energy_iterations: Maximum number of iterations during the energy minimization procedure.
            It corresponds to the `maxIters` argument in RDKit.
        warning_not_converged: Whether to log a warning when the number of not converged conformers
            during the minimization is higher than `warning_not_converged`. Only works when `verbose` is set to True. Disable with 0. Defaults to 10.
        random_seed: Set to None or -1 to disable.
        add_hs: Whether to add hydrogens to the mol before embedding. If set to True, the hydrogens
            are removed in the returned molecule. Warning: explicit hydrogens won't be conserved. It is strongly
            recommended to let the default value to True. The RDKit documentation says: "To get good 3D conformations,
            it's almost always a good idea to add hydrogens to the molecule first."
        verbose: Whether to enable logs during the process.

    Returns:
        mol: the molecule with the conformers.

    Raises:
        ValueError: if `method` is not supported or if the embedding fails
            even with chirality enforcement disabled.
    """

    AVAILABLE_METHODS = ["ETDG", "ETKDG", "ETKDGv2", "ETKDGv3"]

    if method is None:
        method = "ETKDGv3"

    if method not in AVAILABLE_METHODS:
        raise ValueError(
            f"The method {method} is not supported. Use from {AVAILABLE_METHODS}"
        )

    # Random seed
    if random_seed is None:
        random_seed = -1

    # Clone molecule so the caller's object is never modified.
    mol = copy.deepcopy(mol)

    # Remove existing conformers
    if clear_existing:
        mol.RemoveAllConformers()

    # Add hydrogens
    if add_hs:
        mol = Chem.AddHs(mol)

    if not n_confs:
        # Set the number of conformers depends on
        # the number of rotatable bonds.
        rotatable_bonds = Descriptors.NumRotatableBonds(mol)
        if rotatable_bonds < 8:
            n_confs = 50
        elif rotatable_bonds < 12:
            n_confs = 200
        else:
            n_confs = 300

    # Embed conformers
    params = getattr(AllChem, method)()
    params.randomSeed = random_seed
    params.enforceChirality = True
    confs = AllChem.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)

    # Sometime embedding fails. Here we try again by disabling `enforceChirality`.
    if len(confs) == 0:
        if verbose:
            logger.warning(
                f"Conformers embedding failed for {dm.to_smiles(mol)}. Trying without enforcing chirality."
            )
        params = getattr(AllChem, method)()
        params.randomSeed = random_seed
        params.enforceChirality = False
        confs = AllChem.EmbedMultipleConfs(mol,
                                           numConfs=n_confs,
                                           params=params)

    if len(confs) == 0:
        raise ValueError(
            f"Conformers embedding failed for {dm.to_smiles(mol)}")

    # Minimize energy
    if minimize_energy:

        # Minimize conformer's energy using UFF. Each result is a
        # (not_converged_flag, energy) pair.
        results = AllChem.UFFOptimizeMoleculeConfs(mol,
                                                   maxIters=energy_iterations)
        energies = [energy for _, energy in results]

        # Some conformers might not have converged during minimization.
        # Count the non-zero flags instead of summing them, so that a
        # negative status flag cannot cancel out a positive one.
        not_converged = sum(1 for flag, _ in results if flag)
        if warning_not_converged != 0 and not_converged > warning_not_converged and verbose:
            logger.warning(
                f"{not_converged}/{len(results)} conformers have not converged for {dm.to_smiles(mol)}"
            )

        # Add the energy as a property to each conformers
        for energy, conf in zip(energies, mol.GetConformers()):
            conf.SetDoubleProp("rdkit_uff_energy", energy)

        # Now we reorder conformers according to their energies,
        # so the lowest energies conformers are first.
        # Sorting on the energy only (stable sort) avoids falling back to
        # comparing Conformer objects when two energies are equal, which
        # would raise a TypeError.
        mol_clone = copy.deepcopy(mol)
        ranked = sorted(zip(energies, mol_clone.GetConformers()),
                        key=lambda pair: pair[0])
        mol.RemoveAllConformers()
        for _, conf in ranked:
            mol.AddConformer(conf, assignId=True)

    # Align conformers to each others
    if align_conformers:
        rdMolAlign.AlignMolConformers(mol)

    if rms_cutoff is not None:
        mol = cluster(
            mol,
            rms_cutoff=rms_cutoff,
            already_aligned=align_conformers,
            centroids=True,
        )  # type: ignore

    if add_hs:
        mol = Chem.RemoveHs(mol)

    return mol
예제 #11
0
def align_conformers(molecule: Mol, heavy_only=True) -> None:
    """Align the conformers of ``molecule`` in place.

    With ``heavy_only`` set, only atoms whose atomic number is greater than
    one are passed to RDKit as alignment points; otherwise an empty atom
    list is passed (presumably RDKit then falls back to all atoms — confirm
    against the RDKit documentation).
    """
    heavy_atoms = (
        [atom.GetIdx() for atom in molecule.GetAtoms() if atom.GetAtomicNum() > 1]
        if heavy_only
        else []
    )
    rdMolAlign.AlignMolConformers(molecule, atomIds=heavy_atoms)
예제 #12
0
def changeAndOpt(rdkit, theta):
    """Scan dihedral combinations of a molecule, optimise them and write xyz files.

    The molecule's conformer is modified in place: the dihedrals matching
    ``C-S-C-[C,Si,Ge;H0]`` are set to 180 degrees, then every combination of
    angles (steps of ``theta`` degrees starting from the current value) for
    the dihedrals matching ``S-C-[C,Si,Ge;H0]-[C,Si,Ge]`` is added to the
    molecule as a new conformer. The new conformers are aligned, optimised
    through ``QMMol`` with xtb, and each optimised conformer is written to
    ``<_Name>.xyz`` in the current directory.

    Args:
        rdkit: an RDKit molecule with at least one conformer (note that the
            parameter name shadows the ``rdkit`` package inside this function).
        theta: dihedral step in degrees.
    """
    Chem.SanitizeMol(rdkit)
    working_conf = rdkit.GetConformer()

    # set outer most dihedral to 180 degrees.
    outer_dihedrals = find_dihedral_idx(rdkit, "C-S-C-[C,Si,Ge;H0]")
    for a1, a2, a3, a4 in outer_dihedrals:
        rdMolTransforms.SetDihedralDeg(working_conf, a1, a2, a3, a4, 180.0)

    # change second outmost dihedral with +-120 degrees.
    inner_dihedrals = find_dihedral_idx(rdkit, "S-C-[C,Si,Ge;H0]-[C,Si,Ge]")

    # For each inner dihedral, the list of angles to try.
    angle_choices = []
    for a1, a2, a3, a4 in inner_dihedrals:
        start_angle = rdMolTransforms.GetDihedralDeg(working_conf, a1, a2, a3,
                                                     a4)
        n_steps = int(360. / theta)
        angle_choices.append(
            [start_angle + step * theta for step in range(n_steps)])

    # Every combination of angles becomes one new conformer. The working
    # conformer is mutated and then copied into the molecule each time.
    for combination in itertools.product(*angle_choices):
        for (a1, a2, a3, a4), angle in zip(inner_dihedrals, combination):
            rdMolTransforms.SetDihedralDeg(working_conf, a1, a2, a3, a4,
                                           angle)
        rdkit.AddConformer(working_conf, assignId=True)

    rdMolAlign.AlignMolConformers(rdkit)

    # Turn every conformer but the first (the mutated working conformer)
    # into a stand-alone molecule with a numbered name.
    conformer_mols = []
    for position, conf in enumerate(rdkit.GetConformers()):
        if position > 0:
            block = Chem.SDWriter.GetText(rdkit, conf.GetId())
            single = Chem.MolFromMolBlock(block, removeHs=False)
            numbered_name = single.GetProp("_Name") + "-" + str(position - 1)
            single.SetProp("_Name", numbered_name)
            conformer_mols.append(single)

    # Optimize structures with new dihedrals.
    qm_molecule = QMMol(conformer_mols,
                        fmt="mol_list",
                        charge=0,
                        multi=1,
                        charged_fragments=True)
    qm_molecule.optimize(program="xtb", method="opt", cpus=24, babelAC=True)

    # Write xyz files of conformers
    for optimized in qm_molecule.GetConformers():
        converter = openbabel.OBConversion()
        converter.SetInAndOutFormats("sdf", "xyz")

        ob_molecule = openbabel.OBMol()
        converter.ReadString(ob_molecule, Chem.MolToMolBlock(optimized))
        xyz_text = converter.WriteString(ob_molecule)

        with open(optimized.GetProp("_Name") + ".xyz", 'w') as handle:
            handle.write(xyz_text)
예제 #13
0
def compute_conformer_energies_from_file(filename):
    """Minimize every conformer found in ``filename`` and report energies and RMS.

    Molecules are loaded through the RDKit toolkit wrapper, consecutive
    identical molecules are merged into multi-conformer molecules, and each
    molecule is parametrized with ``openff_unconstrained-1.1.0.offxml``.
    For every conformer the potential energy is evaluated with OpenMM before
    and after ``minimizeEnergy()``, and the RMS between the initial and the
    minimized coordinates is obtained from RDKit's conformer alignment.

    Side effects (all in the current working directory):
        * a progress report is printed to stdout;
        * ``<name>_conf<i>_minimized.sdf`` is written for every conformer;
        * ``<name>.csv`` with one row per conformer is written per molecule.

    Args:
        filename: path of the file handed to ``Molecule.from_file``.

    Returns:
        None.
    """
    # Load in the molecule and its conformers.
    # Note that all conformers of the same molecule are loaded as separate Molecule objects
    # If using a OFF Toolkit version before 0.7.0, loading SDFs through RDKit and OpenEye may provide
    # different behavior in some cases. So, here we force loading through RDKit to ensure the correct behavior
    rdktkw = RDKitToolkitWrapper()
    loaded_molecules = Molecule.from_file(filename, toolkit_registry=rdktkw)
    # The logic below only works for lists of molecules, so if a
    # single molecule was loaded, cast it to list
    if type(loaded_molecules) is not list:
        loaded_molecules = [loaded_molecules]
    # Collate all conformers of the same molecule. Only *consecutive* equal
    # molecules are merged, since each one is compared with the last kept one.
    # NOTE: This isn't necessary if you have already loaded or created multi-conformer molecules;
    # it is just needed because our SDF reader does not automatically collapse conformers.
    molecules = [loaded_molecules[0]]
    for molecule in loaded_molecules[1:]:
        if molecule == molecules[-1]:
            for conformer in molecule.conformers:
                molecules[-1].add_conformer(conformer)
        else:
            molecules.append(molecule)

    n_molecules = len(molecules)
    n_conformers = sum([mol.n_conformers for mol in molecules])
    print(
        f'{n_molecules} unique molecule(s) loaded, with {n_conformers} total conformers'
    )

    # Load the openff-1.1.0 force field appropriate for vacuum calculations (without constraints)
    from openff.toolkit.typing.engines.smirnoff import ForceField
    forcefield = ForceField('openff_unconstrained-1.1.0.offxml')
    # Loop over molecules and minimize each conformer
    for molecule in molecules:
        # If the molecule doesn't have a name, set mol.name to be the hill formula
        # NOTE(review): the conformer-count line below is only printed for
        # molecules that had no name — confirm whether that is intended.
        if molecule.name == '':
            molecule.name = Topology._networkx_to_hill_formula(
                molecule.to_networkx())
            print('%s : %d conformers' %
                  (molecule.name, molecule.n_conformers))
        # Make a temporary copy of the molecule that we can update for each minimization
        mol_copy = Molecule(molecule)
        # Make an OpenFF Topology so we can parameterize the system
        off_top = molecule.to_topology()
        print(
            f"Parametrizing {molecule.name} (may take a moment to calculate charges)"
        )
        system = forcefield.create_openmm_system(off_top)
        # Use OpenMM to compute initial and minimized energy for all conformers
        integrator = openmm.VerletIntegrator(1 * unit.femtoseconds)
        platform = openmm.Platform.getPlatformByName('Reference')
        omm_top = off_top.to_openmm()
        simulation = openmm.app.Simulation(omm_top, system, integrator,
                                           platform)

        # Print text header
        print(
            'Conformer         Initial PE         Minimized PE       RMS between initial and minimized conformer'
        )
        # Rows of the per-molecule CSV report; the first row is the header.
        output = [[
            'Conformer', 'Initial PE (kcal/mol)', 'Minimized PE (kcal/mol)',
            'RMS between initial and minimized conformer (Angstrom)'
        ]]
        for conformer_index, conformer in enumerate(molecule.conformers):
            # The same simulation context is reused for every conformer:
            # set the positions, read the energy, then minimize in place.
            simulation.context.setPositions(conformer)
            orig_potential = simulation.context.getState(
                getEnergy=True).getPotentialEnergy()
            simulation.minimizeEnergy()
            min_state = simulation.context.getState(getEnergy=True,
                                                    getPositions=True)
            min_potential = min_state.getPotentialEnergy()

            # Calculate the RMSD between the initial and minimized conformer:
            # put both coordinate sets on the scratch copy as its only two
            # conformers and let RDKit align them.
            min_coords = min_state.getPositions()
            min_coords = np.array([[atom.x, atom.y, atom.z]
                                   for atom in min_coords]) * unit.nanometer
            mol_copy._conformers = None
            mol_copy.add_conformer(conformer)
            mol_copy.add_conformer(min_coords)
            rdmol = mol_copy.to_rdkit()
            rmslist = []
            rdMolAlign.AlignMolConformers(rdmol, RMSlist=rmslist)
            minimization_rms = rmslist[0]

            # Save the minimized conformer to file (the scratch copy is reset
            # so that it only carries the minimized coordinates).
            mol_copy._conformers = None
            mol_copy.add_conformer(min_coords)
            mol_copy.to_file(
                f'{molecule.name}_conf{conformer_index+1}_minimized.sdf',
                file_format='sdf')
            print(
                '%5d / %5d : %8.3f kcal/mol %8.3f kcal/mol  %8.3f Angstroms' %
                (conformer_index + 1, molecule.n_conformers,
                 orig_potential / unit.kilocalories_per_mole,
                 min_potential / unit.kilocalories_per_mole, minimization_rms))
            output.append([
                str(conformer_index + 1),
                f'{orig_potential/unit.kilocalories_per_mole:.3f}',
                f'{min_potential/unit.kilocalories_per_mole:.3f}',
                f'{minimization_rms:.3f}'
            ])
        # Write the results out to CSV (one file per molecule)
        with open(f'{molecule.name}.csv', 'w') as of:
            for line in output:
                of.write(','.join(line) + '\n')
        # Clean up OpenMM Simulation before handling the next molecule
        del simulation, integrator