Example #1
0
def cluster(
    mol: Chem.rdchem.Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Group the conformers of a molecule with Butina clustering on their RMS.

    The input molecule is never modified: all the work is done on a deep copy.

    Args:
        mol: a molecule holding the conformers to cluster.
        rms_cutoff: The RMS threshold (in Angstrom) given to the Butina
            algorithm as distance threshold.
        already_aligned: Whether the conformers are already aligned. If False,
            they will be aligned during the RMS computation.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster. Defaults to True.

    Returns:
        The value built by `return_centroids` from the computed clusters
        (its shape depends on `centroids`, see above).
    """

    # Work on a private copy so the caller's molecule stays untouched.
    working_mol = copy.deepcopy(mol)

    # Pairwise RMS values between the conformers, used as distance data.
    rms_distances = AllChem.GetConformerRMSMatrix(working_mol, prealigned=already_aligned)
    n_conformers = working_mol.GetNumConformers()

    # Butina clustering directly on the precomputed distances.
    clusters = Butina.ClusterData(
        rms_distances,
        nPts=n_conformers,
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(working_mol, clusters, centroids=centroids)
def duplicate_conformers(m: Chem.rdchem.Mol,
                         new_conf_idx: int,
                         rms_limit: float = 0.5) -> bool:
    """Tell whether a conformer is a near-duplicate of another conformer of `m`.

    Args:
        m: a molecule carrying the conformers to compare.
        new_conf_idx: ID of the conformer being checked.
        rms_limit: RMS value below which two conformers are considered
            duplicates.

    Returns:
        True if at least one other conformer of `m` has an RMS to
        `new_conf_idx` strictly below `rms_limit`. False otherwise, including
        when the molecule has no other conformer.
    """
    # Use the real conformer IDs: `GetConformerRMS` expects IDs, and those are
    # not guaranteed to be 0..N-1 (e.g. after conformers have been removed).
    other_ids = [
        conf.GetId() for conf in m.GetConformers() if conf.GetId() != new_conf_idx
    ]

    # `prealigned=True`: coordinates are compared as they are. `any` stops at
    # the first duplicate instead of computing every RMS first.
    return any(
        AllChem.GetConformerRMS(m, new_conf_idx, other_id, prealigned=True) < rms_limit
        for other_id in other_ids
    )
Example #3
0
def get_coords(mol: Chem.rdchem.Mol, conf_id: int = -1):
    """Return the atom positions of one conformer of a molecule.

    Args:
        mol: a molecule with at least one conformer.
        conf_id: ID of the conformer to read. It is forwarded as-is to
            `mol.GetConformer`.

    Returns:
        The positions reported by the conformer's `GetPositions()`.

    Raises:
        ValueError: if the molecule has no conformer at all.
    """

    has_conformers = mol.GetNumConformers() != 0
    if not has_conformers:
        raise ValueError("Molecule does not have any conformers.")

    return mol.GetConformer(id=conf_id).GetPositions()
Example #4
0
def rmsd(mol: Chem.rdchem.Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Every ordered pair of conformers (including a conformer with itself) is
    passed to `rdMolAlign.AlignMol`, with the first one as probe and the
    second one as reference.

    Note: `AlignMol` aligns the probe conformer onto the reference one, so the
    conformers of `mol` are modified in place by this function.

    Args:
        mol: a molecule with at least two conformers.

    Returns:
        A square array of shape `(n_confs, n_confs)` where entry `[i, j]` is
        the value returned by `AlignMol` with the i-th conformer as probe and
        the j-th conformer as reference.

    Raises:
        ValueError: if the molecule has fewer than two conformers.
    """

    # Use the real conformer IDs: `prbCid`/`refCid` are IDs, and those are
    # not guaranteed to be 0..N-1 (e.g. after conformers have been removed).
    conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    n_confs = len(conf_ids)

    if n_confs <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Row-major order: the probe varies slowest, the reference fastest.
    values = [
        rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=probe_id, refCid=ref_id)
        for probe_id in conf_ids
        for ref_id in conf_ids
    ]
    return np.array(values).reshape(n_confs, n_confs)
Example #5
0
    def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
        """Fill the TITLE, POSITION and GENBOX attributes from an RDKit molecule.

        If the molecule already has a conformation, the conformer with ID 0 is
        used. Otherwise hydrogens are added and a conformation is embedded and
        optimized with UFF before the coordinates are read.

        Parameters
        ----------
        mol : Chem.rdchem.Mol
            Molecule, possibly with a conformation

        conversionFactor  :  float
            the factor used to convert length from rdkit to Gromos
            (default: angstrom -> nano meter = 0.1)

        Raises
        ------
        ValueError
            if the molecule has no conformation and none could be embedded.
        """
        # The second "/"-separated InChI field serves as title and residue
        # name; fall back to a placeholder when it is missing.
        inchi_fields = Chem.MolToInchi(mol).split("/")
        name = inchi_fields[1] if len(inchi_fields) >= 2 else "XXX"
        self.TITLE = TITLE("\t" + name + " created from RDKit")

        # check if conformations exist else create a new one
        if mol.GetNumConformers() < 1:
            mol = Chem.AddHs(mol)
            # EmbedMolecule reports failure with a negative conformer ID; stop
            # here with a clear message instead of failing later on a missing
            # conformer.
            if AllChem.EmbedMolecule(mol) < 0:
                raise ValueError("Could not embed a conformation for the given molecule.")
            AllChem.UFFOptimizeMolecule(mol)
        conf = mol.GetConformer(0)

        # fill a list with atomP types from RDKit data
        atomList = []
        for atom in mol.GetAtoms():
            idx = atom.GetIdx()
            # Fetch the position once per atom instead of once per coordinate.
            position = conf.GetAtomPosition(idx)
            atomList.append(
                blocks.atomP(
                    resID=1,
                    resName=name,
                    atomType=atom.GetSymbol(),
                    atomID=idx + 1,  # atom IDs are written 1-based
                    xp=conversionFactor * position.x,
                    yp=conversionFactor * position.y,
                    zp=conversionFactor * position.z,
                )
            )

        # set POSITION attribute
        self.POSITION = blocks.POSITION(atomList)
        # Defaults set for GENBOX - for liquid sim adjust manually
        self.GENBOX = blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90])
Example #6
0
def sasa(
    mol: Chem.rdchem.Mol,
    conf_id: Union[int, List[int], None] = None,
    n_jobs: int = 1,
) -> np.ndarray:
    """Compute Solvent Accessible Surface Area of all the conformers
    using FreeSASA (https://freesasa.github.io/). Values are returned
    as an array and also stored within each conformer as a property
    called `rdkit_free_sasa`.

    Example:

    ```python
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Compute SASA for all the conformers without parallelization
    sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

    # The value is also stored on each computed conformer.
    conf = mol.GetConformer(0)
    print(conf.GetDoubleProp("rdkit_free_sasa"))
    ```

    Args:
        mol: a molecule
        conf_id: Id (or list of ids) of the conformers to compute. If None,
            compute all.
        n_jobs: Number of jobs for parallelization. Set to 1 to disable
            and -1 to use all cores.

    Returns:
        An array with one SASA value per requested conformer.

    Raises:
        ValueError: if the molecule has no conformer.
    """
    from rdkit.Chem import rdFreeSASA

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Get Van der Waals radii (angstrom)
    radii = [
        dm.PERIODIC_TABLE.GetRvdw(atom.GetAtomicNum())
        for atom in mol.GetAtoms()
    ]

    # Which conformers to compute
    if conf_id is None:
        # Use the real conformer IDs: they are not guaranteed to be 0..N-1
        # (e.g. after conformers have been removed from the molecule).
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    elif isinstance(conf_id, int):
        conf_ids = [conf_id]
    else:
        conf_ids = conf_id

    # Compute solvent accessible surface area
    def _get_sasa(i):
        conf = mol.GetConformer(i)
        value = rdFreeSASA.CalcSASA(mol, radii, confIdx=conf.GetId())
        # NOTE(review): if the runner executes jobs in separate processes, this
        # property may not reach the caller's molecule - confirm for n_jobs != 1.
        conf.SetDoubleProp("rdkit_free_sasa", value)
        return value

    runner = dm.JobRunner(n_jobs=n_jobs)
    sasa_values = runner(_get_sasa, conf_ids)
    return np.array(sasa_values)
Example #7
0
def conformers(
    mol: Chem.rdchem.Mol,
    conf_id: int = -1,
    n_confs: Union[int, List[int], None] = None,
    align_conf: bool = True,
    n_cols: int = 3,
    sync_views: bool = True,
    remove_hs: bool = True,
    width: str = "auto",
):
    """Visualize the conformer(s) of a molecule.

    The input molecule is not modified: the views are built from a deep copy.

    Args:
        mol: a molecule.
        conf_id: The ID of the conformer to show. -1 shows
            the first conformer. Only works if `n_confs` is None.
        n_confs: Can be a number of conformers
            to show or a list of conformer IDs. When None, only the
            conformer selected by `conf_id` is displayed. When -1, show all
            conformers.
        align_conf: Whether to align conformers together.
        n_cols: Number of columns. Defaults to 3.
        sync_views: Whether to sync the multiple views.
        remove_hs: Whether to remove the hydrogens of the conformers. When
            False, hydrogens are added instead.
        width: The width of the returned view. Defaults to "auto".

    Returns:
        A single nglview view when `n_confs` is None, otherwise a grid widget
        holding one view per selected conformer.

    Raises:
        ValueError: if the molecule has no conformer.
    """

    widgets = _get_ipywidgets()
    nv = _get_nglview()

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Clone the molecule
    mol = copy.deepcopy(mol)

    if remove_hs:
        mol = Chem.RemoveHs(mol)  # type: ignore
    else:
        # `addCoords=True` gives the added hydrogens 3D positions in the
        # existing conformers; without it they have no meaningful coordinates.
        mol = Chem.AddHs(mol, addCoords=True)  # type: ignore

    if n_confs is None:
        return nv.show_rdkit(mol, conf_id=conf_id)

    # Resolve `n_confs` to an explicit list of conformer IDs. The real IDs are
    # used because they are not guaranteed to be 0..N-1.
    if n_confs == -1:
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    elif isinstance(n_confs, int):
        available_ids = [conf.GetId() for conf in mol.GetConformers()]
        # `max(..., 0)` keeps other negative values selecting nothing rather
        # than slicing from the end.
        conf_ids = available_ids[: max(n_confs, 0)]
    else:
        conf_ids = list(n_confs)

    if align_conf:
        rdMolAlign.AlignMolConformers(mol, confIds=conf_ids)

    # Get number of rows (ceiling of len(conf_ids) / n_cols)
    n_rows, leftover = divmod(len(conf_ids), n_cols)
    if leftover:
        n_rows += 1

    # Create a grid
    grid = widgets.GridspecLayout(n_rows, n_cols)  # type: ignore

    # Create and add views to the grid, filling it row by row.
    widget_coords = itertools.product(range(n_rows), range(n_cols))
    views = []
    for current_id, (row, col) in zip(conf_ids, widget_coords):
        view = nv.show_rdkit(mol, conf_id=current_id)
        view.layout.width = width
        view.layout.align_self = "stretch"
        grid[row, col] = view
        views.append(view)

    # Sync views
    if sync_views:
        for view in views:
            view._set_sync_camera(views)

    return grid
Example #8
0
def sanitize_mol(
    mol: Chem.rdchem.Mol,
    charge_neutral: bool = False,
    sanifix: bool = True,
    verbose: bool = True,
    add_hs: bool = False,
) -> Optional[Chem.rdchem.Mol]:
    """Sanitize a molecule more thoroughly than RDKit `sanitize=True`.

    The molecule goes through a mol-SMILES-mol round trip to catch potential
    aromaticity errors, optionally preceded by a charge neutralization and by
    the sanifix4 script that tries to fix aromatic nitrogens.

    Note #1: Only the first conformer (if present) will be preserved and
    a warning will be displayed if more than one conformer is detected.

    Note #2: The molecule's properties will be preserved but the atom's
    properties will be lost.

    Args:
        mol: a molecule.
        charge_neutral: whether charge neutralization should be applied.
        sanifix: whether to run the sanifix from James Davidson
            (sanifix4.py) that tries to adjust aromatic nitrogens.
        verbose: Whether to display a warning about multiple conformers.
        add_hs: Add hydrogens to the returned molecule. Useful when the input
            molecule already contains hydrogens.

    Returns:
        mol: the sanitized molecule, or None if the input is None or if the
            molecule could not be sanitized.
    """
    if mol is None:
        return mol

    # Save the molecule-level properties now: they are re-applied at the end.
    properties = copy_mol(mol).GetPropsAsDict()

    if charge_neutral:
        mol = to_neutral(mol)

    if sanifix:
        mol = _sanifix4.sanifix(mol)

    # Nothing left to sanitize.
    if mol is None:
        return None

    # Detect multiple conformers
    if verbose and mol.GetNumConformers() > 1:
        logger.warning(
            "The molecule contains multiple conformers. Only the first one will be preserved."
        )

    # Best effort: any failure of the round trip (e.g. occasional aromaticity
    # errors) results in None rather than in an exception.
    try:
        # `cxsmiles` is used here to preserve the first conformer.
        smiles = dm.to_smiles(mol, cxsmiles=True)
        mol = to_mol(smiles, sanitize=True, add_hs=add_hs)  # type: ignore
    except Exception:
        return None

    if mol is None:
        return None

    # Insert back properties.
    return dm.set_mol_props(mol, properties)