def cluster(
    mol: Chem.rdchem.Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Group the conformers of a molecule into clusters using an RMS threshold.

    The pairwise conformer RMS matrix is computed on a deep copy of the
    molecule and passed to the Butina clustering routine. The resulting
    clusters are then handed to `return_centroids`.

    Args:
        mol: a molecule.
        rms_cutoff: The RMS cutoff in Angstrom.
        already_aligned: Whether the conformers are already aligned. If False,
            they are aligned during the RMS computation.
        centroids: If True, return one molecule with centroid conformers only.
            If False return a list of molecules per cluster with all the
            conformers of the cluster. Defaults to True.

    Returns:
        The output of `return_centroids` for the computed clusters.
    """

    # All the work happens on a private copy so the caller's molecule is untouched.
    work_mol = copy.deepcopy(mol)
    n_conformers = work_mol.GetNumConformers()

    # Pairwise RMS between conformers, used as the distance data for clustering.
    rms_matrix = AllChem.GetConformerRMSMatrix(work_mol, prealigned=already_aligned)

    clusters = Butina.ClusterData(
        rms_matrix,
        nPts=n_conformers,
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(work_mol, clusters, centroids=centroids)
def duplicate_conformers(m: Chem.rdchem.Mol, new_conf_idx: int, rms_limit: float = 0.5) -> bool:
    """Tell whether a conformer is a near-duplicate of another conformer.

    The conformer `new_conf_idx` is compared against every other index in
    `range(m.GetNumConformers())`. The RMS is computed with `prealigned=True`,
    i.e. the conformers are not re-aligned before the comparison.

    Args:
        m: a molecule holding the conformers.
        new_conf_idx: index of the conformer to check.
        rms_limit: threshold; an RMS strictly below it counts as a duplicate.

    Returns:
        True if at least one other conformer has an RMS strictly below
        `rms_limit` with respect to `new_conf_idx`, False otherwise (including
        when there is no other conformer).
    """
    # A generator lets `any` stop at the first duplicate instead of computing
    # the RMS against every remaining conformer.
    return any(
        AllChem.GetConformerRMS(m, new_conf_idx, other_idx, prealigned=True) < rms_limit
        for other_idx in range(m.GetNumConformers())
        if other_idx != new_conf_idx
    )
def get_coords(mol: Chem.rdchem.Mol, conf_id: int = -1):
    """Return the atomic positions of one conformer of a molecule.

    Args:
        mol: a molecule.
        conf_id: a conformer id.

    Returns:
        The result of `GetPositions()` on the requested conformer.

    Raises:
        ValueError: if the molecule has no conformer at all.
    """
    has_conformers = mol.GetNumConformers() != 0
    if not has_conformers:
        raise ValueError("Molecule does not have any conformers.")

    return mol.GetConformer(id=conf_id).GetPositions()
def rmsd(mol: Chem.rdchem.Mol) -> np.ndarray:
    """Build the matrix of pairwise RMSD values between the conformers of a molecule.

    Every ordered pair of conformer indices `(i, j)` is passed to
    `rdMolAlign.AlignMol` with `i` as the probe and `j` as the reference.

    Args:
        mol: a molecule

    Returns:
        An array of shape `(n_confs, n_confs)` where entry `[i, j]` is the value
        returned by the alignment of conformer `i` onto conformer `j`.

    Raises:
        ValueError: if the molecule has fewer than two conformers.
    """
    n_confs = mol.GetNumConformers()
    if n_confs <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # NOTE(review): AlignMol presumably moves the probe conformer in place, so the
    # evaluation order (probe index outer, reference index inner) is kept as is.
    pair_values = [
        rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=probe_idx, refCid=ref_idx)
        for probe_idx in range(n_confs)
        for ref_idx in range(n_confs)
    ]

    return np.array(pair_values).reshape(n_confs, n_confs)
def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
    """Fill the TITLE, POSITION and GENBOX attributes from an RDKit molecule.

    If a conformation exists the first one will be used. Otherwise hydrogens
    are added to the molecule and a conformation is embedded and optimized
    with UFF.

    Parameters
    ----------
    mol : Chem.rdchem.Mol
        Molecule, possibly with a conformation
    conversionFactor : float
        the factor used to convert length from rdkit to Gromos
        (default: angstrom -> nano meter = 0.1)

    Raises
    ------
    ValueError
        if the molecule has no conformation and none could be embedded.
    """
    # The second InChI layer is used as molecule/residue name; fall back to a
    # placeholder when the InChI string has no such layer.
    inchi_layers = Chem.MolToInchi(mol).split("/")
    name = inchi_layers[1] if len(inchi_layers) >= 2 else "XXX"

    self.TITLE = TITLE("\t" + name + " created from RDKit")

    # check if conformations exist else create a new one
    if mol.GetNumConformers() < 1:
        mol = Chem.AddHs(mol)
        # EmbedMolecule reports failure with a negative conformer id.
        if AllChem.EmbedMolecule(mol) < 0:
            raise ValueError("Could not embed a conformation for the molecule.")
        AllChem.UFFOptimizeMolecule(mol)

    # Default lookup returns the first conformer, whatever its id is
    # (an explicit id of 0 fails when the first conformer has another id).
    conf = mol.GetConformer()

    # fill a list with atomP types from RDKit data
    atom_list = []
    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        position = conf.GetAtomPosition(idx)
        atom_list.append(
            blocks.atomP(
                resID=1,
                resName=name,
                atomType=atom.GetSymbol(),
                atomID=idx + 1,
                xp=conversionFactor * position.x,
                yp=conversionFactor * position.y,
                zp=conversionFactor * position.z,
            )
        )

    # set POSITION attribute
    self.POSITION = blocks.POSITION(atom_list)

    # Defaults set for GENBOX - for liquid sim adjust manually
    self.GENBOX = blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90])
def sasa(
    mol: Chem.rdchem.Mol,
    conf_id: Union[int, List[int], None] = None,
    n_jobs: int = 1,
) -> np.ndarray:
    """Compute Solvent Accessible Surface Area of all the conformers using FreeSASA
    (https://freesasa.github.io/). Values are returned as an array and also stored within
    each conformer as a property called `rdkit_free_sasa`.

    Example:

    ```python
    smiles = "O=C(C)Oc1ccccc1C(=O)O"
    mol = dm.to_mol(smiles)
    mol = dm.conformers.generate(mol)

    # Compute SASA for all the conformers without parallelization
    sasa_values = dm.conformers.sasa(mol, conf_id=None, n_jobs=1)

    # The value is also stored as a property of each conformer.
    conf = mol.GetConformer(0)
    props = conf.GetPropsAsDict()
    print(props["rdkit_free_sasa"])
    ```

    Args:
        mol: a molecule
        conf_id: Id of the conformers to compute. Either a single id, a list of ids
            or None. If None, compute all.
        n_jobs: Number of jobs for parallelization. Set to 1 to disable
            and -1 to use all cores.

    Returns:
        An array with one SASA value per requested conformer, in the order of the
        requested ids.

    Raises:
        ValueError: if the molecule has no conformer.
    """
    from rdkit.Chem import rdFreeSASA

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Get Van der Waals radii (angstrom)
    radii = [dm.PERIODIC_TABLE.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]

    # Which conformers to compute
    if conf_id is None:
        # If None compute for all the conformers
        conf_ids = list(range(mol.GetNumConformers()))
    elif isinstance(conf_id, int):
        conf_ids = [conf_id]
    else:
        conf_ids = conf_id

    # Compute solvent accessible surface area
    def _get_sasa(i):
        conf = mol.GetConformer(i)
        value = rdFreeSASA.CalcSASA(mol, radii, confIdx=conf.GetId())
        # Keep the value on the conformer so it can be read back later.
        conf.SetDoubleProp("rdkit_free_sasa", value)
        return value

    runner = dm.JobRunner(n_jobs=n_jobs)
    sasa_values = runner(_get_sasa, conf_ids)
    return np.array(sasa_values)
def conformers(
    mol: Chem.rdchem.Mol,
    conf_id: int = -1,
    n_confs: Union[int, List[int], None] = None,
    align_conf: bool = True,
    n_cols: int = 3,
    sync_views: bool = True,
    remove_hs: bool = True,
    width: str = "auto",
):
    """Visualize the conformer(s) of a molecule.

    Args:
        mol: a molecule.
        conf_id: The ID of the conformer to show. -1 shows
            the first conformer. Only works if `n_confs` is None.
        n_confs: Can be a number of conformers
            to show or a list of conformer indices. When None, only the first
            conformer is displayed. When -1, show all conformers.
        align_conf: Whether to align conformers together.
        n_cols: Number of columns. Defaults to 3.
        sync_views: Whether to sync the multiple views.
        remove_hs: Whether to remove the hydrogens of the conformers. When False,
            hydrogens are added instead.
        width: The width of the returned view. Defaults to "auto".

    Returns:
        A single view when `n_confs` is None, otherwise a grid of views with one
        cell per displayed conformer.

    Raises:
        ValueError: if the molecule has no conformer.
    """

    widgets = _get_ipywidgets()
    nv = _get_nglview()

    if mol.GetNumConformers() == 0:
        raise ValueError(
            "The molecule has 0 conformers. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Clone the molecule
    mol = copy.deepcopy(mol)

    if remove_hs:
        mol = Chem.RemoveHs(mol)  # type: ignore
    else:
        # addCoords=True gives the added hydrogens 3D coordinates; without it
        # they carry no meaningful position in the displayed conformers.
        mol = Chem.AddHs(mol, addCoords=True)  # type: ignore

    if n_confs is None:
        return nv.show_rdkit(mol, conf_id=conf_id)

    # Resolve `n_confs` into an explicit list of conformer IDs.
    if n_confs == -1:
        conf_ids = [conf.GetId() for conf in mol.GetConformers()]
    elif isinstance(n_confs, int):
        # Never ask for more conformers than the molecule has.
        conf_ids = list(range(min(n_confs, mol.GetNumConformers())))
    else:
        conf_ids = n_confs

    if align_conf:
        rdMolAlign.AlignMolConformers(mol, confIds=conf_ids)

    # Get number of rows: one extra row for a partially filled last row.
    n_rows, remainder = divmod(len(conf_ids), n_cols)
    if remainder > 0:
        n_rows += 1

    # Create a grid
    grid = widgets.GridspecLayout(n_rows, n_cols)  # type: ignore

    # Create and add views to the grid, filling it row by row.
    grid_cells = itertools.product(range(n_rows), range(n_cols))
    views = []
    for current_conf_id, (row, col) in zip(conf_ids, grid_cells):
        view = nv.show_rdkit(mol, conf_id=current_conf_id)
        view.layout.width = width
        view.layout.align_self = "stretch"
        grid[row, col] = view
        views.append(view)

    # Sync views
    if sync_views:
        for view in views:
            view._set_sync_camera(views)

    return grid
def sanitize_mol(
    mol: Chem.rdchem.Mol,
    charge_neutral: bool = False,
    sanifix: bool = True,
    verbose: bool = True,
    add_hs: bool = False,
) -> Optional[Chem.rdchem.Mol]:
    """Sanitize a molecule more thoroughly than RDKit `sanitize=True`.

    The molecule goes through a mol-SMILES-mol round trip to catch potential
    aromaticity errors, optionally preceded by a charge neutralization and by
    the sanifix4 script that tries to fix aromatic nitrogens.

    Note #1: Only the first conformer (if present) will be preserved and
    a warning will be displayed if more than one conformer is detected.

    Note #2: The molecule's properties will be preserved but the atom's properties will be lost.

    Args:
        mol: a molecule.
        charge_neutral: whether charge neutralization should be applied.
        sanifix: whether to run the sanifix from James Davidson
            (sanifix4.py) that try to adjust aromatic nitrogens.
        verbose: Whether displaying a warning about multiple conformers.
        add_hs: Add hydrogens to the returned molecule. Useful when the input
            molecule already contains hydrogens.

    Returns:
        mol: the sanitized molecule, or None when the input is None or when
            any step of the sanitization fails to produce a molecule.
    """
    if mol is None:
        return mol

    # Remember the molecule-level properties: they are re-attached at the end.
    properties = copy_mol(mol).GetPropsAsDict()

    if charge_neutral:
        mol = to_neutral(mol)

    if sanifix:
        mol = _sanifix4.sanifix(mol)

    # One of the optional steps above may have failed to return a molecule.
    if mol is None:
        return None

    # Detect multiple conformers
    if verbose and mol.GetNumConformers() > 1:
        logger.warning(
            "The molecule contains multiple conformers. Only the first one will be preserved."
        )

    # The round trip may raise on occasional aromaticity errors: treat that as a failure.
    try:
        # `cxsmiles` is used here to preserve the first conformer.
        round_tripped = to_mol(dm.to_smiles(mol, cxsmiles=True), sanitize=True, add_hs=add_hs)  # type: ignore
    except Exception:
        return None

    if round_tripped is None:
        return None

    # Insert back properties.
    return dm.set_mol_props(round_tripped, properties)