def complete_labels(mol: Chem.rdchem.Mol, mollabels_dict: Dict, mark_upmatched: bool = True) -> List:
    """
    Fill the gaps of a sparse atom-label mapping with generated names such as ``CX1``.

    Atoms whose index is a key of ``mollabels_dict`` keep that label. Every other atom
    receives its upper-cased element symbol followed by a running per-element counter
    (starting at 1), optionally separated by an ``X``.

    :param mol: the molecule whose atoms are named (it is only read by this function).
    :type mol: Chem.rdchem.Mol
    :param mollabels_dict: key is the atom index (int) and value is the atom name, like a
        normal atomlabels list but with gaps
    :type mollabels_dict: Dict
    :param mark_upmatched: add an X between the symbol and the number of generated names
    :type mark_upmatched: bool
    :return: atom labels, one per atom in index order
    :rtype: List[str]
    """
    separator = 'X' if mark_upmatched else ''
    seen_per_element = {}
    labels = []
    for idx in range(mol.GetNumAtoms()):
        if idx in mollabels_dict:
            labels.append(mollabels_dict[idx])
            continue
        element = mol.GetAtomWithIdx(idx).GetSymbol().upper()
        seen_per_element[element] = seen_per_element.get(element, 0) + 1
        labels.append(f'{element}{separator}{seen_per_element[element]}')
    return labels
def fix_valence_charge(mol: Chem.rdchem.Mol, inplace: bool = False) -> Optional[Chem.rdchem.Mol]:
    """Fix valence issues that are due to incorrect charges.

    Args:
        mol: Input molecule with incorrect valence for some atoms.
        inplace: Whether to modify in place or make a copy.

    Returns:
        The input molecule unchanged when the RDKit validation reports nothing,
        otherwise the molecule (or its copy) with the formal charge of every
        atom reassigned.
    """
    validator = rdMolStandardize.RDKitValidation()

    # Don't fix something that is not broken
    if len(validator.validate(mol)) == 0:
        return mol

    fixed = mol if inplace else copy.copy(mol)
    fixed.UpdatePropertyCache(False)
    for atom in fixed.GetAtoms():
        # Charge = current total valence minus the element's default valence.
        current_valence = atom.GetImplicitValence() + atom.GetExplicitValence()
        default_valence = dm.PERIODIC_TABLE.GetDefaultValence(atom.GetSymbol())
        atom.SetFormalCharge(current_valence - default_valence)
    return fixed
def set_mol_props(
    mol: Chem.rdchem.Mol,
    props: Dict[str, Any],
    copy: bool = False,
) -> Chem.rdchem.Mol:
    """Set properties to a mol from a dict.

    Each value is stored with the setter matching its Python type: bool, int
    and float use the typed setters, anything else is stored as `str(value)`.

    Args:
        mol: the mol where to copy the props.
        props: the props to copy.
        copy: whether to copy the provided mol.

    Returns:
        The molecule carrying the properties (the copy when `copy` is True).
    """
    target = dm.copy_mol(mol) if copy is True else mol

    for key, value in props.items():
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            target.SetBoolProp(key, value)
        elif isinstance(value, int):
            target.SetIntProp(key, value)
        elif isinstance(value, float):
            target.SetDoubleProp(key, value)
        else:
            target.SetProp(key, str(value))

    return target
def copy_edit_mol(mol: Chem.rdchem.Mol) -> Chem.rdchem.Mol:
    """Build an editable molecule from `mol`, atom by atom and bond by bond.

    Atoms are duplicated with `copy_atom` in index order and every bond is
    recreated between the same atom indices with the same bond type.

    Args:
        mol: the molecule to copy.

    Returns:
        A new `Chem.RWMol` holding the copied atoms and bonds.
    """
    editable = Chem.RWMol(Chem.MolFromSmiles(''))

    for atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(atom))

    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtom().GetIdx()
        end_idx = bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, bond.GetBondType())

    return editable
def get_coords(mol: Chem.rdchem.Mol, conf_id: int = -1):
    """Get the coordinates of a conformer of a molecule.

    Args:
        mol: a molecule.
        conf_id: a conformer id.

    Returns:
        The positions returned by the conformer's `GetPositions()`.

    Raises:
        ValueError: if the molecule has no conformer.
    """
    if not mol.GetNumConformers():
        raise ValueError("Molecule does not have any conformers.")

    return mol.GetConformer(id=conf_id).GetPositions()
def randomize_atoms(mol: Chem.rdchem.Mol) -> Optional[Chem.rdchem.Mol]:
    """Randomize the order of the atoms in a mol.

    Args:
        mol: a molecule.

    Returns:
        mol: the input itself when it has no atom, otherwise the molecule
            returned by `Chem.RenumberAtoms` for a shuffled atom order.
    """
    n_atoms = mol.GetNumAtoms()
    if n_atoms == 0:
        return mol

    shuffled_order = list(range(n_atoms))
    random.shuffle(shuffled_order)
    return Chem.RenumberAtoms(mol, shuffled_order)
def num_bond_in_ring(mol: Chem.rdchem.Mol):
    """
    Count the bonds of the molecule that are part of a ring.

    The bonds are visited directly instead of probing every pair of atoms,
    so the cost is linear in the number of bonds.

    :param mol: the molecule to inspect.
    :return: a one-element count vector ``[n_ring_bonds]``.
    """
    count = sum(1 for bond in mol.GetBonds() if bond.IsInRing())
    return [count]
def trim_side_chain(mol: Chem.rdchem.Mol, core, unwanted_side_chains):
    """Trim list of side chain from a molecule.

    Steps performed:

    1. Hydrogens are added to a copy of `mol` and `core` is matched onto it.
    2. For every core atom whose atom-map number also appears in one of the
       unwanted patterns, the bond between the matched atom and its first
       neighbour inside the core match is removed, and a dummy atom (`*`)
       carrying the same map number is bonded to that neighbour instead.
    3. Hydrogens are removed, every unwanted pattern (after `dm.fix_mol`) is
       deleted as a whole fragment, and the largest remaining fragment is
       returned.

    Args:
        mol: the molecule to trim.
        core: the core query; its atom-map numbers mark the attachment points.
        unwanted_side_chains: patterns to remove; their non-zero atom-map
            numbers select the attachment points to cut.

    Returns:
        The result of `dm.keep_largest_fragment` on the trimmed molecule.
    """
    mol = Chem.AddHs(mol)
    match = mol.GetSubstructMatch(core)

    # Map numbers used by each unwanted pattern (0 means "no map number").
    unwanted2map = {}
    for patt in unwanted_side_chains:
        unwanted2map[patt] = [a.GetAtomMapNum() for a in patt.GetAtoms() if a.GetAtomMapNum()]
    unwanted_mapping = set(itertools.chain.from_iterable(unwanted2map.values()))

    map2idx = {}
    map2nei = {}
    for atom in core.GetAtoms():
        num = atom.GetAtomMapNum()
        if num and num in unwanted_mapping:
            mol_atom_idx = match[atom.GetIdx()]
            map2idx[mol_atom_idx] = num
            nei_atoms = mol.GetAtomWithIdx(mol_atom_idx).GetNeighbors()
            map2nei[mol_atom_idx] = [n.GetIdx() for n in nei_atoms if n.GetIdx() in match]

    emol = Chem.EditableMol(mol)
    for atom_idx, atom_map in map2idx.items():
        neighbors = map2nei.get(atom_idx)
        if not neighbors:
            # No neighbour inside the core match: nothing to cut here.
            continue
        # Atom index 0 is a valid neighbour, so the list is tested for
        # emptiness rather than the index for truthiness.
        nei_idx = neighbors[0]

        dummy = Chem.rdchem.Atom("*")
        dummy.SetAtomMapNum(atom_map)

        bond = mol.GetBondBetweenAtoms(atom_idx, nei_idx)
        emol.RemoveBond(atom_idx, nei_idx)
        new_ind = emol.AddAtom(dummy)
        emol.AddBond(nei_idx, new_ind, bond.GetBondType())

    mol = emol.GetMol()
    mol = Chem.RemoveHs(mol)

    for patt in unwanted2map:
        cur_frag = dm.fix_mol(patt)
        mol = Chem.DeleteSubstructs(mol, cur_frag, onlyFrags=True)

    return dm.keep_largest_fragment(mol)
def cluster(
    mol: Chem.rdchem.Mol,
    rms_cutoff: float = 1,
    already_aligned: bool = False,
    centroids: bool = True,
):
    """Cluster the conformers of a molecule according to an RMS threshold in Angstrom.

    The work is done on a deep copy, so the input molecule is left untouched.

    Args:
        mol: a molecule
        rms_cutoff: The RMS cutoff in Angstrom.
        already_aligned: Whether or not the conformers are aligned. If False,
            they will be aligned during the RMS computation.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster. Defaults to True.

    Returns:
        Whatever `return_centroids` produces for the computed clusters.
    """
    # Work on a clone
    clone = copy.deepcopy(mol)

    # Pairwise RMS between the conformers
    rms_matrix = AllChem.GetConformerRMSMatrix(clone, prealigned=already_aligned)

    # Butina clustering on the distance data
    clusters = Butina.ClusterData(
        rms_matrix,
        nPts=clone.GetNumConformers(),
        distThresh=rms_cutoff,
        isDistData=True,
        reordering=False,
    )

    return return_centroids(clone, clusters, centroids=centroids)
def center_of_mass(
    mol: Chem.rdchem.Mol,
    use_atoms: bool = True,
    digits: Optional[int] = None,
    conf_id: int = -1,
) -> np.ndarray:
    """Compute the center of mass of a conformer of a molecule.

    Args:
        mol: a molecule
        use_atoms: If True, weight each atom by its mass (true center of
            mass). If False, weight all atoms equally (geometrical center).
        digits: Number of digits to round to. No rounding when None.
        conf_id: the conformer id.

    Returns:
        cm: Center of mass or geometrical center.
    """
    # Honour the requested conformer instead of always reading the default one.
    coords = get_coords(mol, conf_id=conf_id)

    if use_atoms:
        atom_weight = np.array([atom.GetMass() for atom in mol.GetAtoms()])
    else:
        atom_weight = np.ones(coords.shape[0])

    # Column vector of normalized weights, broadcast over x, y, z.
    atom_weight = atom_weight[:, None]
    atom_weight = atom_weight / atom_weight.sum()

    center = (coords * atom_weight).sum(axis=0)

    if digits is not None:
        center = center.round(digits)

    return center
def reorder_atoms(
    mol: Chem.rdchem.Mol,
    break_ties: bool = True,
    include_chirality: bool = True,
    include_isotopes: bool = True,
) -> Optional[Chem.rdchem.Mol]:
    """Reorder the atoms in a mol. It ensures a single atom order for the same molecule,
    regardless of its original representation.

    Args:
        mol: a molecule.
        break_ties: Force breaking of ranked ties.
        include_chirality: Use chiral information when computing rank.
        include_isotopes: Use isotope information when computing rank.

    Returns:
        mol: the input itself when it has no atom, otherwise the renumbered molecule.
    """
    if mol.GetNumAtoms() == 0:
        return mol

    ranks = list(
        Chem.CanonicalRankAtoms(
            mol,
            breakTies=break_ties,
            includeChirality=include_chirality,
            includeIsotopes=include_isotopes,
        )
    )

    # Atom indices ordered by rank; the stable sort keeps the lower index
    # first when two atoms share a rank.
    atom_order = sorted(range(len(ranks)), key=lambda atom_idx: ranks[atom_idx])

    return Chem.RenumberAtoms(mol, atom_order)
def translate(mol: Chem.rdchem.Mol, new_centroid: Union[np.ndarray, List[int]], conf_id: int = -1):
    """Move a given conformer of a molecule to a new position. The transformation is performed
    in place.

    Args:
        mol: the molecule.
        new_centroid: the new position to move to of shape [x, y, z]
        conf_id: id of the conformer.
    """
    conformer = mol.GetConformer(conf_id)

    # Current centroid as a plain array
    centroid = rdMolTransforms.ComputeCentroid(conformer)
    current_position = np.array([centroid.x, centroid.y, centroid.z])

    # 4x4 transform: identity rotation, translation in the last column
    transform = np.eye(4)
    transform[:3, 3] = new_centroid - current_position

    rdMolTransforms.TransformConformer(conformer, transform)
def label(mol: Chem.rdchem.Mol, atomlabels: List) -> None:  # -> mol inplace.
    """
    Store each entry of ``atomlabels`` as the ``AtomLabel`` property of the atom
    with the same index.
    https://www.rdkit.org/docs/RDKit_Book.html

    :param mol: the molecule to be labelled _in place_.
    :type mol: Chem.rdchem.Mol
    :param atomlabels: atom labels, one per atom
    :type atomlabels: List[str]
    :return: None
    """
    n_atoms = mol.GetNumAtoms()
    assert len(atomlabels) == n_atoms, \
        'the number of atoms in mol has to be the same as atomlabels. Hydrogens? dehydrogenate!'
    for atom in mol.GetAtoms():
        atom.SetProp('AtomLabel', atomlabels[atom.GetIdx()])
    return None
def num_atom_in_ring(mol: Chem.rdchem.Mol):
    """
    Count the atoms of the molecule that are part of a ring.

    :param mol: the molecule to inspect.
    :return: a one-element count vector ``[n_ring_atoms]``.
    """
    n_ring_atoms = sum(1 for atom in mol.GetAtoms() if atom.IsInRing())
    return [n_ring_atoms]
def duplicate_conformers(m: Chem.rdchem.Mol, new_conf_idx: int, rms_limit: float = 0.5) -> bool:
    """Tell whether a conformer is closer than `rms_limit` to any other conformer.

    The RMS is computed with `prealigned=True`, so the conformers are compared
    as they are. The real conformer ids are used, which keeps the check valid
    when the ids are not the contiguous range `0..n-1`.

    Args:
        m: a molecule with conformers.
        new_conf_idx: id of the conformer to test.
        rms_limit: RMS below which two conformers count as duplicates.

    Returns:
        True if at least one other conformer has an RMS to `new_conf_idx`
        strictly below `rms_limit`, False otherwise.
    """
    other_ids = [conf.GetId() for conf in m.GetConformers() if conf.GetId() != new_conf_idx]

    # Generator so the scan stops at the first duplicate found.
    return any(
        AllChem.GetConformerRMS(m, new_conf_idx, conf_id, prealigned=True) < rms_limit
        for conf_id in other_ids
    )
def copy_mol_props(source: Chem.rdchem.Mol, destination: Chem.rdchem.Mol):
    """Copy properties from one source molecule to another destination molecule.

    The properties are read with `GetPropsAsDict()` and written on
    `destination` through `dm.set_mol_props`.

    Args:
        source: a molecule to copy from.
        destination: a molecule to copy to.
    """
    dm.set_mol_props(destination, source.GetPropsAsDict())
def is_zwitterion(mol: Chem.rdchem.Mol):
    """
    Flag the molecule as zwitterion when it carries a formal charge.

    NOTE(review): the check only looks for one atom with a non-zero formal
    charge; it does not require both a positive and a negative centre.

    :param mol: the molecule to inspect.
    :return: ``[1]`` if any atom has a non-zero formal charge, else ``[0]``.
    """
    has_charged_atom = any(atom.GetFormalCharge() != 0 for atom in mol.GetAtoms())
    return [1 if has_charged_atom else 0]
def display(mol: Chem.rdchem.Mol, show='name'):
    """
    Draw the molecule, optionally annotating the atoms.

    NOTE(review): the final ``display(...)`` call resolves to whatever
    ``display`` is bound in the enclosing scope. If this function is itself
    that module-level name the call recurses; presumably an external display
    helper is intended -- confirm.

    :param mol: the molecule to draw; annotations are written on a deep copy.
    :param show: ``'index'`` writes the atom index as ``molAtomMapNumber``;
        ``'name'`` is not implemented; a falsy value draws without annotation;
        any other value raises ``ValueError`` as soon as an atom is visited.
    :return: None
    """
    if show:
        mol = copy.deepcopy(mol)
        for atom in mol.GetAtoms():
            if show == 'index':
                atom.SetProp('molAtomMapNumber', str(atom.GetIdx()))
            elif show == 'name':
                # Intended behaviour once a suitable property is found:
                # show each atom's 'AtomLabel' in place of the map number.
                raise NotImplementedError(
                    'I need to figure out what property is needed as molAtomMapNumber is an str(int)'
                )
            else:
                raise ValueError
    display(Draw.MolToImage(mol))
    return None
def atom_graph(mol: Chem.rdchem.Mol):
    """
    Generates the atom graph from an RDKit Mol object.

    Every atom becomes a node keyed by its index, carrying its descriptors as
    node attributes plus a ``features`` array holding the same values in a
    fixed order. Every bond becomes an edge with a ``bond_type`` attribute.

    Function taken from
    https://github.com/maxhodak/keras-molecules/pull/32/files.

    :param mol: the molecule; a falsy value (e.g. ``None``) yields an empty graph.
    :returns: a NetworkX graph.
    """
    # Created up front so a falsy ``mol`` returns an empty graph instead of
    # failing on an unbound local.
    G = nx.Graph()
    if not mol:
        return G

    for atom in mol.GetAtoms():
        # Each descriptor is read once and reused for the feature vector.
        atomic_num = atom.GetAtomicNum()
        formal_charge = atom.GetFormalCharge()
        chiral_tag = atom.GetChiralTag()
        hybridization = atom.GetHybridization()
        num_explicit_hs = atom.GetNumExplicitHs()
        is_aromatic = atom.GetIsAromatic()
        mass = atom.GetMass()
        implicit_valence = atom.GetImplicitValence()
        total_hydrogens = atom.GetTotalNumHs()

        G.add_node(
            atom.GetIdx(),
            atomic_num=atomic_num,
            formal_charge=formal_charge,
            chiral_tag=chiral_tag,
            hybridization=hybridization,
            num_explicit_hs=num_explicit_hs,
            is_aromatic=is_aromatic,
            mass=mass,
            implicit_valence=implicit_valence,
            total_hydrogens=total_hydrogens,
            features=np.array([
                atomic_num,
                formal_charge,
                chiral_tag,
                hybridization,
                num_explicit_hs,
                is_aromatic,
                mass,
                implicit_valence,
                total_hydrogens,
            ]),
        )

    for bond in mol.GetBonds():
        G.add_edge(
            bond.GetBeginAtomIdx(),
            bond.GetEndAtomIdx(),
            bond_type=bond.GetBondType(),
        )

    return G
def rmsd(mol: Chem.rdchem.Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Every ordered pair (i, j) is evaluated with `rdMolAlign.AlignMol`, using
    conformer i as probe and conformer j as reference.

    Args:
        mol: a molecule

    Returns:
        A square array of shape (n_confs, n_confs); entry [i, j] is the value
        returned for probe i and reference j.

    Raises:
        ValueError: if the molecule has fewer than two conformers.
    """
    n_confs = mol.GetNumConformers()
    if n_confs < 2:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    # Row-major evaluation: i is the outer loop, j the inner one.
    pairwise = [
        [
            rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=probe_id, refCid=ref_id)
            for ref_id in range(n_confs)
        ]
        for probe_id in range(n_confs)
    ]
    return np.array(pairwise)
def return_centroids(
    mol: Chem.rdchem.Mol,
    conf_clusters: Sequence[Sequence[int]],
    centroids: bool = True,
) -> Union[List[Chem.rdchem.Mol], Chem.rdchem.Mol]:
    """Given a list of cluster indices, return one single molecule
    with only the centroid of the clusters or a list of molecules per cluster.

    Cluster indices are treated as positions in `mol.GetConformers()` in both
    modes.

    Args:
        mol: a molecule. In centroid mode its conformers are replaced in place.
        conf_clusters: list of cluster indices.
        centroids: If True, return one molecule with centroid conformers
            only. If False return a list of molecules per cluster with all
            the conformers of the cluster.

    Returns:
        `mol` itself, holding only the centroid conformers, when `centroids`
        is True; otherwise a list with one deep copy of `mol` per cluster,
        each holding that cluster's conformers. Conformer ids are reassigned
        on insertion.
    """
    if centroids:
        # The centroid of each cluster is the first element of its index list.
        centroid_positions = [indices[0] for indices in conf_clusters]

        # The conformers are taken from a clone so they survive the removal below.
        mol_clone = copy.deepcopy(mol)
        clone_confs = mol_clone.GetConformers()
        kept_confs = [clone_confs[i] for i in centroid_positions]

        mol.RemoveAllConformers()
        for conf in kept_confs:
            mol.AddConformer(conf, assignId=True)
        return mol

    # One new molecule per cluster, filled with the cluster's conformers.
    source_confs = mol.GetConformers()
    mols = []
    for cluster in conf_clusters:
        m = copy.deepcopy(mol)
        m.RemoveAllConformers()
        for position in cluster:
            m.AddConformer(source_confs[position], assignId=True)
        mols.append(m)
    return mols
def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
    """creates a PyGromosTools CNF type from a rdkit molecule. If a conformation exists the first one will be used.

    Sets the ``TITLE``, ``POSITION`` and ``GENBOX`` attributes on ``self``.
    When the molecule has no conformer, hydrogens are added and one conformer
    is embedded and UFF-optimized on that hydrogen-added copy.

    Parameters
    ----------
    mol : Chem.rdchem.Mol
        Molecule, possibly with a conformation
    conversionFactor : float
        the factor used to convert length from rdkit to Gromos
        (default: angstrom -> nano meter = 0.1)

    Raises
    ------
    ValueError
        if no conformer exists and the embedding fails.
    """
    # The second InChI layer is used as residue / title name.
    inchi = Chem.MolToInchi(mol).split("/")
    name = inchi[1] if len(inchi) >= 2 else "XXX"
    self.TITLE = TITLE("\t" + name + " created from RDKit")

    # check if conformations exist else create a new one
    if mol.GetNumConformers() < 1:
        mol = Chem.AddHs(mol)
        if AllChem.EmbedMolecule(mol) < 0:
            raise ValueError("Could not embed a conformer for the molecule.")
        AllChem.UFFOptimizeMolecule(mol)

    # Default id (-1) selects the first conformer, whatever its id is.
    conf = mol.GetConformer()

    # fill a list with atomP types from RDKit data
    atomList = []
    for i in range(mol.GetNumAtoms()):
        position = conf.GetAtomPosition(i)
        atomList.append(
            blocks.atomP(
                resID=1,
                resName=name,
                atomType=mol.GetAtomWithIdx(i).GetSymbol(),
                atomID=i + 1,
                xp=conversionFactor * position.x,
                yp=conversionFactor * position.y,
                zp=conversionFactor * position.z,
            )
        )

    # set POSITION attribute
    self.POSITION = blocks.POSITION(atomList)

    # Defaults set for GENBOX - for liquid sim adjust manually
    self.GENBOX = blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90])
def bond_graph(mol: Chem.rdchem.Mol):
    """
    Generates the bond graph from an RDKit Mol object.

    Here, unlike the atom graph, bonds are nodes, and are connected to each
    other by atoms. Each node is keyed by ``(begin_atom_idx, end_atom_idx)``
    and each edge stores the index of the shared atom as ``atom``.

    :param mol: the molecule; a falsy value (e.g. ``None``) yields an empty graph.
    :returns: a NetworkX graph.
    """
    # Created up front so a falsy ``mol`` returns an empty graph instead of
    # failing on an unbound local.
    G = nx.Graph()
    if not mol:
        return G

    for bond in mol.GetBonds():
        bond_type = bond.GetBondTypeAsDouble()
        aromatic = bond.GetIsAromatic()
        in_ring = bond.IsInRing()
        is_conjugated = bond.GetIsConjugated()
        G.add_node(
            (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()),
            bond_type=bond_type,
            aromatic=aromatic,
            stereo=bond.GetStereo(),
            in_ring=in_ring,
            is_conjugated=is_conjugated,
            # Stereo is intentionally left out of the feature vector.
            features=[
                bond_type,
                int(aromatic),
                int(in_ring),
                int(is_conjugated),
            ],
        )

    for atom in mol.GetAtoms():
        # Every pair of bonds around an atom is joined through that atom;
        # atoms with fewer than two bonds produce no pair.
        joining_node = atom.GetIdx()
        for b1, b2 in combinations(atom.GetBonds(), 2):
            n1 = (b1.GetBeginAtomIdx(), b1.GetEndAtomIdx())
            n2 = (b2.GetBeginAtomIdx(), b2.GetEndAtomIdx())
            # The graph is undirected: one edge covers both directions.
            G.add_edge(n1, n2, atom=joining_node)

    return G
def atom_indices_to_mol(mol: Chem.rdchem.Mol, copy: bool = False):
    """Add the `molAtomMapNumber` property to each atom, set to the atom index.

    Args:
        mol: a molecule
        copy: Whether to copy the molecule.

    Returns:
        The annotated molecule (the copy when `copy` is True).
    """
    annotated = copy_mol(mol) if copy is True else mol

    for atom in annotated.GetAtoms():
        atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
    return annotated
def with_message_passing(mol: Chem.rdchem.Mol):
    """
    One-hot encode the size class of the molecule. A molecule with 1 or 2
    heavy atoms is not processed by the message passing action.
    (Only for D-MPNN)

    :param mol: the molecule to inspect.
    :return: ``[hv=1, hv=2, hv>2]`` with a single 1; any atom count other
        than 1 or 2 lands in the last slot.
    """
    # Position of the hot bit for the two special sizes; everything else is 2.
    slot = {1: 0, 2: 1}.get(mol.GetNumAtoms(), 2)
    m_passing_vector = [0, 0, 0]
    m_passing_vector[slot] = 1
    return m_passing_vector
def adjust_singleton(mol: Chem.rdchem.Mol) -> Optional[Chem.rdchem.Mol]:
    """Remove all atoms that are essentially disconnected singleton nodes in the molecular graph.
    For example, the chlorine atom and methane fragment will be removed in
    `Cl.[N:1]1=CC(O)=CC2CCCCC12.CC.C`, but not the ethane fragment.

    An atom is considered a singleton when its explicit valence is 0.

    Args:
        mol: a molecule.

    Returns:
        A new molecule without the singleton atoms.
    """
    editable = Chem.RWMol(mol)

    singleton_ids = [atom.GetIdx() for atom in mol.GetAtoms() if atom.GetExplicitValence() == 0]

    # Highest index first so earlier removals do not shift later targets.
    for atom_idx in sorted(singleton_ids, reverse=True):
        editable.RemoveAtom(atom_idx)

    return editable.GetMol()
def to_neutral(mol: Chem.rdchem.Mol) -> Optional[Chem.rdchem.Mol]:
    """Neutralize the charge of a molecule.

    The formal charge is reset to 0 on every negatively charged atom, and on
    every positively charged atom whose explicit valence is at least the
    default valence of its element. The molecule is modified in place.

    Args:
        mol: a molecule.

    Returns:
        mol: the same molecule object (None is passed through).
    """
    if mol is None:
        return mol

    for atom in mol.GetAtoms():
        charge = atom.GetFormalCharge()
        if charge >= 0:
            saturated = atom.GetExplicitValence() >= PERIODIC_TABLE.GetDefaultValence(
                atom.GetSymbol()
            )
            if not (saturated and charge > 0):
                continue
        atom.SetFormalCharge(0)
        atom.UpdatePropertyCache(False)
    return mol
def generate_conformers(
    mol: Chem.rdchem.Mol,
    ref_mol: Chem.rdchem.Mol,
    num_conf: int,
    ref_smi: Optional[str] = None,
    minimum_conf_rms: Optional[float] = None,
) -> Chem.rdchem.Mol:
    """Generate conformers of `mol` constrained on a core taken from `ref_mol`.

    Note that the conformers of the input `mol` are removed in place before
    the embedding starts.

    Args:
        mol: the molecule to embed.
        ref_mol: the reference molecule providing the core coordinates.
        num_conf: number of embedding attempts; attempt `i` uses random seed `i`.
        ref_smi: SMILES of the part to keep fixed. When empty or None, the
            result of `get_mcs(mol, ref_mol)` is used.
        minimum_conf_rms: when given, a new conformer is dropped if
            `duplicate_conformers` reports it as a duplicate at this RMS limit.

    Returns:
        A single molecule (a deep copy of `mol`, without added hydrogens)
        holding the retained conformers.
    """
    # if SMILES to be fixed are not given, assume to the MCS
    if not ref_smi:
        ref_smi = get_mcs(mol, ref_mol)

    # Creating core of reference ligand: keep the core, then strip the
    # wildcard atoms left where the side chains were attached.
    core_with_wildcards = AllChem.ReplaceSidechains(ref_mol, Chem.MolFromSmiles(ref_smi))
    core1 = AllChem.DeleteSubstructs(core_with_wildcards, Chem.MolFromSmiles('*'))
    core1.UpdatePropertyCache()

    # Add Hs so that conf gen is improved
    mol.RemoveAllConformers()
    outmol = deepcopy(mol)
    mol_wh = Chem.AddHs(mol)

    # Generate conformers with constrained embed
    for seed in range(num_conf):
        temp_mol = Chem.Mol(mol_wh)  # copy to avoid inplace changes
        AllChem.ConstrainedEmbed(temp_mol, core1, randomseed=seed)
        temp_mol = Chem.RemoveHs(temp_mol)
        conf_idx = outmol.AddConformer(temp_mol.GetConformer(0), assignId=True)

        # Drop the conformer just added when it duplicates an existing one.
        if minimum_conf_rms is not None and duplicate_conformers(
            outmol, conf_idx, rms_limit=minimum_conf_rms
        ):
            outmol.RemoveConformer(conf_idx)

    return outmol
def name(self, mol: Chem.rdchem.Mol) -> List:
    """
    Label a molecule according to the reference ligand.

    The maximum common substructure of ``mol`` and ``self.ref`` is computed;
    atoms of ``mol`` matched by it inherit the corresponding entries of
    ``self.reflabels`` and the remaining atoms are named by
    ``self.complete_labels``. Both the common substructure and ``mol`` are
    labelled through ``self.label``.

    :param mol: the mol to label
    :type mol: Chem.rdchem.Mol
    :return: atom labels
    :rtype: List[str]
    """
    mcs_smarts = rdFMCS.FindMCS([mol, self.ref]).smartsString
    common = Chem.MolFromSmarts(mcs_smarts)

    # Labels of the reference atoms, in the atom order of the common pattern.
    ref_match = self.ref.GetSubstructMatches(common)[0]
    commonlabels = [self.reflabels[ref_idx] for ref_idx in ref_match]
    self.label(common, commonlabels)

    # Same pattern order on the query side: mol atom index -> reference label.
    mol_match = mol.GetSubstructMatches(common)[0]
    mollabels_dict = dict(zip(mol_match, commonlabels))

    mollabels = self.complete_labels(mol, mollabels_dict, self.mark_upmatched)
    self.label(mol, mollabels)
    return mollabels
def get_atom_features(atom: Chem.rdchem.Atom):
    """Given an atom object, returns a `torch.Tensor` of features.

    The feature vector is the concatenation, in this order, of: one-hot
    symbol, aromaticity flag, one-hot chiral tag, one-hot degree, one-hot
    explicit valence, one-hot formal charge and one-hot implicit valence.
    Each one-hot block is produced by `onek_unk_encoding` against the
    corresponding module-level vocabulary.
    """
    symbol = onek_unk_encoding(atom.GetSymbol(), SYMBOLS)
    aro = [atom.GetIsAromatic()]
    chiral = onek_unk_encoding(int(atom.GetChiralTag()), CHIRAL_TAG)
    degree = onek_unk_encoding(atom.GetDegree(), DEGREES)
    exp_valence = onek_unk_encoding(atom.GetExplicitValence(), EXPLICIT_VALENCES)
    fc = onek_unk_encoding(atom.GetFormalCharge(), FORMAL_CHARGES)
    imp_valence = onek_unk_encoding(atom.GetImplicitValence(), IMPLICIT_VALENCES)

    feature_array = symbol + aro + chiral + degree + exp_valence + fc + imp_valence
    return torch.Tensor(feature_array)