def charge_gasteiger_h(atom: Atom) -> float:
    """Gasteiger partial charge for implicit hydrogens (float).

    Reads the ``_GasteigerHCharge`` property of ``atom``. When the property
    is missing, ``AllChem.ComputeGasteigerCharges`` is first run on the
    molecule that owns the atom.
    """
    prop_name = '_GasteigerHCharge'
    if atom.HasProp(prop_name):
        return atom.GetDoubleProp(prop_name)
    # Property not set yet: compute charges for the whole owning molecule.
    owner = atom.GetOwningMol()
    AllChem.ComputeGasteigerCharges(owner)
    return atom.GetDoubleProp(prop_name)
def _get_atom_descriptors(self, atom: Chem.Atom) -> dict:
    """Collect the descriptor fields of ``atom`` into a dictionary.

    :param atom: atom carrying the ``_rType`` and ``_GasteigerCharge`` properties
    :return: dict with the keys ``name``, ``rtype``, ``mtype`` and ``partial``
    """
    descriptors = {}
    descriptors['name'] = self._get_pdb_atomname(atom)
    descriptors['rtype'] = atom.GetProp('_rType')
    # The mtype field is a fixed placeholder for every atom.
    descriptors['mtype'] = ' X '
    descriptors['partial'] = atom.GetDoubleProp('_GasteigerCharge')
    return descriptors
def graph(self, m):
    """Return a descriptor string built from the SMILES, H count and charge of ``m``.

    The result is ``"%s%s%d%+d" % (smiles, ' H', hcount, charge)`` where
    ``smiles`` is ``MolToSmiles(m, isomericSmiles=True)`` and ``charge`` is
    ``GetFormalCharge(m)``.

    A single-bond copy of the non-hydrogen atoms is also assembled in an
    ``EditableMol``, but the statement that consumed it is commented out,
    so that copy is not read when the return value is built.

    :param m: molecule to describe (presumably an RDKit ``Mol`` -- confirm)
    :return: the formatted descriptor string
    """
    from rdkit.Chem import EditableMol, Atom, rdchem
    # Presumably the number of implicit hydrogens (all atoms minus explicit
    # atoms) -- TODO confirm against GetNumAtoms(onlyExplicit) semantics.
    hcount = m.GetNumAtoms(False) - m.GetNumAtoms(True)
    # create new molecule using single bonds only
    em = EditableMol(Mol())
    # nbridx maps an atom index in ``m`` to its index in ``em``; entries stay
    # None for atoms that were skipped or not visited yet.
    nbridx = [None] * m.GetNumAtoms()
    iatom = 0
    for atom in m.GetAtoms():
        atnum = atom.GetAtomicNum()
        if atnum == 1 and atom.GetIsotope() == 1:
            #if atom.GetMass() > 1: pass
            # Hydrogen atoms with isotope 1 are counted, not copied.
            hcount += 1
        else:
            newatom = Atom(atnum)
            #if atom.GetTotalDegree() == 0:
            newatom.SetNoImplicit(True) # otherwise [Na]. becomes [NaH].
            #newatom.SetFormalCharge(atom.GetFormalCharge())
            newatom.SetFormalCharge(0)
            em.AddAtom(newatom)
            aidx = atom.GetIdx()
            nbridx[aidx] = iatom
            iatom += 1
            # Bond the new atom to every neighbour that was already copied.
            for a2 in atom.GetNeighbors():
                a2idx = nbridx[a2.GetIdx()]
                if a2idx != None:
                    # NOTE(review): ``aidx`` is an index in ``m`` while
                    # ``a2idx`` is an index in ``em``; these differ once a
                    # hydrogen has been skipped -- confirm this is intended.
                    em.AddBond(aidx, a2idx, rdchem.BondType.SINGLE)
    #cansmi = self.cansmiles(em.GetMol())
    # The SMILES is taken from the input molecule itself, not from ``em``.
    cansmi = MolToSmiles(m, isomericSmiles=True)
    #cansmi = cansmi.replace('+','').replace('-','').replace('[N]','N').replace('[O]','O').replace('[C]','C').replace('[I]','I').replace('[S]','S').replace('[P]','P').replace('[B]','B').replace('[Br]','Br').replace('[Cl]','Cl')
    return "%s%s%d%+d" % (cansmi, ' H', hcount, GetFormalCharge(m))
def decode(v):
    """Decode a molvector into a molecule

    The vector is consumed in consecutive chunks of
    ``atom_size + bond_chunk_size`` values, one chunk per atom. Each chunk
    is unpacked into eleven values: element, formal charge, explicit
    hydrogen count, then four ``(bond type, offset)`` pairs. Trailing values
    that do not fill a whole chunk are ignored.

    A pair whose offset is falsy is skipped; otherwise the offset is added
    to the current atom index to get the bond partner. Bonds are collected
    under their sorted index pair, so a later entry for the same pair
    overwrites an earlier one. Indices are wrapped modulo the atom count,
    and a bond that cannot be added is dropped (best effort).

    :param v: molvector
    :result rdkit.RWMol:
    """
    chunksize = atom_size + bond_chunk_size
    nchunks = len(v) // chunksize
    m = RWMol()
    bonds = {}
    for i in range(nchunks):
        start = i * chunksize
        el, c, h, b1, o1, b2, o2, b3, o3, b4, o4 = v[start:start + chunksize]
        atom = Atom(el)
        atom.SetFormalCharge(c)
        atom.SetNumExplicitHs(h)
        atom_idx = m.AddAtom(atom)
        # Atoms are appended in order, so the new index must equal the chunk index.
        assert atom_idx == i
        for b, o in ((b1, o1), (b2, o2), (b3, o3), (b4, o4)):
            if o:
                to_atom = atom_idx + o
                bonds[tuple(sorted((atom_idx, to_atom)))] = b
    # Adding bonds does not change the atom count, so read it once.
    natoms = m.GetNumAtoms()
    for (a1, a2), btype in bonds.items():
        try:
            m.AddBond(a1 % natoms, a2 % natoms, BondType.values[btype])
        except Exception:
            # Deliberately best effort: skip bonds that cannot be created
            # (unknown bond type, duplicate or otherwise invalid bond), but
            # let KeyboardInterrupt/SystemExit propagate.
            continue
    return m
def is_hbond_donor(atom: Atom) -> int:
    """If the atom is a hydrogen bond donor (0 or 1).

    Reads the ``_Feature_Donor`` property of ``atom``. When the property is
    missing, ``_ChemicalFeatureGenerator().assign_features`` is first run on
    the molecule that owns the atom.
    """
    prop_name = '_Feature_Donor'
    if atom.HasProp(prop_name):
        return atom.GetIntProp(prop_name)
    # Property not set yet: assign features on the whole owning molecule.
    owner = atom.GetOwningMol()
    _ChemicalFeatureGenerator().assign_features(owner)
    return atom.GetIntProp(prop_name)
def _get_xyz(cls, atom: Chem.Atom) -> Tuple[float]:
    """Return the coordinates stored on ``atom`` as properties.

    :param atom: atom that may carry the ``_x``, ``_y`` and ``_z`` properties
    :return: ``(x, y, z)`` when ``_x`` is present, otherwise an empty tuple
    """
    # Only ``_x`` is checked; ``_y`` and ``_z`` are read unconditionally after it.
    if not atom.HasProp('_x'):
        return ()
    return tuple(atom.GetDoubleProp(axis) for axis in ('_x', '_y', '_z'))
def _parse_atom(self, atom: Chem.Atom) -> None:
    """Record ``atom`` either as a regular atom or as a connection point.

    An atom whose symbol is ``'*'`` adds a ``CONNECT`` entry named after its
    first neighbour; any other atom has its descriptors appended to
    ``self.ATOM``.

    :param atom: atom to register
    """
    if atom.GetSymbol() != '*':
        descriptors = self._get_atom_descriptors(atom)
        self.ATOM.append(descriptors)
        return
    # Dummy atom: the connection is attributed to its first neighbour.
    anchor = atom.GetNeighbors()[0]
    anchor_name = self._get_pdb_atomname(anchor)
    connect_number = len(self.CONNECT) + 1
    self.CONNECT.append([anchor_name, connect_number, 'CONNECT'])
def _get_square(self, first: Chem.Atom, second: Chem.Atom) -> Union[Tuple[int, int], None]:
    """Find a third and fourth atom index associated with ``first``/``second``.

    Every neighbour of ``second`` other than ``first`` is tried as the
    third atom. For each one, ``self._get_triangles(first, third)`` is
    queried (presumably the indices of atoms bonded to both -- confirm).
    The first candidate giving more than one index wins; the fourth atom is
    the first returned index that is not ``second`` itself.

    :param first: atom
    :param second: atom
    :return: ``(third index, fourth index)``, or None when no candidate qualifies
    """
    first_idx = first.GetIdx()
    second_idx = second.GetIdx()
    for third in second.GetNeighbors():
        if third.GetIdx() == first_idx:
            continue
        fourths = self._get_triangles(first, third)
        if fourths and len(fourths) > 1:
            fourth = [f for f in fourths if f != second_idx][0]
            return third.GetIdx(), fourth
    # Only reached once every candidate third atom has been examined.
    return None
def _get_valence_difference(self, atom: Chem.Atom) -> int:
    """Return the valence of ``atom`` minus a periodic-table reference valence.

    The reference is the largest entry of the element's valence list when
    ``self._valence_mode`` is ``'max'``, otherwise the element's default
    valence.

    :param atom: atom to assess
    :return: valence from ``self._get_atom_valence`` minus the reference valence
    """
    table = Chem.GetPeriodicTable()
    current = self._get_atom_valence(atom)
    if self._valence_mode == 'max':
        reference = max(table.GetValenceList(atom.GetAtomicNum()))
    else:
        reference = table.GetDefaultValence(atom.GetAtomicNum())
    return current - reference
def assess_atom(atom: Chem.Atom, bt: Chem.BondType) -> Tuple[bool, Chem.BondType]:
    """Classify ``atom`` by atomic number, neighbour count and aromaticity.

    Rules, checked in order:

    * atomic number above 8: accepted with ``bt`` unchanged;
    * aromatic with at most 2 neighbours: accepted, bond type forced to single;
    * non-aromatic with at most 3 neighbours: accepted with ``bt`` unchanged;
    * anything else: rejected (too bonded already).

    :param atom: atom to assess
    :param bt: bond type to return when no override applies
    :return: ``(accepted, bond type)``
    """
    if atom.GetAtomicNum() > 8:
        return True, bt
    # All checks use ``atom``: the original third branch read an undefined
    # ``atom_i`` instead, which raised NameError or silently inspected an
    # unrelated variable from an outer scope.
    n_neighbors = len(atom.GetNeighbors())
    aromatic = atom.GetIsAromatic()
    if aromatic and n_neighbors <= 2:
        return True, Chem.BondType.SINGLE
    if not aromatic and n_neighbors <= 3:
        return True, bt
    return False, bt  # too bonded already!
def _get_origin(cls, atom: Chem.Atom) -> List[str]:
    """Return the decoded ``_Origin`` property of ``atom``.

    :param atom: atom that may carry an ``_Origin`` property
    :return: the JSON-decoded property value, or an empty list when the
        property is missing or holds the literal string ``'none'``
    """
    if not atom.HasProp('_Origin'):
        return []
    raw = atom.GetProp('_Origin')
    return [] if raw == 'none' else json.loads(raw)
def stereochemistry(atom: Atom) -> str:
    """CIP stereochemistry label (string).

    ``rdCIPLabeler.AssignCIPLabels`` is run on the owning molecule once; the
    molecule is then marked with the ``_CIPLabelsAssigned`` property so
    later calls skip the assignment. An atom without a ``_CIPCode``
    property yields the empty string.
    """
    owner = atom.GetOwningMol()
    done_flag = '_CIPLabelsAssigned'
    if not owner.HasProp(done_flag):
        rdCIPLabeler.AssignCIPLabels(owner)
        owner.SetProp(done_flag, '1')
    if atom.HasProp('_CIPCode'):
        return atom.GetProp('_CIPCode')
    return ''
def _get_atom_valence(self, atom: Chem.Atom):
    """
    Cannot get the normal way as it cannot be sanitised.

    The value is the sum of ``GetBondTypeAsDouble()`` over the atom's bonds
    minus the atom's formal charge.

    :param atom: atom whose bonds are summed
    :return: summed bond orders minus formal charge
    """
    bond_order_total = sum(bond.GetBondTypeAsDouble() for bond in atom.GetBonds())
    return bond_order_total - atom.GetFormalCharge()
def atom_features(atom: Chem.Atom):
    """Build the feature vector of ``atom`` as a NumPy array.

    The vector concatenates, in order: the encoded element symbol, degree,
    total hydrogen count and implicit valence, followed by the aromaticity
    flag. The encoders are presumably list-returning one-hot helpers, since
    their results are joined with ``+`` and a list literal -- confirm.
    """
    known_symbols = [
        'C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca',
        'Fe', 'As', 'Al', 'I', 'B', 'V', 'K', 'Tl', 'Yb', 'Sb', 'Sn', 'Ag',
        'Pd', 'Co', 'Se', 'Ti', 'Zn', 'H', 'Li', 'Ge', 'Cu', 'Au', 'Ni',
        'Cd', 'In', 'Mn', 'Zr', 'Cr', 'Pt', 'Hg', 'Pb', 'Unknown'
    ]
    symbol_bits = encoding_onehot_unk(atom.GetSymbol(), known_symbols)
    degree_bits = encoding_onehot(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
    hydrogen_bits = encoding_onehot_unk(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])
    valence_bits = encoding_onehot_unk(atom.GetImplicitValence(), [0, 1, 2, 3, 4, 5])
    aromatic_bit = [atom.GetIsAromatic()]
    return np.array(
        symbol_bits + degree_bits + hydrogen_bits + valence_bits + aromatic_bit)
def is_sp2(atom: Chem.Atom) -> bool:
    """Judge from the bond count alone whether ``atom`` counts as sp2.

    True for nitrogen with fewer than 3 bonds, carbon with fewer than 4 and
    oxygen with fewer than 2. Every other element, hydrogen included, gives
    False.
    """
    bond_count = len(atom.GetBonds())
    # Bond count at which each supported element stops qualifying.
    bond_limit = {'N': 3, 'C': 4, 'O': 2}
    limit = bond_limit.get(atom.GetSymbol())
    return limit is not None and bond_count < limit
def _is_count_valid(self, atom: Chem.Atom) -> bool:
    """
    Some atoms are not to be counted as they will be deleted.

    An atom is excluded when it has a ``DELETE`` property, or when its
    ``_ori_i`` property exists and equals -1.

    :param atom: atom to check
    :return: False for excluded atoms, True otherwise
    """
    if atom.HasProp('DELETE'):
        return False
    excluded = atom.HasProp('_ori_i') and atom.GetIntProp('_ori_i') == -1
    return not excluded
def set_atom_feat(a: Atom, key: str, val: int):
    """Apply one named feature value to atom ``a`` and return the atom.

    Supported keys are ``atomic_num``, ``formal_charge``, ``chiral_tag``
    (``val`` is looked up in ``rdchem.ChiralType.values``),
    ``num_explicit_hs`` and ``is_aromatic`` (``val`` is cast to bool). Any
    other key leaves the atom untouched.
    """
    # Setters are wrapped in lambdas so only the selected one is evaluated.
    setters = {
        'atomic_num': lambda: a.SetAtomicNum(val),
        'formal_charge': lambda: a.SetFormalCharge(val),
        'chiral_tag': lambda: a.SetChiralTag(rdchem.ChiralType.values[val]),
        'num_explicit_hs': lambda: a.SetNumExplicitHs(val),
        'is_aromatic': lambda: a.SetIsAromatic(bool(val)),
    }
    apply_setter = setters.get(key)
    if apply_setter is not None:
        apply_setter()
    return a
def _is_square(self, first: Chem.Atom, second: Chem.Atom) -> bool:
    """
    Get bool of whether two atoms share a common over-neighbor.
    Ie. joining them would make a square. Direct bond does not count.

    Every neighbour of ``second`` other than ``first`` is tested with
    ``self._is_triangle(first, neighbour)``; a single positive is enough.

    :param first: atom
    :param second: atom
    :return: True if any such neighbour forms a triangle with ``first``
    """
    first_idx = first.GetIdx()
    for third in second.GetNeighbors():
        if third.GetIdx() == first_idx:
            continue
        # The strict ``is True`` comparison is kept from the original check.
        if self._is_triangle(first, third) is True:
            return True
    # Only reached once every candidate neighbour has been examined.
    return False
def get_other(bond: Chem.Bond, atom: Chem.Atom) -> Chem.Atom:
    """Given a bond and an atom return the other.

    The begin atom is returned when ``atom`` is the bond's end atom; in
    every other case the end atom is returned.
    """
    # Indices are compared because atom == itself gives false.
    atom_is_end = bond.GetEndAtomIdx() == atom.GetIdx()
    return bond.GetBeginAtom() if atom_is_end else bond.GetEndAtom()
def graph2(self, m):
    """Return a descriptor string for the single-bond skeleton of ``m``.

    A copy of ``m`` is built with the same atomic numbers (formal charges
    are not copied) and every bond replaced by a single bond. Hydrogens are
    stripped from the copy with ``RemoveHs`` when possible, and the copy is
    passed to ``self.cansmiles``. The result is
    ``"%s%s%d%+d" % (smiles, ' H', hcount, charge)`` where ``hcount`` sums
    ``atom.GetTotalNumHs(False)`` over the atoms of ``m`` and ``charge`` is
    ``GetFormalCharge(m)``.

    :param m: molecule to describe (presumably an RDKit ``Mol`` -- confirm)
    :return: the formatted descriptor string
    """
    from rdkit.Chem import EditableMol, RemoveHs, Atom, rdchem

    # create new molecule using single bonds only
    em = EditableMol(Mol())
    hcount = 0
    for atom in m.GetAtoms():
        hcount += atom.GetTotalNumHs(False)
        em.AddAtom(Atom(atom.GetAtomicNum()))
    for bond in m.GetBonds():
        em.AddBond(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(),
                   rdchem.BondType.SINGLE)
    try:
        mol = RemoveHs(em.GetMol())
    except Exception:
        # Best effort: fall back to the unstripped skeleton when RemoveHs
        # fails, while letting KeyboardInterrupt/SystemExit propagate.
        mol = em.GetMol()
    cansmi = self.cansmiles(mol)
    return "%s%s%d%+d" % (cansmi, ' H', hcount, GetFormalCharge(m))
def decorate_mol(get_new_mol, mol_parse, mol_data, three_d_mol=False):
    """Replace each matched hydrogen with astatine, one molecule per match.

    Matches of the SMARTS ``[*;R]-;!@[H]`` are found in a molecule from
    ``get_new_mol(mol_parse, mol_data)``. For every match a fresh molecule
    is requested, the second matched atom is replaced by ``Atom(85)``, and
    the result is written to SMILES, re-parsed and written again.

    :param get_new_mol: callable returning an editable molecule
    :param mol_parse: first argument forwarded to ``get_new_mol``
    :param mol_data: second argument forwarded to ``get_new_mol``
    :param three_d_mol: when true, each SMILES maps to a list of all matches
        producing it; otherwise it maps to the last such match only
    :return: dict keyed by the decorated SMILES
    """
    template = get_new_mol(mol_parse, mol_data)
    ring_h_query = Chem.MolFromSmarts("[*;R]-;!@[H]")
    decorated = {}
    for match in template.GetSubstructMatches(ring_h_query):
        hydrogen_idx = match[1]
        # Start from a fresh copy so each product carries one replacement.
        editable = get_new_mol(mol_parse, mol_data)
        editable.ReplaceAtom(hydrogen_idx, Atom(85))
        first_pass = Chem.MolToSmiles(editable.GetMol(), isomericSmiles=True)
        reparsed = Chem.MolFromSmiles(first_pass)
        smiles = Chem.MolToSmiles(reparsed, isomericSmiles=True)
        if three_d_mol:
            decorated.setdefault(smiles, []).append(match)
        else:
            decorated[smiles] = match
    return decorated
def convert_dict_to_mols(tot_dict):
    """
    Build one unbonded, carbon-only molecule per entry of ``tot_dict``.

    Each entry gets one carbon atom per coordinate triple. The molecule is
    embedded, its conformer positions are overwritten with the supplied
    coordinates, and the key is stored as its ``_Name`` property. Each
    entry's coordinates are also printed.

    :param tot_dict: mapping from a name to a sequence of items indexable as
        ``[0]``, ``[1]``, ``[2]`` (used as x, y, z)
    :return: list with one molecule per entry
    """
    mol_list = []
    for smiles, coords in tot_dict.items():
        builder = RWMol()
        print(coords)
        # One carbon per coordinate triple; no bonds are added.
        for _ in coords:
            builder.AddAtom(Atom(6))
        mol = builder.GetMol()
        AllChem.EmbedMolecule(mol)
        conf = mol.GetConformer()
        for i, xyz in enumerate(coords):
            conf.SetAtomPosition(i, Point3D(xyz[0], xyz[1], xyz[2]))
        mol = conf.GetOwningMol()
        mol.SetProp("_Name", smiles)
        mol_list.append(mol)
    return mol_list
def get_atom_type(self, atom: Chem.Atom) -> int:
    """
    Look up the type index of `atom` in self.atom_types.

    The lookup key is the tuple (symbol, formal charge, number of explicit
    hydrogens) of the atom.

    Args:
        atom (Chem.Atom): The input atom

    Returns:
        int: Position of the atom's key in self.atom_types
    """
    type_key = (
        atom.GetSymbol(),
        atom.GetFormalCharge(),
        atom.GetNumExplicitHs(),
    )
    return self.atom_types.index(type_key)
def get_substructures_from_atom(
        atom: Chem.Atom,
        max_size: int,
        substructure: Set[int] = None) -> Set[FrozenSet[int]]:
    """
    Recursively collects substructures grown outward from an atom.

    :param atom: The atom to grow from.
    :param max_size: The largest number of atoms a substructure may hold (at least 1).
    :param substructure: Atom indices already in the substructure; defaults to just atom.
    :return: A set of frozensets of atom indices, including the current substructure itself.
    """
    assert max_size >= 1

    if substructure is None:
        substructure = {atom.GetIdx()}

    found = {frozenset(substructure)}

    # Nothing can be added once the size limit is reached
    if len(substructure) == max_size:
        return found

    # Candidate extensions: neighbors not yet part of the substructure
    candidates = [
        nbr for nbr in atom.GetNeighbors()
        if nbr.GetIdx() not in substructure
    ]

    for nbr in candidates:
        # Extend a copy so sibling branches do not see this neighbor
        grown = substructure | {nbr.GetIdx()}

        # Skip extensions already produced by an earlier branch
        if frozenset(grown) in found:
            continue

        # Continue growing from the newly added neighbor
        found |= get_substructures_from_atom(nbr, max_size, grown)

    return found
def _AtomHallKierDeltas(atom: Chem.Atom, skipHs: bool = False) -> List[float]:
    """Calculate Kier & Hall atomic valence delta-values for molecular connectivity.

    From Kier L. and Hall L., J. Pharm. Sci. (1983), 72(10),1170-1173.

    For atomic numbers above 1 the list holds one value: outer electrons
    minus total hydrogens, further divided by
    ``atomic number - outer electrons - 1`` when the atomic number is 10 or
    more. For atomic numbers of 1 or less the list is ``[0.0]``, or empty
    when ``skipHs`` is true.
    """
    atomic_num = atom.GetAtomicNum()
    if atomic_num <= 1:
        return [] if skipHs else [0.0]
    outer_electrons = periodicTable.GetNOuterElecs(atomic_num)
    delta = float(outer_electrons - atom.GetTotalNumHs())
    if atomic_num >= 10:
        delta = delta / float(atomic_num - outer_electrons - 1)
    return [delta]
def _get_triangle(self, first: Chem.Atom, second: Chem.Atom) -> Union[int, None]:
    """
    Get the third atom, i.e. one bonded to both ``first`` and ``second``.

    :param first: atom
    :param second: atom
    :return: index of one shared neighbour, or None when there is none
    """
    endpoints = {first.GetIdx(), second.GetIdx()}
    near_first = set([nbr.GetIdx() for nbr in first.GetNeighbors()]) - endpoints
    near_second = set([nbr.GetIdx() for nbr in second.GetNeighbors()]) - endpoints
    shared = near_first.intersection(near_second)
    # An empty intersection means the two atoms share no neighbour.
    return next(iter(shared), None)
def _get_measurements(self, conf: Chem.Conformer, a: Chem.Atom, b: Chem.Atom, c: Chem.Atom, d: Chem.Atom):
    """Measure distance a-b, angle a-b-c and torsion a-b-c-d in ``conf``.

    The angle is stored as 180 minus the measured angle in degrees. A
    ``ValueError`` from any measurement stops the sequence, leaving the
    remaining values at 0. A NaN torsion is replaced by 0.

    :return: ``self._Measure(distance=..., angle=..., torsion=...)``
    """
    idx_a, idx_b, idx_c, idx_d = (atom.GetIdx() for atom in (a, b, c, d))
    distance = angle = torsion = 0
    try:
        distance = Chem.rdMolTransforms.GetBondLength(conf, idx_a, idx_b)
        angle = 180 - Chem.rdMolTransforms.GetAngleDeg(conf, idx_a, idx_b, idx_c)
        torsion = Chem.rdMolTransforms.GetDihedralDeg(conf, idx_a, idx_b, idx_c, idx_d)
    except ValueError:
        # Keep whatever was measured before the failure.
        pass
    # String comparison is used instead of isnan (noted as quicker).
    if str(torsion) == 'nan':
        torsion = 0
    return self._Measure(distance=distance, angle=angle, torsion=torsion)
def _parse_atom(self, atom: Chem.Atom) -> None:
    """Record ``atom`` either as a regular atom or as a connection point.

    An atom whose symbol is ``'*'`` adds an entry to ``self.CONNECT`` named
    after its first neighbour. When ``self.is_aminoacid()`` is true, a
    nitrogen neighbour gives the ``LOWER`` connection (index 1), a carbon
    neighbour gives the ``UPPER`` connection (index 2), and any other
    neighbour gets a plain ``CONNECT`` numbered from 3. Otherwise the entry
    is a plain ``CONNECT`` numbered sequentially. Any other atom has its
    descriptors appended to ``self.ATOM``.

    :param atom: atom to register
    """
    if atom.GetSymbol() != '*':
        descriptors = self._get_atom_descriptors(atom)
        self.ATOM.append(descriptors)
        return
    anchor = atom.GetNeighbors()[0]
    anchor_name = self._get_PDBInfo_atomname(anchor)
    # Entry layout: atom_name, index, connect_type[, connect_name]
    if not self.is_aminoacid():
        entry = [anchor_name, len(self.CONNECT) + 1, 'CONNECT']
    elif anchor.GetSymbol() == 'N':
        entry = [anchor_name, 1, 'LOWER_CONNECT', 'LOWER']
    elif anchor.GetSymbol() == 'C':
        entry = [anchor_name, 2, 'UPPER_CONNECT', 'UPPER']
    else:
        # Indices 1 and 2 are used by the LOWER/UPPER entries above.
        entry = [anchor_name, max(3, len(self.CONNECT) + 1), 'CONNECT']
    self.CONNECT.append(entry)
def get_hybridization(self, atom: Atom):
    """Return the hybridization of ``atom`` as the string "1", "2" or "3".

    The string form of ``atom.GetHybridization()`` has leading and trailing
    ``S``/``P`` characters stripped; an empty remainder is read as "1". Any
    other remainder than "1", "2" or "3" fails the assertion.
    """
    # An empty string after stripping falls back to "1".
    hyb = str(atom.GetHybridization()).strip("SP") or "1"
    assert hyb in ["1", "2", "3"], "No hybridization assigned for the atom!"
    return hyb
def atom_type_CO(atom: Chem.Atom) -> List[bool]:
    """Returns a two-element list flagging whether `atom` is carbon or oxygen.

    The first entry is True for symbol 'C', the second for symbol 'O'; both
    are False for any other element.
    """
    symbol = atom.GetSymbol()
    return [symbol == 'C', symbol == 'O']