def test_custom_kekulize(): smiles = 'CC=C1c2ccccc2C(=CC)c3ccccc13' smiles = 'N#CC1=C(SCC(=O)Nc2cccc(Cl)c2)N=C([O-])[C@H](C#N)C12CCCCC2' mol = MolFromSmiles(smiles) display(mol) for atom_idx in range(0,mol.GetNumAtoms()): bonds = mol.GetAtomWithIdx(atom_idx).GetBonds() for bond in bonds: print(bond.GetBondType()) non_aromatic_atoms = find_custom_Kekulize_set(mol, 60, 5) mol = custom_kekulize(mol,non_aromatic_atoms) display(mol) for atom_idx in range(0,mol.GetNumAtoms()): bonds = mol.GetAtomWithIdx(atom_idx).GetBonds() for bond in bonds: print(bond.GetBondType())
def test_force_Kekulize(): df = pd.read_csv('All_Moles_Tested_Data.csv') i= 0 mol_list = [] for smile in df['smiles']: mol = MolFromSmiles(smile) x = find_custom_Kekulize_set(smile, max_atoms= 60, max_degree= 5,printMe = False) for index in x: mol.GetAtomWithIdx(index).SetAtomicNum(32) mol_list.append(mol) df['mol'] = pd.DataFrame({'mol':mol_list}) unit = 5 for i in range(0,len(df)//unit): display(PandasTools.FrameToGridImage(df.iloc[i*unit:i*unit+unit],column='mol', legendsCol='',molsPerRow=unit)) if((len(df)%unit>0)*1): display(PandasTools.FrameToGridImage(df.iloc[len(df)//unit*unit:len(df)],column='mol', legendsCol='',molsPerRow=unit))
def generate_graph(smiles, label=None): mol = MolFromSmiles(smiles) if not mol: raise ValueError("Could not parse SMILES string:", smiles) SYMBOL = [ 'B', 'C', 'N', 'O', 'F', 'Si', 'P', 'S', 'Cl', 'As', 'Se', 'Br', 'Te', 'I', 'At', 'other' ] HYBRIDIZATION = [ Chem.rdchem.HybridizationType.SP, Chem.rdchem.HybridizationType.SP2, Chem.rdchem.HybridizationType.SP3, Chem.rdchem.HybridizationType.SP3D, Chem.rdchem.HybridizationType.SP3D2, 'other', ] num_atom = Chem.RemoveHs(mol).GetNumAtoms() symbol = np.zeros((num_atom, 16), np.uint8) hybridization = np.zeros((num_atom, 6), np.uint8) degree = np.zeros((num_atom, 6), np.uint8) num_h = np.zeros((num_atom, 5), np.uint8) chirality = np.zeros((num_atom, 3), np.uint8) aromatic = np.zeros((num_atom, 1), np.uint8) formal_charge = np.zeros((num_atom, 1), np.float32) radical_electrons = np.zeros((num_atom, 1), np.float32) for i in range(num_atom): atom = mol.GetAtomWithIdx(i) symbol[i] = one_of_k_encoding_unk(atom.GetSymbol(), SYMBOL) hybridization[i] = one_of_k_encoding_unk(atom.GetHybridization(), HYBRIDIZATION) degree[i] = one_of_k_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5]) num_h[i] = one_of_k_encoding_unk( atom.GetTotalNumHs(includeNeighbors=True), [0, 1, 2, 3, 4]) try: chirality[i] = one_of_k_encoding_unk(atom.GetProp('_CIPCode'), ['R', 'S', 'unknown']) except: chirality[i] = [0, 0, 0] aromatic[i] = atom.GetIsAromatic() formal_charge[i] = atom.GetFormalCharge() radical_electrons[i] = atom.GetNumRadicalElectrons() # abundant features # won't bring substantial change to predictive performance, sometimes even worse AtomicWeight = np.zeros((num_atom, 1), np.float32) AtomicNumber = np.zeros((num_atom, 1), np.float32) Rvdw = np.zeros((num_atom, 1), np.float32) RCovalent = np.zeros((num_atom, 1), np.float32) DefaultValence = np.zeros((num_atom, 1), np.float32) valence = np.zeros((num_atom, 1), np.float32) NOuterElecs = np.zeros((num_atom, 1), np.float32) ring = np.zeros((num_atom, 7), np.uint8) acceptor = np.zeros((num_atom, 1), np.uint8) donor = np.zeros((num_atom, 1), np.uint8) for i in range(num_atom): atom = mol.GetAtomWithIdx(i) AtomicNum = atom.GetAtomicNum() AtomicNumber[i] = AtomicNum AtomicWeight[i] = Chem.GetPeriodicTable().GetAtomicWeight(AtomicNum) Rvdw[i] = Chem.GetPeriodicTable().GetRvdw( AtomicNum) # (van der Waals radius) RCovalent[i] = Chem.GetPeriodicTable().GetRcovalent( AtomicNum) #(covalent radius) DefaultValence[i] = Chem.GetPeriodicTable().GetDefaultValence( AtomicNum) valence[i] = atom.GetExplicitValence() NOuterElecs[i] = Chem.GetPeriodicTable().GetNOuterElecs(AtomicNum) ring[i] = [int(atom.IsInRing()), int(atom.IsInRingSize(3)), \ int(atom.IsInRingSize(4)), int(atom.IsInRingSize(5)), \ int(atom.IsInRingSize(6)), int(atom.IsInRingSize(7)), int(atom.IsInRingSize(8))] factory = ChemicalFeatures.BuildFeatureFactory( os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')) feature = factory.GetFeaturesForMol(mol) for t in range(0, len(feature)): if feature[t].GetFamily() == 'Donor': for i in feature[t].GetAtomIds(): donor[i] = 1 elif feature[t].GetFamily() == 'Acceptor': for i in feature[t].GetAtomIds(): acceptor[i] = 1 num_bond = mol.GetNumBonds() if num_bond == 0: num_bond = 1 # except error caused by CH4, NH3 bond_feat = np.zeros((num_bond * 2, 10), np.int16) bond_index = np.zeros((num_bond * 2, 2), np.int16) BOND_TYPE = [ Chem.rdchem.BondType.SINGLE, Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC, ] BOND_STEREO = ["STEREONONE", "STEREOANY", "STEREOZ", "STEREOE"] ij = 0 for i in range(num_atom): for j in range(num_atom): if i == j: continue bond = mol.GetBondBetweenAtoms(i, j) if bond is not None: atom1 = mol.GetAtomWithIdx(i) atom2 = mol.GetAtomWithIdx(j) bond_index[ij] = [i, j] bond_type = one_of_k_encoding(bond.GetBondType(), BOND_TYPE) bond_ring = [bond.GetIsConjugated(), bond.IsInRing()] bond_stereo = one_of_k_encoding(str(bond.GetStereo()), BOND_STEREO) bond_feat[ij] = bond_type + bond_ring + bond_stereo ij += 1 graph = Graph( smiles, [symbol, hybridization, degree, num_h, chirality, aromatic, formal_charge, radical_electrons, \ AtomicWeight, AtomicNumber, Rvdw, RCovalent, DefaultValence, valence, NOuterElecs, ring, acceptor, donor], bond_feat, bond_index, np.array(label).reshape((1, 1)), ) return graph