def test_SmartsMolFilter(self):
    """Check SmartsFilter output sizes and its constructor error handling.

    Eight molecules are fed through a ``SupplyNode``; the test asserts how
    many pass a ``SmartsFilter`` built from the patterns ``'C=O'`` and
    ``'CN'`` with explicit counts, with the filter negated, and with no
    counts given.  It then asserts that a mismatched ``counts`` list and an
    unparsable SMARTS pattern both raise ``ValueError``.
    """
    smis = ['C1CCC1', 'C1CCC1C=O', 'CCCC', 'CCC=O', 'CC(=O)C', 'CCN', 'NCCN', 'NCC=O']
    mols = [Chem.MolFromSmiles(x) for x in smis]
    suppl = SupplyNode(contents=mols)
    self.assertEqual(len(list(suppl)), 8)

    # Explicit per-pattern counts.
    smas = ['C=O', 'CN']
    counts = [1, 2]
    filt = SmartsMolFilter.SmartsFilter(patterns=smas, counts=counts)
    filt.AddParent(suppl)
    self.assertEqual(len(list(filt)), 5)

    # Same filter, negated, after rewinding the supplier.
    suppl.reset()
    filt.SetNegate(True)
    self.assertEqual(len(list(filt)), 3)

    # No counts supplied.
    smas = ['C=O', 'CN']
    filt = SmartsMolFilter.SmartsFilter(patterns=smas)
    filt.AddParent(suppl)
    self.assertEqual(len(list(filt)), 6)

    # One count for two patterns must be rejected.
    self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=smas,
                      counts=['notEnough', ])

    # Silence the error channel while feeding a deliberately bad pattern;
    # the finally clause guarantees the channel is restored even when the
    # assertion itself fails.
    RDLogger.DisableLog('rdApp.error')
    try:
        self.assertRaises(ValueError, SmartsMolFilter.SmartsFilter, patterns=['BadSmarts'])
    finally:
        RDLogger.EnableLog('rdApp.error')
def as_atom(symbol):
    """Return the first atom of the molecule parsed from ``[symbol]``.

    ``symbol`` is interpolated into a bracketed one-atom SMILES string and
    parsed with ``Chem.MolFromSmiles``.

    NOTE(review): if the SMILES cannot be parsed, ``MolFromSmiles``
    presumably returns ``None`` and the attribute access below then raises
    ``AttributeError`` -- confirm whether callers rely on that.
    """
    # Temporarily disable rdkit's logging to avoid spamming with
    # "WARNING: not removing hydrogen atom without neighbors".
    RDLogger.DisableLog('rdApp.warning')
    try:
        mol = Chem.MolFromSmiles(f'[{symbol}]')
    finally:
        # Always restore the channel, even if parsing raises.
        RDLogger.EnableLog('rdApp.warning')
    return mol.GetAtoms()[0]
def test_SmartsRemover(self):
    """Assert that SmartsRemover rejects a pattern list containing bad SMARTS."""
    salts = ['[Cl;H1&X1,-]', '[Na+]', '[O;H2,H1&-,X0&-2]', 'BadSmarts']

    # Mute the error channel for the expected parse failure; the finally
    # clause restores it even if the assertion fails.
    RDLogger.DisableLog('rdApp.error')
    try:
        self.assertRaises(ValueError, SmartsRemover.SmartsRemover, patterns=salts)
    finally:
        RDLogger.EnableLog('rdApp.error')
def test_PatternHolder(self):
    """Compare two SubstructLibrary instances built with different PatternHolders.

    Library 1 uses ``PatternHolder(2048)`` and is filled by hand (SMILES plus
    an explicitly computed fingerprint); library 2 uses the default
    ``PatternHolder()`` and is filled through ``AddMol``.  Both receive the
    same molecules and must return the same matches for the SMARTS ``N``.
    """
    fname = os.path.join(os.environ["RDBASE"], "Data", "NCI", "first_5K.smi")
    suppl = Chem.SmilesMolSupplier(fname, delimiter="\t", titleLine=False)

    mols1 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    fps1 = rdSubstructLibrary.PatternHolder(2048)
    ssslib1 = rdSubstructLibrary.SubstructLibrary(mols1, fps1)

    mols2 = rdSubstructLibrary.CachedTrustedSmilesMolHolder()
    fps2 = rdSubstructLibrary.PatternHolder()
    ssslib2 = rdSubstructLibrary.SubstructLibrary(mols2, fps2)

    # Mute the error channel while reading the input file; the finally
    # clause restores it even if loading raises unexpectedly.
    RDLogger.DisableLog('rdApp.error')
    try:
        for i in range(0, 1000, 10):
            try:
                mol = suppl[i]
            except Exception:
                # Skip entries the supplier cannot produce.
                continue
            if not mol:
                continue
            mols1.AddSmiles(Chem.MolToSmiles(mol))
            fps1.AddFingerprint(fps1.MakeFingerprint(mol))
            ssslib2.AddMol(mol)
    finally:
        RDLogger.EnableLog('rdApp.error')

    query = Chem.MolFromSmarts("N")
    self.assertIsNotNone(query)

    matches1 = sorted(ssslib1.GetMatches(query))
    matches2 = sorted(ssslib2.GetMatches(query))
    self.assertEqual(len(matches1), len(matches2))
    # Direct list comparison reports the differing element on failure.
    self.assertEqual(matches1, matches2)
def _gen_compound(mol):
    """Build (or look up) the compound record for a predicted molecule.

    Reads ``explicit_h``, ``local_cpds`` and ``generation``, which are not
    defined in this block (presumably supplied by the enclosing scope).

    Returns:
        The existing entry from ``local_cpds`` when the compound id is
        already known, a freshly built compound dict otherwise, or ``None``
        when the molecule cannot be sanitized, its SMILES contains ``"."``,
        or no compound id is produced.
    """
    rkl.DisableLog("rdApp.*")
    try:
        if explicit_h:
            mol = RemoveHs(mol)

        # resolve potential tautomers and choose first one
        mol_smiles = MolToSmiles(mol, True)
        if "n" in mol_smiles:
            mol_smiles = utils.postsanitize_smiles([mol_smiles])[0][0]
            mol = MolFromSmiles(mol_smiles)

        SanitizeMol(mol)
    # TODO: logger
    # Get lots of "Explicit valence greater than permitted" errors here
    # This is for predicted compounds that are infeasible, so we throw them out
    except Exception:
        # Exception (not BaseException) so KeyboardInterrupt / SystemExit
        # still propagate instead of being treated as a bad compound.
        return None
    finally:
        # Runs on the early return above as well; previously the failure
        # path left RDKit logging disabled.
        rkl.EnableLog("rdApp.*")

    mol_smiles = MolToSmiles(mol, True)
    if "." in mol_smiles:
        return None

    cpd_id, inchi_key = utils.get_compound_hash(mol_smiles, "Predicted")
    if not cpd_id:
        return None

    if cpd_id in local_cpds:
        return local_cpds[cpd_id]

    return {
        "ID": None,
        "_id": cpd_id,
        "SMILES": mol_smiles,
        "InChI_key": inchi_key,
        "Type": "Predicted",
        "Generation": generation,
        "atom_count": utils.get_atom_count(mol),
        "Reactant_in": [],
        "Product_of": [],
        "Expand": True,
        "Formula": CalcMolFormula(mol),
        "last_tani": 0,
    }
def search(query: str, min_mw: float, max_mw: float, layout: widgets.Box) -> None:
    """Look up ``query`` and show the matching molecules in ``layout``.

    Synonym matches are annotated with a normalized molecule, its InChI and
    its exact molecular weight, filtered via ``filter_to_norm_inchi_in_db``
    and ``filter_by_mw``, sorted by ``"MW"``, and rendered as an ipysheet
    table with a "use" button per row.  The table replaces the search-output
    slot of ``layout.children``.  Returns early, leaving the layout children
    unchanged, when ``is_valid_num_results`` rejects the number of matches.

    Args:
        query: Text passed to ``get_synonym_matches``.
        min_mw: Lower bound handed to ``filter_by_mw``.
        max_mw: Upper bound handed to ``filter_by_mw``.
        layout: Widget container whose children are updated.
    """
    with get_new_log_box(layout):
        clear_search_output(layout)
        results = get_synonym_matches(query)
        for cur in results:
            RDLogger.DisableLog("rdApp.*")  # hide rdkit warnings
            try:
                cur["mol"] = cheminfo.normalize_molecule(
                    Chem.inchi.MolFromInchi(cur["inchi"]))
                cur["norm_inchi"] = Chem.inchi.MolToInchi(cur["mol"])
            finally:
                # Restore logging even if normalization/conversion raises.
                RDLogger.EnableLog("rdApp.*")
            cur["MW"] = ExactMolWt(cur["mol"])

        filtered = filter_by_mw(filter_to_norm_inchi_in_db(results), min_mw, max_mw)
        logger.debug("Found %d matches to %s.", len(filtered), query)
        if not is_valid_num_results(len(filtered), query, layout):
            return

        final = sorted(filtered, key=lambda x: x["MW"])
        logger.debug("Num mols: %d", len(final))

        column_names = ["", "Name", "MW", "Structure"]
        sheet = ipysheet.sheet(
            rows=len(final),
            columns=len(column_names),
            column_headers=column_names,
            column_resizing=False,
            column_width=[1, 4, 2, 10],
        )

        # One "use" button per result row.
        buttons = [
            widgets.Button(description="use", layout=widgets.Layout(width="100%"))
            for _ in final
        ]
        for button in buttons:
            button.on_click(
                lambda current: on_use_button_clicked(current, final, layout))

        ipysheet.column(0, buttons)
        ipysheet.column(1, [x["name"] for x in final])
        ipysheet.column(2, [ExactMolWt(x["mol"]) for x in final])
        ipysheet.column(3, [cheminfo.mol_to_image(x["mol"]) for x in final])
        layout.children = swap_layout(layout.children,
                                      LayoutPosition.SEARCH_OUTPUT.value, sheet)
def test1InchiReadPubChem(self):
    """Round-trip each dataset molecule through InChI and tally the outcome.

    For every molecule ``m`` the InChI ``x`` is generated, read back, written
    to SMILES, re-parsed and converted to InChI again (``y``).  Each molecule
    is counted as ``same`` (``x == y``), ``reasonable`` (a difference or
    failure explained by one of the checks below) or ``diff``.  The totals
    per dataset are compared against fixed expected values.
    """

    def radical_weight(mol):
        # Sum of radical-electron count times atomic number over the atoms
        # that carry radical electrons.
        return sum(a.GetNumRadicalElectrons() * a.GetAtomicNum()
                   for a in mol.GetAtoms()
                   if a.GetNumRadicalElectrons() != 0)

    for f in self.dataset.values():
        same, diff, reasonable = 0, 0, 0
        for m in f:
            if m is None:  # pragma: nocover
                continue
            x = MolToInchi(m)
            y = None
            # Mute the error channel only for the read-back; the finally
            # clause restores it even if MolFromInchi raises.
            RDLogger.DisableLog('rdApp.error')
            try:
                mol = MolFromInchi(x)
            finally:
                RDLogger.EnableLog('rdApp.error')
            if mol is not None:
                y = MolToInchi(
                    MolFromSmiles(MolToSmiles(mol, isomericSmiles=True)))
            if y is None:
                # metal involved?
                try:
                    MolToInchi(m, treatWarningAsError=True)
                except InchiReadWriteError as inst:
                    _, error = inst.args
                    if 'Metal' in error or \
                            'Charges were rearranged' in error:
                        reasonable += 1
                        continue
                # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                # RDKit does not like the SMILES? use MolBlock instead
                inchiMol = MolFromInchi(x)
                if inchiMol:
                    rdDepictor.Compute2DCoords(inchiMol)
                    z = MolToInchi(MolFromMolBlock(
                        MolToMolBlock(inchiMol)))
                    if x == z:
                        reasonable += 1
                        continue
                # InChI messed up the radical?
                unsanitizedInchiMol = MolFromInchi(x, sanitize=False)
                if radical_weight(m) != radical_weight(unsanitizedInchiMol):
                    reasonable += 1
                    continue

                diff += 1
                cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                print(COLOR_GREEN + 'Empty mol for PubChem Compound ' +
                      cid + '\n' + COLOR_RESET)
                continue
            if x != y:
                # if there was warning in the first place, then this is
                # tolerable
                try:
                    MolToInchi(m, treatWarningAsError=True)
                    MolFromInchi(x, treatWarningAsError=True)
                except InchiReadWriteError:
                    reasonable += 1
                    continue
                # or if there are big rings
                SanitizeMol(m)
                # NOTE(review): on Python 3 ``filter`` returns an iterator
                # object, which is always truthy, so this branch is taken
                # for every molecule that reaches it and the checks below
                # never run.  ``any(len(r) >= 8 for r in ...)`` is probably
                # what was meant, but the expected totals asserted at the
                # end were obtained with the current behaviour -- re-baseline
                # them before changing this condition.
                if filter(lambda i: i >= 8, [len(r) for r in m.GetRingInfo().AtomRings()]):
                    reasonable += 1
                    continue
                # THERE ARE NO EXAMPLES FOR THE FOLLOWING (no coverage)
                # or if RDKit loses bond stereo
                s = MolToSmiles(m, True)
                if MolToSmiles(MolFromSmiles(s), True) != s:
                    reasonable += 1
                    continue
                # or if it is RDKit SMILES writer unhappy about the mol
                inchiMol = MolFromInchi(x)
                rdDepictor.Compute2DCoords(inchiMol)
                z = MolToInchi(MolFromMolBlock(MolToMolBlock(inchiMol)))
                if x == z:
                    reasonable += 1
                    continue

                diff += 1
                # Look the id up here: it was previously only assigned in the
                # ``y is None`` branch, so this message would have used a
                # stale or undefined ``cid``.
                cid = m.GetProp('PUBCHEM_COMPOUND_CID')
                print(COLOR_GREEN + 'Molecule mismatch for PubChem Compound ' +
                      cid + COLOR_RESET)
                print(inchiDiff(x, y))
                print()
            else:
                same += 1

        fmt = "\n{0}InChI read Summary: {1} identical, {2} variance, {3} reasonable variance{4}"
        print(fmt.format(COLOR_GREEN, same, diff, reasonable, COLOR_RESET))
        self.assertEqual(same, 621)
        self.assertEqual(diff, 0)
        self.assertEqual(reasonable, 560)
def tearDown(self):
    """Re-enable the RDKit warning and error log channels after each test."""
    for channel in ('rdApp.warning', 'rdApp.error'):
        RDLogger.EnableLog(channel)