def test_wctype(self):
    """Compare the wctype labels set by wclogp.assign_wctype with fixed lists.

    Each demo record is parsed, typed, and the per-atom ``wctype`` values
    (in ``atoms_iter`` order) are compared against the expected sequence.
    """
    expected_by_record = (
        ("wctype_C_alip", [
            "C2", "C1", "C7", "C4", "N2", "C3", "N1", "C7", "C5", "O9",
            "C27", "Me1", "C27", "C27", "C27", "H1", "C6", "C26", "C21",
            "C21", "N12", "C18", "C21", "C21", "C11", "O3", "C12", "C1",
            "C1", "C1", "C9", "C10", "C1",
        ]),
        ("wctype_C_arom", [
            "C21", "C23", "C22", "C24", "C19", "C19", "C14", "C20", "C25",
            "O1", "O8", "O2", "N3", "S1", "C20", "C15", "C16", "C17", "C13",
            "C18", "Cl", "Br", "I", "C8", "P", "F",
        ]),
        ("wctype_N", [
            "N11", "C21", "C22", "C18", "C22", "C22", "N4", "C4", "N8", "C4",
            "N6", "N7", "N5", "C3", "N13", "C3", "C3", "C3", "C3", "N6",
            "N10", "C7", "N9", "N14", "N14",
        ]),
        ("wctype_OS", [
            "S2", "C4", "N13", "P", "C4", "O6", "O5", "O7", "C5", "O9", "O3",
            "C5", "O4", "C23", "S3", "C23", "C24", "C21", "O2a", "O11", "C5",
            "O12", "O10", "O4", "S1", "O2a", "O2a",
        ]),
    )
    for record, expected in expected_by_record:
        mol = reader.mol_from_text(MOL[record])
        wclogp.assign_wctype(mol)
        observed = [atom.wctype for _, atom in mol.atoms_iter()]
        self.assertEqual(observed, expected)
def test_formula(self):
    """Check molutil.formula output for molfile and SMILES inputs."""
    phe = reader.mol_from_text(MOL["Phe"])
    self.assertEqual(molutil.formula(phe), "C9H11NO2")

    kcl = reader.mol_from_text(MOL["KCl"])
    self.assertEqual(molutil.formula(kcl), "Cl.K")

    # longer text first
    hydrate = smiles_to_compound("CCO.O.O")
    self.assertEqual(molutil.formula(hydrate), "C2H6O.2H2O")

    thioether = smiles_to_compound("CCCSCC(Cl)C(O)O")
    self.assertEqual(molutil.formula(thioether), "C6H13O2SCl")
def test_lys_arg(self):
    """Draw the Lys and Arg demo records with Matplotlib and save PNG files.

    No assertions are made; the test only fails if parsing, drawing or
    saving raises.
    """
    jobs = (
        ("Lys", "lys", "_test_Lys.png"),
        ("Arg", "arg", "_test_Arg.png"),
    )
    for record, label, out_path in jobs:
        compound = reader.mol_from_text(MOL[record])
        print(label)
        drawer = Matplotlib(compound)
        drawer.save(out_path)
def test_assign_rotatable(self):
    """Check molutil.rotatable_count for four demo molecules."""
    phe = reader.mol_from_text(MOL["Phe"])
    # Only the first molecule goes through an explicit assign_rotatable call.
    descriptor.assign_rotatable(phe)
    self.assertEqual(molutil.rotatable_count(phe), 3)

    expected_counts = (
        ("KCl", 0),
        ("Dipyridamole", 12),
        ("Paclitaxel", 15),
    )
    for record, expected in expected_counts:
        mol = reader.mol_from_text(MOL[record])
        self.assertEqual(molutil.rotatable_count(mol), expected)
def test_mcsdr1(self):
    """Check the edge count reported by mcsdr.from_array for two pairs."""

    def shared_edges(first, second):
        # Build one DescriptorArray per molecule, then compare them.
        left = mcsdr.DescriptorArray(first)
        right = mcsdr.DescriptorArray(second)
        return mcsdr.from_array(left, right).edge_count()

    # TODO: pi mismatch is not acceptable
    phe = reader.mol_from_text(MOL["Phe"])
    arg = reader.mol_from_text(MOL["Arg"])
    self.assertEqual(shared_edges(phe, arg), 5)

    # Delta-y exchange will not occur due to distance descriptor
    epoxide = smiles_to_compound("C1OC1CCC(=O)O")
    alcohol = smiles_to_compound("CC(O)CCC(=O)O")
    self.assertEqual(shared_edges(epoxide, alcohol), 7)
def test_clique_dist(self):
    """Print mcsdr.mcs_score for pairs of DrugBank records of growing size.

    The records are read from a local SQLite file. The test makes no
    assertions; for every pair it prints the comparison-array sizes and
    the score. When the datasource file is not present the test is
    skipped instead of erroring.
    """
    import os

    from cheddar.data.sqliteconnection import CON

    sq = "./datasource/DrugBank_FDA_Approved.sqlite3"
    if not os.path.isfile(sq):
        self.skipTest("datasource not found: {}".format(sq))
    CON.connect(sq, "DrugBank_FDA_Approved")
    mcol = CON.columns.index("Mol_Block")

    drugbank_ids = (
        ("s1", "DB00120"),  # small
        ("s2", "DB00968"),
        ("m1", "DB00279"),  # mid
        ("m2", "DB00451"),
        ("l1", "DB00881"),  # large
        ("l2", "DB00691"),
        ("c1", "DB00093"),  # cyclic polypeptide
        ("c2", "DB00035"),
        ("p1", "DB00115"),  # porphyrin
        ("p2", "DB00200"),
        # long chain sugar; fetched as before but not part of any pair below
        ("inu", "DB00638"),
    )
    rows = {}
    for label, drugbank_id in drugbank_ids:
        rows[label] = CON.find_by("DRUGBANK_ID", drugbank_id)

    pairs = (("s1", "s2"), ("m1", "m2"), ("l1", "l2"),
             ("c1", "c2"), ("p1", "p2"))
    for first, second in pairs:
        arr1 = mcsdr.comparison_array(reader.mol_from_text(rows[first][mcol]))
        arr2 = mcsdr.comparison_array(
            reader.mol_from_text(rows[second][mcol]))
        print(len(arr1[0]), arr1[1], len(arr2[0]), arr2[1])
        print(mcsdr.mcs_score(arr1, arr2))
def test_timeout(self):
    """Check DescriptorArray / from_array behaviour with a 0.1 timeout."""
    fullerene = reader.mol_from_text(MOL["Buckminsterfullerene"])

    # Without a diameter argument the array is expected to be flagged invalid.
    timed_out = mcsdr.DescriptorArray(fullerene, timeout=0.1)
    self.assertFalse(timed_out.valid)

    # With diameter=5 a self-comparison must still give a positive local_sim.
    bounded = mcsdr.DescriptorArray(fullerene, diameter=5, timeout=0.1)
    result = mcsdr.from_array(bounded, bounded, timeout=0.1)
    self.assertGreater(result.local_sim(), 0)
def test_mcsdr2(self):
    """Check mcsdr.from_array edge counts on small and disconnected inputs."""

    def shared_edges(first, second):
        # Build one DescriptorArray per molecule, then compare them.
        left = mcsdr.DescriptorArray(first)
        right = mcsdr.DescriptorArray(second)
        return mcsdr.from_array(left, right).edge_count()

    # Disconnected
    chain_acid = smiles_to_compound("C1CCCC1CCCC(=O)O")
    acetate = reader.mol_from_text(MOL["CaAcO2"])
    self.assertEqual(shared_edges(chain_acid, acetate), 3)

    # No line graph
    self.assertEqual(
        shared_edges(smiles_to_compound("CO"), smiles_to_compound("CC")), 0)

    # TODO: minimum MCS edge size is 2
    self.assertEqual(
        shared_edges(smiles_to_compound("CCO"), smiles_to_compound("CCC")), 0)

    # This works well
    self.assertEqual(
        shared_edges(smiles_to_compound("CCCO"), smiles_to_compound("CCCC")),
        2)

    # TODO: pitfall in line graph
    # Only the arrays are built for this pair; nothing is asserted.
    star1 = smiles_to_compound("CO(C)(C)C")
    star2 = smiles_to_compound("OC(O)(O)O")
    mcsdr.DescriptorArray(star1)
    mcsdr.DescriptorArray(star2)
def test_json(self):
    """Round-trip Cyanocobalamin through jsonized() and the json module."""
    original = reader.mol_from_text(MOL["Cyanocobalamin"])
    as_dict = original.jsonized()
    encoded = json.dumps(as_dict)

    # Compressed file size, as recorded by the author using
    # chorus.util.debug.total_size on each representation:
    #   Original:     144414 Bytes
    #   Pickled:       30033 Bytes
    #   Dict:         195291 Bytes
    #   JSON:          32373 Bytes
    #   Pickled dict:  22943 Bytes

    restored = Compound(json.loads(encoded))

    # Atom key should be integer, not string
    first_node_key = next(iter(restored.graph.node.keys()))
    self.assertIsInstance(first_node_key, int)
    first_adj_key = next(iter(restored.graph.adj.keys()))
    self.assertIsInstance(first_adj_key, int)

    self.assertEqual(molutil.mw(original), molutil.mw(restored))
    self.assertTrue(substructure.equal(original, restored))
def test_format_stereo(self):
    """Check that spine_to_terminal_wedge rewrites the 3-5 bond of Phe."""
    phe = reader.mol_from_text(MOL["Phe"])

    # Put the bond into the state the helper is expected to change.
    wedge = phe.bond(3, 5)
    wedge.is_lower_first = 1
    phe.bond(3, 5).type = 2

    helper.spine_to_terminal_wedge(phe)

    # Look the bond up again after the helper has run.
    self.assertEqual(phe.bond(3, 5).is_lower_first, 0)
    self.assertEqual(phe.bond(3, 5).type, 1)
def test_scale_and_center(self):
    """Check size2d after helper.scale_and_center for three inputs."""

    def check_size2d(mol, expected):
        # Compare the first three size2d entries to two decimal places.
        for index, value in enumerate(expected):
            self.assertAlmostEqual(mol.size2d[index], value, 2)

    goserelin = reader.mol_from_text(MOL["Goserelin"])
    helper.scale_and_center(goserelin)
    check_size2d(goserelin, (15.21, 16.06, 0.83))

    kcl = reader.mol_from_text(MOL["KCl"])
    helper.scale_and_center(kcl)
    check_size2d(kcl, (1.0, 0, 0.71))

    salt = smilessupplier.smiles_to_compound("[K+].[Cl-]")
    # TODO: overlap
    calc2dcoords.calc2dcoords(salt)
    helper.scale_and_center(salt)
    check_size2d(salt, (0, 0, 1))
def test_composition(self):
    """Check the element counts molutil.composition returns for Phe."""
    phe = reader.mol_from_text(MOL["Phe"])
    expected = {'H': 11, 'C': 9, 'O': 2, 'N': 1}
    self.assertEqual(molutil.composition(phe), expected)
def test_mcsdr2(self):
    """Check the "mcsdr_edges" value of mcsdr.local_sim for two pairs."""

    def shared_edges(first, second):
        # Build one comparison array per molecule, then compare them.
        left = mcsdr.comparison_array(first)
        right = mcsdr.comparison_array(second)
        return mcsdr.local_sim(left, right)["mcsdr_edges"]

    # Disconnected
    chain_acid = smiles_to_compound("C1CCCC1CCCC(=O)O")
    acetate = reader.mol_from_text(MOL["CaAcO2"])
    self.assertEqual(shared_edges(chain_acid, acetate), 3)

    # No line graph
    methanol = smiles_to_compound("CO")
    ethane = smiles_to_compound("CC")
    self.assertEqual(shared_edges(methanol, ethane), 0)
def test_format_double_bond(self):
    """Check bond ``type`` values of Phe before and after the formatters."""
    phe = reader.mol_from_text(MOL["Phe"])
    helper.scale_and_center(phe)

    # Before any formatting all four inspected bonds have type 0.
    for ends in ((8, 11), (12, 10), (7, 6), (2, 9)):
        self.assertEqual(phe.bond(*ends).type, 0)

    helper.format_ring_double_bond(phe)
    after_ring_format = (((8, 11), 1), ((12, 10), 0), ((7, 6), 0))
    for ends, expected in after_ring_format:
        self.assertEqual(phe.bond(*ends).type, expected)

    helper.equalize_terminal_double_bond(phe)
    self.assertEqual(phe.bond(2, 9).type, 2)
def test_draw_mol(self):
    """Build an SVG drawing of the demo record; passes if nothing raises.

    Other MOL keys the author listed for manual trials:
        "Phe"            - Small
        "Goserelin"      - Large
        "CyclosporinA"   - Atypical bond len
        "Carbidopa"      - Isolated components
        "Gadodiamide"    - Charged
        "Premarin"       - Stereo
        "Nitroprusside"  - Transition metal
        "Fondaparinux"   - Multi-line props
        "KCl"            - No bond, width = 0
        "Cyanocobalamin"
    """
    demo = reader.mol_from_text(MOL["demo"])
    # The drawing object itself is not inspected.
    SVG(demo)
def test_recognize(self):
    """Check the rings / scaffolds / isolated attributes of parsed molecules."""
    # Phenylalanin
    phe = reader.mol_from_text(MOL["Phe"])
    benzene_ring = [8, 11, 12, 10, 7, 6]
    self.assertTrue(self.equivalent_ring(phe.rings[0], benzene_ring))
    self.assertEqual(phe.scaffolds, [[0]])
    self.assertEqual(phe.isolated, [])

    # Premarin
    premarin = reader.mol_from_text(MOL["Premarin"])
    expected_rings = sorted([
        [10, 9, 8, 7, 4, 5],
        [10, 14, 13, 12, 11, 9],
        [24, 25, 26, 12, 11],
        [2, 3, 4, 5, 6, 1],
    ])
    for found, wanted in zip(sorted(premarin.rings), expected_rings):
        self.assertTrue(self.equivalent_ring(found, wanted))
    self.assertEqual(premarin.scaffolds, [[0, 1, 2, 3]])
    self.assertEqual(premarin.isolated, [[28]])

    # Pyrene
    pyrene = smiles_to_compound("C12=CC=C3C=CC=C4C=CC(C2=C34)=CC=C1")
    pyrene_sizes = [len(ring) for ring in pyrene.rings]
    self.assertEqual(pyrene_sizes, [6, 6, 6, 6])
    self.assertEqual(pyrene.scaffolds, [[0, 1, 2, 3]])
    self.assertEqual(pyrene.isolated, [])

    # KCl
    kcl = reader.mol_from_text(MOL["KCl"])
    self.assertEqual(kcl.rings, [])
    self.assertEqual(kcl.scaffolds, [])
    self.assertEqual(kcl.isolated, [[2]])

    # Goserelin
    goserelin = reader.mol_from_text(MOL["Goserelin"])
    goserelin_sizes = sorted([len(ring) for ring in goserelin.rings])
    self.assertEqual(goserelin_sizes, [5, 5, 5, 5, 6, 6])
    scaffold_sizes = Counter([len(group) for group in goserelin.scaffolds])
    self.assertEqual(scaffold_sizes, {1: 4, 2: 1})

    # Tetrahedrane (K4 graph)
    tetrahedrane = smiles_to_compound("C12C3C1C23")
    self.assertEqual([len(ring) for ring in tetrahedrane.rings], [3, 3, 3])
def test_fmcs(self):
    """Check the "mcs_edges" and "similarity" entries returned by rdkit.fmcs."""
    phe = reader.mol_from_text(MOL["Phe"])
    arg = reader.mol_from_text(MOL["Arg"])
    self.assertEqual(rdkit.fmcs(phe, arg)["mcs_edges"], 7)

    lys = reader.mol_from_text(MOL["Lys"])
    formestane = reader.mol_from_text(MOL["Formestane"])
    self.assertEqual(rdkit.fmcs(lys, formestane)["similarity"], 0.194)

    # null molecule
    empty = reader.mol_from_text(MOL["null"])
    phe_again = reader.mol_from_text(MOL["Phe"])
    result = rdkit.fmcs(empty, phe_again)
    self.assertEqual(result["mcs_edges"], 0)
    self.assertEqual(result["similarity"], 0)
def test_minify_ring(self):
    """Check the distribution of ring sizes found in polycyclic molecules."""

    def ring_sizes(mol):
        # Histogram of ring lengths for the given molecule.
        return Counter([len(ring) for ring in mol.rings])

    # Cyanocobalamin
    cobalamin = reader.mol_from_text(MOL["Cyanocobalamin"])
    self.assertEqual(ring_sizes(cobalamin), {5: 7, 6: 4, 19: 1})

    # Rifabutin
    # TODO: this can be [5, 5, 6, 6, 6, 25] or [5, 5, 6, 6, 6, 24]
    # The MOL["Rifabutin"] check (sorted ring sizes against
    # [5, 5, 6, 6, 6, 24]) is therefore left disabled.

    # Cubane
    cubane = smiles_to_compound("C12C3C4C1C5C4C3C25")
    self.assertEqual(ring_sizes(cubane), {4: 5})

    # Pinene
    pinene = smiles_to_compound("CC1(C2CCC(=C)C1C2)C")
    self.assertEqual(ring_sizes(pinene), {4: 1, 6: 1})
def test_fingerprint(self):
    """Check indigo.fingerprint_similarity for identical and different pairs."""
    goserelin = reader.mol_from_text(MOL["Goserelin"])

    # Two independent parses of the same record must score 1.
    goserelin_copy = reader.mol_from_text(MOL["Goserelin"])
    self.assertEqual(
        indigo.fingerprint_similarity(goserelin, goserelin_copy), 1)

    formestane = reader.mol_from_text(MOL["Formestane"])
    self.assertEqual(
        indigo.fingerprint_similarity(goserelin, formestane), 0.23)
def test_to_real_mol(self):
    """Call indigo.to_real_mol on Phe; passes if nothing raises."""
    phe = reader.mol_from_text(MOL["Phe"])
    # The converted object is not inspected.
    indigo.to_real_mol(phe)
def test_hide_carbon(self):
    """Check the ``visible`` flag on atoms 3 and 6 of the parsed Phe record."""
    phe = reader.mol_from_text(MOL["Phe"])
    shown = phe.atom(3)
    self.assertTrue(shown.visible)
    hidden = phe.atom(6)
    self.assertFalse(hidden.visible)
def test_pickle(self):
    """Check that a parsed molecule survives a pickle round trip."""
    # Compound is picklable
    original = reader.mol_from_text(MOL["Phe"])
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(len(original), len(restored))
def test_fmcs_timeout(self):
    """Check that rdkit.fmcs reports "canceled" for a pair with timeout=1."""
    docetaxel = reader.mol_from_text(MOL["Docetaxel"])
    paclitaxel = reader.mol_from_text(MOL["Paclitaxel"])
    result = rdkit.fmcs(docetaxel, paclitaxel, timeout=1)
    self.assertTrue(result["canceled"])
def test_morgan(self):
    """Check rdkit.morgan_sim (third argument 2) for Goserelin vs Formestane."""
    goserelin = reader.mol_from_text(MOL["Goserelin"])
    formestane = reader.mol_from_text(MOL["Formestane"])
    similarity = rdkit.morgan_sim(goserelin, formestane, 2)
    self.assertEqual(similarity, 0.107)
def test_to_rdmol(self):
    """Check that rdkit.to_rdmol keeps the bond count and molecular weight."""
    phe = reader.mol_from_text(MOL["Phe"])
    converted = rdkit.to_rdmol(phe)
    self.assertEqual(converted.GetNumBonds(), phe.bond_count())
    # Weights are compared to two decimal places.
    converted_weight = Descriptors.MolWt(converted)
    self.assertAlmostEqual(converted_weight, molutil.mw(phe), 2)
def test_remove_coordinated_metal(self):
    """Check that remove_coordinated_metal shrinks Cyanocobalamin by one."""
    cobalamin = reader.mol_from_text(MOL["Cyanocobalamin"])
    size_before = len(cobalamin)
    self.assertEqual(size_before, 95)
    remover.remove_coordinated_metal(cobalamin)
    size_after = len(cobalamin)
    self.assertEqual(size_after, 94)
def test_get_size(self):
    """Check that the Matplotlib drawer reports a 733 x 733 size for demo."""
    compound = reader.mol_from_text(MOL["demo"])
    mpl = Matplotlib(compound)
    # The original call passed a stray third positional argument ``2``.
    # assertEqual takes (first, second, msg), so that value only ended up
    # as the failure message; it never acted as a tolerance.
    self.assertEqual(mpl.get_size(), (733, 733))
def test_colesevelam(self):
    """Polymer Expression not supported yet

    Both reader entry points are expected to raise ValueError for the
    Colesevelam record.
    """
    # Generator entry point: the error must surface on the first item.
    with self.assertRaises(ValueError):
        records = reader.mols_from_text(MOL["Colesevelam"], False)
        next(records)

    # Single-molecule entry point.
    with self.assertRaises(ValueError):
        reader.mol_from_text(MOL["Colesevelam"])
# Demo script: draw the bundled demo molecule as SVG, then exercise networkx.
import chorus
import networkx as nx
from chorus.demo import MOL
from chorus import v2000reader as reader
from chorus.draw.svg import SVG

# Parse the bundled demo record and build its SVG drawing.
mol = reader.mol_from_text(MOL["demo"])
svg = SVG(mol)
svg.contents()  # return value is discarded
svg.data_url_scheme()  # return value is discarded
svg.save("demo.svg")

# networkx is already imported above; build a two-node graph and report
# the installed version together with the graph's nodes.
g = nx.Graph()
g.add_node('dinghao', value=1)
g.add_node('jeff', value=1)
print('networkx version:', nx.__version__)
print(g.nodes)
def test_sdf(self):
    """Print the text writer.mols_to_text produces for a one-molecule list."""
    phe = reader.mol_from_text(MOL["Phe"])
    text = writer.mols_to_text([phe])
    print(text)