def testRSMItoRINCHI(self):
    """Check reaction SMILES -> RInChI conversion.

    Every case is run twice: once plainly, and once with ``-xe``, where
    the direction flag of the expected RInChI is rewritten to ``/d=``.
    """
    # (reaction SMILES, expected RInChI without -xe)
    cases = [
        ("C>N>O",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2<>H3N/h1H3/d+"),
        ("O>N>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2<>H3N/h1H3/d-"),
        ("O>>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-"),
        # The following is assumed to be d+ by analogy with
        # the empty reaction which is d+
        ("O>>O",
         "RInChI=1.00.1S/H2O/h1H2<>H2O/h1H2/d+"),
        # Example: esterification of acetic acid
        ("OCC.CC(=O)O>S(=O)(=O)(O)O>CC(=O)OCC.O",
         "RInChI=1.00.1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)!C2H6O/c1-2-3/h3H,2H2,1H3<>C4H8O2/c1-3-6-4(2)5/h3H2,1-2H3!H2O/h1H2<>H2O4S/c1-5(2,3)4/h(H2,1,2,3,4)/d+"),
        # Example: alkaline ring opening
        ("CC[C@]1(C)O[C@H]1C.[OH-]>>CC[C@](C)(O)[C@@H](C)O",
         "RInChI=1.00.1S/C6H12O/c1-4-6(3)5(2)7-6/h5H,4H2,1-3H3/t5-,6-/m0/s1!H2O/h1H2/p-1<>C6H14O2/c1-4-6(3,8)5(2)7/h5,7-8H,4H2,1-3H3/t5-,6+/m1/s1/d+"),
        # Partial reactions
        (">>C1CC=C(O)CC1",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d+"),
        ("C1CC=C(O)CC1>>",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d-"),
        # The empty reaction
        (">>",
         "RInChI=1.00.1S//d+"),
        # Test 'no-structure'
        ("c1ccccc1C=C>>*",
         "RInChI=1.00.1S/<>C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2/d-/u1-0-0"),
        ("*>>C1CC=C(O)CC1",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d+/u1-0-0"),
        ("O>*>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-/u0-0-1"),
        ("*.O>>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-/u0-1-0"),
        # Empty except for 'no-structures' (assumed)
        ("*>*>*",
         "RInChI=1.00.1S//d+/u1-1-1"),
    ]
    for with_xe in (False, True):
        for rsmi, rinchi in cases:
            if not with_xe:
                output, error = run_exec('obabel -:%s -ismi -orinchi' % rsmi)
                self.assertEqual(output.rstrip(), rinchi)
                continue
            output, error = run_exec('obabel -:%s -ismi -orinchi -xe' % rsmi)
            # With -xe both direction flags collapse to "/d="
            expected = rinchi.replace("/d-", "/d=").replace("/d+", "/d=")
            self.assertEqual(output.rstrip(), expected)
def testSMILESto3DMDL(self):
    """Test interconversion between SMILES and 3D MDL"""
    # One entry per molecule in self.data (same index):
    # (sorted atom parities, sorted bond stereo values) expected in the
    # generated SD file. An empty bond list means "do not check bonds".
    expected = [
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C/F'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C\\F'
        # The bond parities are irrelevant/meaningless for the next two
        ([0, 0, 0, 0, 1], []),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], []),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 4]),  # 'ClC(Br)(F)I'
        ([0, 0, 0, 1], []),  # 'O=[S@@](Cl)I'
        ([0, 0, 0, 2], []),  # 'O=[S@](Cl)I'
        ([0, 0, 0, 3], []),  # 'O=S(Cl)I'
        ([0] * 9, [0] * 8 + [3]),  # "IC=C1NC1"
        ([0] * 9, [0] * 9),  # r"I/C=C\1/NC1"
        ([0] * 9, [0] * 9),  # r"I/C=C/1\NC1"
    ]
    for idx, (want_parities, want_stereos) in enumerate(expected):
        smiles, can = self.data[idx][0:2]
        sdf, error = run_exec(smiles, "babel -ismi -osdf --gen3d")
        atoms, bonds = self.parseMDL(sdf)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        if want_stereos:
            self.assertEqual(want_stereos, got_stereos)
        # Round-trip the SD file back to canonical SMILES
        roundtrip, error = run_exec(sdf, "babel -isdf -ocan")
        self.assertEqual(roundtrip.rstrip(), can)
def testSMILESto3DMDL(self):
    """Test interconversion between SMILES and 3D MDL"""
    # One entry per molecule in self.data (same index):
    # (sorted atom parities, sorted bond stereo values) expected in the
    # generated SD file. An empty bond list means "do not check bonds".
    expected = [
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C/F'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C\\F'
        # The bond parities are irrelevant/meaningless for the next two
        ([0, 0, 0, 0, 1], []),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], []),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 0]),  # 'ClC(Br)(F)I'
        ([0, 0, 0, 1], []),  # 'O=[S@@](Cl)I'
        ([0, 0, 0, 2], []),  # 'O=[S@](Cl)I'
        ([0, 0, 0, 3], []),  # 'O=S(Cl)I'
        ([0]*9, [0]*8 + [3]),  # "IC=C1NC1"
        ([0]*9, [0]*9),  # r"I/C=C\1/NC1"
        ([0]*9, [0]*9),  # r"I/C=C/1\NC1"
    ]
    for idx, (want_parities, want_stereos) in enumerate(expected):
        smiles, can = self.data[idx][0:2]
        sdf, error = run_exec(smiles, "babel -ismi -osdf --gen3d")
        atoms, bonds = self.parseMDL(sdf)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        if want_stereos:
            self.assertEqual(want_stereos, got_stereos)
        # "-as" is necessary to identify the unknown stereo
        roundtrip, error = run_exec(sdf, "obabel -isdf -as -ocan")
        self.assertEqual(roundtrip.rstrip(), can)
def testSMILESto0DMDL(self):
    """Test interconversion between SMILES and 0D MDL"""
    # (sorted atom parities, sorted bond stereo values) expected in the SD file
    expected = [
        ([0, 0, 0, 0, 1], [0, 0, 0, 0]),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], [0, 0, 0, 0]),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 0], [0, 0, 0, 0]),  # 'ClC(Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 0]),  # 'ClC(Br)(F)I' option 'S' when reading
    ]
    for idx, (want_parities, want_stereos) in enumerate(expected):
        # The last case re-uses self.data[6] but adds "-aS" on input;
        # the others map onto self.data[4], [5] and [6] in order.
        if idx == 3:
            data_index, command = 6, "babel -ismi -osdf -aS"
        else:
            data_index, command = idx + 4, "babel -ismi -osdf"
        smiles, can = self.data[data_index][0:2]
        sdf, error = run_exec(smiles, command)
        atoms, bonds = self.parseMDL(sdf)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        self.assertEqual(want_stereos, got_stereos)
        roundtrip, error = run_exec(sdf, "babel -isdf -as -ocan")
        self.assertEqual(roundtrip.rstrip(), can)
def testSMIthruXML(self):
    """Verify that roundtripping through CML preserves stereo"""
    try:
        # Write every SMILES in self.smiles to a temporary CML file...
        output, error = run_exec("\n".join(self.smiles),
                                 "babel -ismi -ocml tmp.cml")
        # ...and read that file back as canonical SMILES.
        output, error = run_exec(output.rstrip(),
                                 "babel -icml tmp.cml -ocan")
        # Strip trailing whitespace from each output line
        output = "\n".join([x.rstrip() for x in output.split("\n")])
        # Every input is expected to give the same canonical SMILES
        self.assertEqual(output.rstrip(),
                         "\n".join([self.cansmi] * len(self.smiles)))
    finally:
        # Clean up even when the assertion fails, so a failing run does
        # not leave tmp.cml behind for later tests.
        if os.path.exists("tmp.cml"):
            os.remove("tmp.cml")
def testSMItoSMI(self):
    """
    Some initial tests based on previous geometry stereo bugs
    (i.e., OBBuilder can't handle correctly)
    """
    self.canFindExecutable("obabel")

    # SMILES whose canonical form must survive a trip through 3D coordinates
    self.smiles = [
        'c1ccccc1',  # benzene
        'C/C=C\\C',  # Z-butene
        'C/C=C/C',  # E-butene
        'N[C@](Br)(O)C',
        #'CCC[C@@H]([C@H](CC(C)C)C)C',
        #'C1CC[C@H]2[C@@H](C1)CCCC2', # cis-decalin
        #'C1CC[C@@H]2[C@@H](C1)CCCC2', # trans-decalin
        #'CCCNC1=C(C)C(=O)C2=C(C1=O)[C@@H](COC(=O)N)[C@]1(N2C[C@H]2[C@H]1N2)OC',
        #'CN([C@H]1C(=O)C(=C([C@]2([C@@H]1C[C@@H]1Cc3c(C(=C1C2=O)O)c(O)ccc3N(C)C)O)O)C(=O)N)C',
        #'CN([C@@H]1C(=O)C(=C([C@@]2([C@H]1C[C@@H]1C(=C(O)c3c([C@@]1(C)O)c(Cl)ccc3O)C2=O)O)O)C(=O)N)C',
        #'C[C@@H](CC(=O)OC[C@@]12CC[C@H]3[C@@]([C@@H]2C[C@@H](O1)C1=CC(=O)O[C@H]1O)(C)CC[C@@H]1[C@]3(C)CCCC1(C)C)O',
        #'CC(=O)OC[C@@]12CC[C@H]3[C@@]([C@@H]2C[C@H](O1)C1=CC(=O)O[C@H]1O)(C)CC[C@@H]1[C@]3(C)CCCC1(C)C'
    ]
    for smi in self.smiles:
        # Reference canonical SMILES, generated fresh in case the ordering changes
        reference, error = run_exec(smi, "obabel -ismi -ocan")
        # Generate 3D coordinates and write them out as an SD file
        sdf_3d, error = run_exec(smi, "obabel -ismi -osdf --gen3d dg")
        # Reading the 3D structure back must give the reference again
        output, error = run_exec(sdf_3d, "obabel -isdf -ocan")
        from_3d = output.split('\t')[0].rstrip()
        self.assertEqual(from_3d, reference.rstrip())
def testSingleHit(self):
    """PR#2955101 - Difficulty reading from a fastsearch index"""
    # Ten molecules, one per line. The entry containing a backslash is a
    # raw string so that "\C" is not treated as an (invalid) escape.
    smiles = "\n".join([
        "C12(C(N(C(=O)C)c3c2cccc3)=O)Nc2c(ccc(c2N1)OCCCC)OCCCC",
        "n1c([nH]c(cc1c1ccccc1)=O)c1ccc(cc1)Br",
        "n1c(nc2c(c1N(C)C)cccc2)c1c(O)cccc1",
        r"C1(/[CH]2[CH]3\C(=C4/CC(C)(C)NC(C4)(C)C)C=C[CH]3[CH]1C=C2)=C1/CC(C)(C)NC(C1)(C)C",
        "n1c(c2ccc(C(=O)O)cc2)ccc(c1)CCCCC",
        "N1(C(CN(CC1=O)C(=O)C1CCCCC1)=O)CCc1ccccc1",
        "S(N1[CH](c2ccccc2C=C1)C#N)(c1ccc(cc1)C)(=O)=O",
        "c12c(c(OC)c3c(c1OC)occ3)ccc(o2)=O",
        "c12c(O[CH](C1=O)C(C)C)cc1c(c2)ccc(=O)o1",
        "c12[C]3([C@H]4([N@@](CCc1c1ccccc1[nH]2)C[C@H](C=C4CC)C3))C(=O)OC",
    ])
    # "with" guarantees the file is flushed and closed before babel reads it
    with open("ten.smi", "w") as outputfile:
        outputfile.write(smiles)

    # Build the fastsearch index from the SMILES file
    output, error = run_exec("babel ten.smi ten.fs")
    self.canFindFile("ten.fs")
    self.assertConverted(error, 10)

    # Substructure query against the index: exactly one hit expected
    query = "Nc2nc(c1ccccc1)nc3ccccc23"
    output, error = run_exec("babel ten.fs -ifs -s %s -osmi" % query)
    self.assertConverted(error, 1)

    # Same query with the extra "-at 0.5 -aa" options: still one hit expected
    output, error = run_exec("babel ten.fs -ifs -s %s -at 0.5 -aa -osmi" % query)
    self.assertConverted(error, 1)
def testSingleHit(self):
    """PR#2955101 - Difficulty reading from a fastsearch index"""
    # Ten molecules, one per line. The entry containing a backslash is a
    # raw string so that "\C" is not treated as an (invalid) escape.
    smiles = "\n".join([
        "C12(C(N(C(=O)C)c3c2cccc3)=O)Nc2c(ccc(c2N1)OCCCC)OCCCC",
        "n1c([nH]c(cc1c1ccccc1)=O)c1ccc(cc1)Br",
        "n1c(nc2c(c1N(C)C)cccc2)c1c(O)cccc1",
        r"C1(/[CH]2[CH]3\C(=C4/CC(C)(C)NC(C4)(C)C)C=C[CH]3[CH]1C=C2)=C1/CC(C)(C)NC(C1)(C)C",
        "n1c(c2ccc(C(=O)O)cc2)ccc(c1)CCCCC",
        "N1(C(CN(CC1=O)C(=O)C1CCCCC1)=O)CCc1ccccc1",
        "S(N1[CH](c2ccccc2C=C1)C#N)(c1ccc(cc1)C)(=O)=O",
        "c12c(c(OC)c3c(c1OC)occ3)ccc(o2)=O",
        "c12c(O[CH](C1=O)C(C)C)cc1c(c2)ccc(=O)o1",
        "c12[C]3([C@H]4([N@@](CCc1c1ccccc1[nH]2)C[C@H](C=C4CC)C3))C(=O)OC",
    ])
    # "with" guarantees the file is flushed and closed before babel reads it
    with open("ten.smi", "w") as outputfile:
        outputfile.write(smiles)

    # Build the fastsearch index from the SMILES file
    output, error = run_exec("babel ten.smi ten.fs")
    self.canFindFile("ten.fs")
    self.assertConverted(error, 10)

    # Substructure query against the index: exactly one hit expected
    query = "Nc2nc(c1ccccc1)nc3ccccc23"
    output, error = run_exec("babel ten.fs -ifs -s %s -osmi" % query)
    self.assertConverted(error, 1)
def testSMILEStoInChI(self):
    """Convert each SMILES in self.data to InChI and each InChI back to canonical SMILES."""
    # Each row of self.data is (input SMILES, canonical SMILES, InChI);
    # the canonical SMILES (in the middle) were derived from the SMILES.
    for row in self.data:
        smiles, can, inchi = row

        # SMILES on the left -> InChI on the right
        as_inchi, error = run_exec(smiles, "babel -ismi -oinchi")
        self.assertEqual(as_inchi.rstrip(), inchi)

        # InChI on the right -> canonical SMILES in the middle
        as_can, error = run_exec(inchi, "babel -iinchi -ocan")
        self.assertEqual(as_can.rstrip(), can)
def testInChIToSMILES_Bug(self):
    """PR#2101034- InChI <-> SMILES conv misrepresents stereo"""
    # InChI with a "+" double-bond layer -> SMILES
    test_inchi = 'InChI=1S/C10H10/c1-2-3-7-10-8-5-4-6-9-10/h2-9H,1H2/b7-3+'
    output, error = run_exec(test_inchi, "babel -iinchi -osmi")
    self.assertEqual(output.rstrip(), "C=C/C=C/c1ccccc1")

    # SMILES -> InChI with a "-" double-bond layer. The backslash is
    # written as "\\" because "\C" is not a valid escape sequence; the
    # resulting string is unchanged.
    test_smiles = "C=C\\C=C/c1ccccc1"
    output, error = run_exec(test_smiles, "babel -ismi -oinchi")
    self.assertEqual(output.rstrip(), "InChI=1S/C10H10/c1-2-3-7-10-8-5-4-6-9-10/h2-9H,1H2/b7-3-")
def testInChIToSMILES_Bug(self):
    """PR#2101034- InChI <-> SMILES conv misrepresents stereo"""
    # InChI with a "+" double-bond layer -> SMILES
    test_inchi = 'InChI=1S/C10H10/c1-2-3-7-10-8-5-4-6-9-10/h2-9H,1H2/b7-3+'
    output, error = run_exec(test_inchi, "babel -iinchi -osmi")
    self.assertEqual(output.rstrip(), "C=C/C=C/c1ccccc1")

    # SMILES -> InChI with a "-" double-bond layer. The backslash is
    # written as "\\" because "\C" is not a valid escape sequence; the
    # resulting string is unchanged.
    test_smiles = "C=C\\C=C/c1ccccc1"
    output, error = run_exec(test_smiles, "babel -ismi -oinchi")
    self.assertEqual(
        output.rstrip(),
        "InChI=1S/C10H10/c1-2-3-7-10-8-5-4-6-9-10/h2-9H,1H2/b7-3-")
def test2DMDLto0D(self):
    """Test conversion for 2D MDL to CAN and InChI"""
    # The following file was created using RDKit starting from
    # the SMILES strings in self.data[x][0].
    filename = self.getTestFile("testsym_2Dtests.sdf")

    # (command template, column of self.data holding the expected value)
    checks = (
        ("babel -isdf %s -ocan", 1),
        ("babel -isdf %s -oinchi", 2),
    )
    for command, column in checks:
        output, error = run_exec(command % filename)
        lines = output.rstrip().split("\n")
        for row, line in enumerate(lines):
            self.assertEqual(line.rstrip(), self.data[row][column])
def testChiralToLonePair(self):
    """PR#3058701 - Handle stereochemistry at lone pair on S"""
    # Note to self: Need to ensure that roundtripping through the various
    # 2D and 3D formats works. In the meanwhile, this test at least ensures
    # that SMILES reading and writing works fine.
    expected = '[S@@](=O)(Cl)C'
    # Different spellings that must all canonicalize to `expected`
    spellings = [expected, '[S@](Cl)(=O)C', 'O=[S@](Cl)C']
    for spelling in spellings:
        canonical, error = run_exec(spelling, "babel -ismi -ocan")
        self.assertEqual(canonical.rstrip(), expected)

    # Check that regular chiral S still work fine
    regular = "[S@](=O)(=N)(C)O"
    echoed, error = run_exec(regular, "babel -ismi -osmi")
    self.assertEqual(echoed.rstrip(), regular)
def testChiralToLonePair(self):
    """PR#3058701 - Handle stereochemistry at lone pair on S"""
    # Note to self: Need to ensure that roundtripping through the various
    # 2D and 3D formats works. In the meanwhile, this test at least ensures
    # that SMILES reading and writing works fine.
    expected = 'C[S@](=O)Cl'
    # Different spellings that must all canonicalize to `expected`
    spellings = [expected, '[S@](Cl)(=O)C', 'O=[S@](Cl)C']
    for spelling in spellings:
        canonical, error = run_exec(spelling, "babel -ismi -ocan")
        self.assertEqual(canonical.rstrip(), expected)

    # Check that regular chiral S still work fine
    regular = "[S@](=O)(=N)(C)O"
    echoed, error = run_exec(regular, "babel -ismi -osmi")
    self.assertEqual(echoed.rstrip(), regular)
def test2DMDLto2DMDL(self):
    """Make sure that stereo is preserved when writing wedge bonds"""
    first_file = self.getTestFile("testsym_2Dtests_more.sdf")
    second_file = self.getTestFile("testsym_2Dtests_threeligands.sdf")

    # The test files have the correct canonical SMILES string
    # stored in the data field "smiles"; --append adds it to each title.
    command = "obabel -isdf %s %s -osdf --append smiles" % (first_file, second_file)
    rewritten_sdf, error = run_exec(command)

    # Read the rewritten SD file back; every output line is then
    # "<canonical SMILES> <appended reference SMILES>".
    finaloutput, error = run_exec(rewritten_sdf, "obabel -isdf -ocan")
    for line in finaloutput.rstrip().split("\n"):
        result, correct_answer = line.split()
        self.assertEqual(result, correct_answer)
def test2DMDLto2DMDL(self):
    """Make sure that stereo is preserved when writing wedge bonds"""
    names = ("testsym_2Dtests_more.sdf", "testsym_2Dtests_threeligands.sdf")
    paths = tuple([self.getTestFile(name) for name in names])

    # The test files have the correct canonical SMILES string
    # stored in the data field "smiles"; --append adds it to each title.
    rewritten_sdf, error = run_exec(
        "obabel -isdf %s %s -osdf --append smiles" % paths)

    # Read the rewritten SD file back; every output line is then
    # "<canonical SMILES> <appended reference SMILES>".
    finaloutput, error = run_exec(rewritten_sdf, "obabel -isdf -ocan")
    for line in finaloutput.rstrip().split("\n"):
        result, correct_answer = line.split()
        self.assertEqual(result, correct_answer)
def fastcheckmatch(query, molecules):
    """Return, for each molecule, whether it appears in the output of one -s search.

    All molecules are piped through a single obabel call filtered with
    ``-s<query>``; a molecule counts as matched when its SMILES string
    appears verbatim as a line of the output.

    May fail where Open Babel does not output the input query, e.g.
    [C@@]([H])(Br)(Cl)I is output as [C@@H](Br)(Cl)I
    """
    command = "obabel -ismi -s%s -osmi" % query
    output, error = run_exec("\n".join(molecules), command)
    hits = {line.rstrip() for line in output.split("\n")}
    return [smi in hits for smi in molecules]
def testRInChIOfficialExamples(self):
    """These test RXN to RInChI using the examples in the RInChI distrib"""
    for rxnfile in glob.glob(os.path.join(here, "rinchi", "*.rxn")):
        dirname, fname = os.path.split(rxnfile)
        output, error = run_exec('obabel %s -orinchi' % rxnfile)
        # The expected answer is the first line of the .txt file that
        # shares the .rxn file's base name (text before the first ".").
        answer_path = os.path.join(dirname, fname.split(".")[0] + ".txt")
        # "with" closes the answer file; the original left it open.
        with open(answer_path) as answer_file:
            ans = answer_file.readline()
        self.assertEqual(output.rstrip(), ans.rstrip())
def testSMItoInChI(self):
    """Verify that all molecules give the same InChI"""
    all_smiles = "\n".join(self.smiles)
    raw, error = run_exec(all_smiles, "obabel -ismi -oinchi")

    # Drop trailing whitespace from every line, then from the whole block
    cleaned = "\n".join([line.rstrip() for line in raw.split("\n")]).rstrip()

    # One copy of self.inchi is expected per input molecule
    expected = "\n".join([self.inchi] * len(self.smiles))
    self.assertEqual(cleaned, expected)
def testSMItoCAN(self):
    """PR#1842055- bad isotope canonicalization"""
    self.canFindExecutable("babel")

    # A series of isotopamers...
    self.smiles = [
        'c1ccccc1',
        'c1[14cH]cccc1',
        '[14cH]1[14cH]cccc1',
        '[14cH]1[14cH]ccc[14cH]1',
        '[14cH]1[14cH]cc[14cH][14cH]1',
        '[14cH]1[14cH]c[14cH][14cH][14cH]1',
        '[14cH]1[14cH][14cH][14cH][14cH][14cH]1',
    ]
    # ...and their canonical forms, in the same order
    self.cansmis = [
        'c1ccccc1',
        '[14cH]1ccccc1',
        '[14cH]1[14cH]cccc1',
        '[14cH]1[14cH]ccc[14cH]1',
        '[14cH]1[14cH][14cH]cc[14cH]1',
        '[14cH]1[14cH][14cH]c[14cH][14cH]1',
        '[14cH]1[14cH][14cH][14cH][14cH][14cH]1',
    ]
    for smi, expected in zip(self.smiles, self.cansmis):
        output, error = run_exec(smi, "babel -ismi -ocan")
        self.assertEqual(output.rstrip(), expected)
def testSMItoSMI(self):
    """
    PR#2705497 aromatic - kekule conversion issue
    PR#1445453 SMILES aromaticity fails on 4-valent N+ atoms
    PR#1814248 Aromaticity munged by SMILES input
    PR#1761638 Error in Aromaticity / Kekulize
    PR#2948661 Trunk fails aromaticity
    """
    self.canFindExecutable("babel")

    # A series of aromatic strings, which should convert to themselves
    self.smiles = [
        'c12c3c(cc(N)cc3)Cc1cccc2',
        'c1(=O)n(c2c(c(=O)o1)cccc2)CC(=O)OCC',
        'c1n[nH]c(=S)[nH]1',
        'O=c1[nH]ccc2nc3oc4ccccc4c(=O)c3cc12',
        'c1nc2sccn2c1',
        'c1[n+]cnc2[nH]cnc12',
        'c1onc(c2ccccc2Cl)c1',
        'c1ccc2[nH]c3ccc4cc[n+]cc4c3c2c1',
        '[nH]1c2ccccc2c2c3C(=O)NCc3c3c4ccccc4[nH]c3c12',
        'c1c(C)c2C=c3[n-]c(=Cc4[nH]c(C=c5[n-]c(=Cc1[nH]2)c(C)c5C=C)c(C)c4CCC(=O)O)c(CCC(=O)O)c3',
        'C1=C2CCC(=Cc3ccc([nH]3)C=c3ccc(=Cc4ccc1[nH]4)[nH]3)N2',
        'c1(NC(=O)C2CC2)nc2c3c(cccc3)CCc2cn1',
        'O=C1N(CCCC)C(=O)NC2C1C1N(N2)CCN1',
        'Cn1cccnc1=O',
        'O=c1n(C)c(=O)nc2c1c1n([nH]2)cc[nH]1',
        'Cn1ccn2c1nc1c2c(=O)n(C)c(=O)n1C',
    ]
    for smi in self.smiles:
        # SMILES in, SMILES out: the string must come back unchanged
        output, error = run_exec(smi, "babel -ismi -osmi")
        self.assertEqual(output.rstrip(), smi)
def testSMItoCAN(self):
    """PR#1842055- bad isotope canonicalization"""
    self.canFindExecutable("babel")

    # A series of isotopamers...
    self.smiles = [
        'c1ccccc1',
        'c1[14cH]cccc1',
        'c1[14cH][14cH]ccc1',
        'c1[14cH][14cH][14cH]cc1',
        'c1[14cH][14cH][14cH][14cH]c1',
        'c1[14cH][14cH][14cH][14cH][14cH]1',
        '[14cH]1[14cH][14cH][14cH][14cH][14cH]1',
    ]
    # ...and their canonical forms, in the same order.
    # NOTE(review): this list has one more entry than self.smiles; the
    # final (duplicate) entry is never compared against anything.
    self.cansmis = [
        'c1ccccc1',
        '[14cH]1ccccc1',
        '[14cH]1[14cH]cccc1',
        '[14cH]1[14cH]ccc[14cH]1',
        '[14cH]1[14cH][14cH]cc[14cH]1',
        '[14cH]1[14cH][14cH]c[14cH][14cH]1',
        '[14cH]1[14cH][14cH][14cH][14cH][14cH]1',
        '[14cH]1[14cH][14cH][14cH][14cH][14cH]1',
    ]
    # zip stops at the shorter list, i.e. after len(self.smiles) pairs
    for smi, expected in zip(self.smiles, self.cansmis):
        output, error = run_exec(smi, "babel -ismi -ocan")
        self.assertEqual(output.rstrip(), expected)
def testSMItoSMI(self):
    """
    PR#2705497 aromatic - kekule conversion issue
    PR#1445453 SMILES aromaticity fails on 4-valent N+ atoms
    PR#1814248 Aromaticity munged by SMILES input
    PR#1761638 Error in Aromaticity / Kekulize
    PR#2948661 Trunk fails aromaticity
    """
    self.canFindExecutable("babel")

    # A series of aromatic strings, which should convert to themselves
    self.smiles = [
        'c12-c3c(cc(N)cc3)Cc1cccc2',
        'c1(=O)n(c2c(c(=O)o1)cccc2)CC(=O)OCC',
        'c1n[nH]c(=S)[nH]1',
        'O=c1[nH]ccc2nc3oc4ccccc4c(=O)c3cc12',
        'c1nc2sccn2c1',
        'c1[nH+]cnc2[nH]cnc12',
        'c1onc(c2ccccc2Cl)c1',
        'c1ccc2[nH]c3ccc4cc[nH+]cc4c3c2c1',
        '[nH]1c2ccccc2c2c3C(=O)NCc3c3c4ccccc4[nH]c3c12',
        'c1c(C)c2C=c3[n-]c(=Cc4[nH]c(C=c5[n-]c(=Cc1[nH]2)c(C)c5C=C)c(C)c4CCC(=O)O)c(CCC(=O)O)c3',
        'C1=C2CCC(=Cc3ccc([nH]3)C=c3ccc(=Cc4ccc1[nH]4)[nH]3)N2',
        'c1(NC(=O)C2CC2)nc2-c3c(cccc3)CCc2cn1',
        'O=C1N(CCCC)C(=O)NC2C1C1N(N2)CCN1',
        'Cn1cccnc1=O',
        'O=c1n(C)c(=O)nc2-c1c1n([nH]2)cc[nH]1',
        'Cn1ccn2c1nc1c2c(=O)n(C)c(=O)n1C',
    ]
    for smi in self.smiles:
        # SMILES in, SMILES out: the string must come back unchanged
        output, error = run_exec(smi, "babel -ismi -osmi")
        self.assertEqual(output.rstrip(), smi)
def testSMILEStoInChI(self):
    """Convert each SMILES to InChI and each InChI back to canonical SMILES."""
    # Rows are (input SMILES, canonical SMILES, InChI).
    # The canonical smiles (in the middle) were derived from the SMILES.
    cases = [
        ('ClC=CF', 'FC=CCl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H'),
        ('Cl/C=C/F', 'F/C=C/Cl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H/b2-1+'),
        ('Cl/C=C\\F', 'F/C=C\\Cl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H/b2-1-'),
        ('Cl[C@@](Br)(F)I', 'F[C@@](Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5/t1-/m0/s1'),
        ('Cl[C@](Br)(F)I', 'F[C@](Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5/t1-/m1/s1'),
        ('ClC(Br)(F)I', 'FC(Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5'),
    ]
    for case in cases:
        smiles, can, inchi = case

        # SMILES on the left -> InChI on the right
        as_inchi, error = run_exec(smiles, "babel -ismi -oinchi")
        self.assertEqual(as_inchi.rstrip(), inchi)

        # InChI on the right -> canonical SMILES in the middle
        as_can, error = run_exec(inchi, "babel -iinchi -ocan")
        self.assertEqual(as_can.rstrip(), can)
def testSMItoSMI(self):
    """
    Some initial tests based on previous geometry stereo bugs
    (i.e., OBBuilder can't handle correctly)
    """
    self.canFindExecutable("obabel")

    # SMILES whose canonical form must survive a trip through 3D coordinates
    self.smiles = [
        'c1ccccc1',  # benzene
        'C#C',  # triple bond
        'CC=CC',  # butene unspecified
        'C/C=C\\C',  # Z-butene
        'C/C=C/C',  # E-butene
        'NC(Br)(O)C',
        'N[C@](Br)(O)C',
        'N[C@@](Br)(O)C',
        'CCC[C@@H]([C@H](CC(C)C)C)C',
        'C1CC[C@H]2[C@@H](C1)CCCC2',  # cis-decalin
        'C1CC[C@@H]2[C@@H](C1)CCCC2',  # trans-decalin
        '[C@H]1(NC[C@H]2[C@H]1N2)OC',
        'Clc1cccc(Cl)c1\\C=N\\NC(=O)c1cccs1',
        'O=C1NC(=S)S\\C1=C/c1ccco1',
        'S=C1NC(=O)/C(=C/c2ccco2)/S1',
        'O=C1NC(=S)N\\C1=C\\c1ccncc1',
        'S=C1NC(=O)C(=C)N1',
        'CC(=O)N\\N=C\\c1ccncc1',
        'N/N=c/1\\sc2c(n1C)cccc2',
        'OCCN/C=C\\1/C(=NN(C1=O)c1ccccc1)C',
        'Cc1ccc(o1)/C=C/C=O',
        # disabled to make test run faster:
        #'CCCNC1=C(C)C(=O)C2=C(C1=O)[C@@H](COC(=O)N)[C@]1(N2C[C@H]2[C@H]1N2)OC',
        #'CN([C@H]1C(=O)C(=C([C@]2([C@@H]1C[C@@H]1Cc3c(C(=C1C2=O)O)c(O)ccc3N(C)C)O)O)C(=O)N)C',
        #'CN([C@@H]1C(=O)C(=C([C@@]2([C@H]1C[C@@H]1C(=C(O)c3c([C@@]1(C)O)c(Cl)ccc3O)C2=O)O)O)C(=O)N)C',
        #'C[C@@H](CC(=O)OC[C@@]12CC[C@H]3[C@@]([C@@H]2C[C@@H](O1)C1=CC(=O)O[C@H]1O)(C)CC[C@@H]1[C@]3(C)CCCC1(C)C)O',
        #'CC(=O)OC[C@@]12CC[C@H]3[C@@]([C@@H]2C[C@H](O1)C1=CC(=O)O[C@H]1O)(C)CC[C@@H]1[C@]3(C)CCCC1(C)C'
    ]
    for smi in self.smiles:
        # Reference canonical SMILES, generated fresh in case the ordering changes
        reference, error = run_exec(smi, "obabel -ismi -ocan")
        # Generate 3D coordinates and write them out as an SD file
        sdf_3d, error = run_exec(smi, "obabel -ismi -osdf --gen3d dg")
        # Reading the 3D structure back must give the reference again
        output, error = run_exec(sdf_3d, "obabel -isdf -ocan")
        from_3d = output.split('\t')[0].rstrip()
        self.assertEqual(from_3d, reference.rstrip())
def testRSMItoRINCHI(self):
    """Check reaction SMILES (rsmi) -> RInChI conversion.

    Every case is run twice: once plainly, and once with ``-xe``, where
    the direction flag of the expected RInChI is rewritten to ``/d=``.
    """
    # (reaction SMILES, expected RInChI without -xe)
    cases = [
        ("C>N>O",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2<>H3N/h1H3/d+"),
        ("O>N>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2<>H3N/h1H3/d-"),
        ("O>>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-"),
        # The following is assumed to be d+ by analogy with
        # the empty reaction which is d+
        ("O>>O",
         "RInChI=1.00.1S/H2O/h1H2<>H2O/h1H2/d+"),
        # Example: esterification of acetic acid
        ("OCC.CC(=O)O>S(=O)(=O)(O)O>CC(=O)OCC.O",
         "RInChI=1.00.1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)!C2H6O/c1-2-3/h3H,2H2,1H3<>C4H8O2/c1-3-6-4(2)5/h3H2,1-2H3!H2O/h1H2<>H2O4S/c1-5(2,3)4/h(H2,1,2,3,4)/d+"),
        # Example: alkaline ring opening
        ("CC[C@]1(C)O[C@H]1C.[OH-]>>CC[C@](C)(O)[C@@H](C)O",
         "RInChI=1.00.1S/C6H12O/c1-4-6(3)5(2)7-6/h5H,4H2,1-3H3/t5-,6-/m0/s1!H2O/h1H2/p-1<>C6H14O2/c1-4-6(3,8)5(2)7/h5,7-8H,4H2,1-3H3/t5-,6+/m1/s1/d+"),
        # Partial reactions
        (">>C1CC=C(O)CC1",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d+"),
        ("C1CC=C(O)CC1>>",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d-"),
        # The empty reaction
        (">>",
         "RInChI=1.00.1S//d+"),
        # Test 'no-structure'
        ("c1ccccc1C=C>>*",
         "RInChI=1.00.1S/<>C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2/d-/u1-0-0"),
        ("*>>C1CC=C(O)CC1",
         "RInChI=1.00.1S/<>C6H10O/c7-6-4-2-1-3-5-6/h4,7H,1-3,5H2/d+/u1-0-0"),
        ("O>*>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-/u0-0-1"),
        ("*.O>>C",
         "RInChI=1.00.1S/CH4/h1H4<>H2O/h1H2/d-/u0-1-0"),
        # Empty except for 'no-structures' (assumed)
        ("*>*>*",
         "RInChI=1.00.1S//d+/u1-1-1"),
    ]
    for with_xe in (False, True):
        for rsmi, rinchi in cases:
            if not with_xe:
                output, error = run_exec('obabel -:%s -irsmi -orinchi' % rsmi)
                self.assertEqual(output.rstrip(), rinchi)
                continue
            output, error = run_exec('obabel -:%s -irsmi -orinchi -xe' % rsmi)
            # With -xe both direction flags collapse to "/d="
            expected = rinchi.replace("/d-", "/d=").replace("/d+", "/d=")
            self.assertEqual(output.rstrip(), expected)
def testRInChIOfficialExamples(self):
    """These test RXN to RInChI using the examples in the RInChI distrib"""
    for rxnfile in glob.glob(os.path.join(here, "rinchi", "*.rxn")):
        dirname, fname = os.path.split(rxnfile)
        output, error = run_exec('obabel %s -orinchi' % rxnfile)
        # The expected answer is the first line of the .txt file that
        # shares the .rxn file's base name (text before the first ".").
        answer_path = os.path.join(dirname, fname.split(".")[0] + ".txt")
        # "with" closes the answer file; the original left it open.
        with open(answer_path) as answer_file:
            ans = answer_file.readline()
        self.assertEqual(output.rstrip(), ans.rstrip())
def testSMILESto0DMDL(self):
    """Test interconversion between SMILES and 0D MDL"""
    # (sorted atom parities, sorted bond stereo values) expected in the SD
    # file; entry k corresponds to self.data[k + 3].
    expected = [
        ([0, 0, 0, 0, 1], [0, 0, 0, 0]),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], [0, 0, 0, 0]),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 0]),  # 'ClC(Br)(F)I'
    ]
    for offset, (want_parities, want_stereos) in enumerate(expected):
        smiles, can = self.data[offset + 3][0:2]
        sdf, error = run_exec(smiles, "babel -ismi -osdf")
        atoms, bonds = self.parseMDL(sdf)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        self.assertEqual(want_stereos, got_stereos)
        roundtrip, error = run_exec(sdf, "babel -isdf -as -ocan")
        self.assertEqual(roundtrip.rstrip(), can)
def test2DMDLto0D(self):
    """Test conversion for 2D MDL to CAN and InChI"""
    # The following file was created using RDKit starting from
    # the SMILES strings in the first column of `cases` below.
    filename = self.getTestFile("testsym_2Dtests.sdf")
    # Rows are (input SMILES, canonical SMILES, InChI)
    cases = [
        ('ClC=CF', 'FC=CCl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H'),
        ('Cl/C=C/F', 'F/C=C/Cl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H/b2-1+'),
        ('Cl/C=C\\F', 'F/C=C\\Cl',
         'InChI=1S/C2H2ClF/c3-1-2-4/h1-2H/b2-1-'),
        ('Cl[C@@](Br)(F)I', 'F[C@@](Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5/t1-/m0/s1'),
        ('Cl[C@](Br)(F)I', 'F[C@](Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5/t1-/m1/s1'),
        ('ClC(Br)(F)I', 'FC(Cl)(Br)I',
         'InChI=1S/CBrClFI/c2-1(3,4)5'),
    ]

    # (command template, column of `cases` holding the expected value)
    checks = (
        ("babel -isdf %s -ocan", 1),
        ("babel -isdf %s -oinchi", 2),
    )
    for command, column in checks:
        output, error = run_exec(command % filename)
        lines = output.rstrip().split("\n")
        for row, line in enumerate(lines):
            self.assertEqual(line.rstrip(), cases[row][column])
def testFindDups(self):
    """Look for duplicates using --unique"""
    # (argument given to --unique, number of molecules expected to be converted)
    expectations = [
        ("", 13),
        ("/formula", 5),
        ("/connect", 6),
        ("/nostereo", 9),
        ("/nosp3", 11),
        ("/noEZ", 11),
        ("/nochg", 12),
        ("/noiso", 11),
        ("cansmi", 13),
        ("cansmiNS", 7),
    ]
    for unique_arg, n_converted in expectations:
        command = "babel -ismi -osmi --unique %s" % unique_arg
        output, error = run_exec(self.smiles, command)
        self.assertConverted(error, n_converted)
def testSelfMatch(self):
    """Verify that a molecule matches itself"""
    molecules = (
        '[C@@](F)(Br)(Cl)I',
        '[C@](F)(Br)(Cl)I',
        'F[C@](Br)(Cl)I',
        '[C@H](Br)(Cl)I',
        'Br[C@H](Cl)I',
        '[C@]1(Br)(Cl)NC1',
        '[C@@]1(Br)(Cl)NC1',
        'Br[C@]1(Cl)NC1',
        'C1N[C@]1(Cl)Br',
        'F[C@]1(Br)N[C@]1(Br)Cl',
        '[C@H]1(Cl)NC1',
    )
    for smi in molecules:
        # Use the molecule both as the input and as the -s query
        command = "obabel -:%s -s%s -osmi" % (smi, smi)
        output, error = run_exec(command)
        self.assertEqual(output.rstrip(), smi)
def testSMILESto3DMDL(self):
    """Test interconversion between SMILES and 3D MDL"""
    # Rows are (input SMILES, canonical SMILES,
    #           sorted atom parities, sorted bond stereo values)
    cases = [
        ('ClC=CF', 'FC=CCl',
         [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),
        ('Cl/C=C/F', 'F/C=C/Cl',
         [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),
        ('Cl/C=C\\F', 'F/C=C\\Cl',
         [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),
        ('Cl[C@@](Br)(F)I', 'F[C@@](Cl)(Br)I',
         [0, 0, 0, 0, 1], [0, 0, 0, 1]),
        ('Cl[C@](Br)(F)I', 'F[C@](Cl)(Br)I',
         [0, 0, 0, 0, 2], [0, 0, 0, 6]),
        ('ClC(Br)(F)I', 'FC(Cl)(Br)I',
         [0, 0, 0, 0, 3], [0, 0, 0, 4]),
    ]
    for smiles, can, want_parities, want_stereos in cases:
        sdf, error = run_exec(smiles, "babel -ismi -osdf --gen3d")
        atoms, bonds = self.parseMDL(sdf)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        self.assertEqual(want_stereos, got_stereos)
        # Round-trip the SD file back to canonical SMILES
        roundtrip, error = run_exec(sdf, "babel -isdf -ocan")
        self.assertEqual(roundtrip.rstrip(), can)
def testXYZtoSMILESand3DMDL(self):
    """Test conversion from XYZ to SMILES and 3D MDL"""
    # Since the XYZ format does not trigger stereo perception,
    # this test makes sure that the SMILES and 3D MDL formats
    # perceive stereo themselves.
    # Entry i is (sorted atom parities, sorted bond stereo values)
    # for the molecule in self.data[i].
    expected = [
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C/F'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C\\F'
        # The bond parities are irrelevant/meaningless for the next two
        ([0, 0, 0, 0, 1], []),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], []),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 4]),  # 'ClC(Br)(F)I'
        ([0, 0, 0, 1], []),  # 'O=[S@@](Cl)I'
        ([0, 0, 0, 2], []),  # 'O=[S@](Cl)I'
        ([0, 0, 0, 3], []),  # 'O=S(Cl)I'
        ([0]*9, [0]*8 + [3]),  # "IC=C1NC1"
        ([0]*9, [0]*9),  # r"I/C=C\1/NC1"
        ([0]*9, [0]*9),  # r"I/C=C/1\NC1"
    ]
    ambiguous = {0, 1, 6, 10}  # ambiguous stereo is lost in XYZ
    sulfoxides = {7, 8, 9}  # perception of S=O from XYZ fails
    skipped = ambiguous | sulfoxides

    for idx, (want_parities, want_stereos) in enumerate(expected):
        if idx in skipped:
            continue
        smiles, can = self.data[idx][0:2]
        xyz, error = run_exec(smiles, "babel -ismi -oxyz --gen3d")

        # XYZ -> canonical SMILES
        canoutput, error = run_exec(xyz, "babel -ixyz -ocan")
        self.assertEqual(canoutput.rstrip(), can)

        # XYZ -> SD file, then inspect the parity/stereo fields
        sdfoutput, error = run_exec(xyz, "babel -ixyz -osdf")
        atoms, bonds = self.parseMDL(sdfoutput)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        if want_stereos:
            self.assertEqual(want_stereos, got_stereos)
def testXYZtoSMILESand3DMDL(self):
    """Test conversion from XYZ to SMILES and 3D MDL"""
    # Since the XYZ format does not trigger stereo perception,
    # this test makes sure that the SMILES and 3D MDL formats
    # perceive stereo themselves.
    # Entry i is (sorted atom parities, sorted bond stereo values)
    # for the molecule in self.data[i].
    expected = [
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 3]),  # 'ClC=CF'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C/F'
        ([0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0]),  # 'Cl/C=C\\F'
        # The bond parities are irrelevant/meaningless for the next two
        ([0, 0, 0, 0, 1], []),  # 'Cl[C@@](Br)(F)I'
        ([0, 0, 0, 0, 2], []),  # 'Cl[C@](Br)(F)I'
        ([0, 0, 0, 0, 3], [0, 0, 0, 4]),  # 'ClC(Br)(F)I'
        ([0, 0, 0, 1], []),  # 'O=[S@@](Cl)I'
        ([0, 0, 0, 2], []),  # 'O=[S@](Cl)I'
        ([0, 0, 0, 3], []),  # 'O=S(Cl)I'
        ([0] * 9, [0] * 8 + [3]),  # "IC=C1NC1"
        ([0] * 9, [0] * 9),  # r"I/C=C\1/NC1"
        ([0] * 9, [0] * 9),  # r"I/C=C/1\NC1"
    ]
    ambiguous = {0, 1, 6, 10}  # ambiguous stereo is lost in XYZ
    sulfoxides = {7, 8, 9}  # perception of S=O from XYZ fails
    skipped = ambiguous | sulfoxides

    for idx, (want_parities, want_stereos) in enumerate(expected):
        if idx in skipped:
            continue
        smiles, can = self.data[idx][0:2]
        xyz, error = run_exec(smiles, "babel -ismi -oxyz --gen3d")

        # XYZ -> canonical SMILES
        canoutput, error = run_exec(xyz, "babel -ixyz -ocan")
        self.assertEqual(canoutput.rstrip(), can)

        # XYZ -> SD file, then inspect the parity/stereo fields
        sdfoutput, error = run_exec(xyz, "babel -ixyz -osdf")
        atoms, bonds = self.parseMDL(sdfoutput)
        got_parities = sorted([atom['parity'] for atom in atoms])
        got_stereos = sorted([bond['stereo'] for bond in bonds])
        self.assertEqual(want_parities, got_parities)
        if want_stereos:
            self.assertEqual(want_stereos, got_stereos)
def test2DMDLto0D_more(self):
    """Test various combinations of stereobonds in 2D perception"""
    first_file = self.getTestFile("testsym_2Dtests_more.sdf")
    second_file = self.getTestFile("testsym_2Dtests_threeligands.sdf")

    # The test files have the correct canonical SMILES string
    # stored in the data field "smiles"; --append adds it to each
    # output line, giving "<canonical SMILES> <reference SMILES>".
    command = "obabel -isdf %s %s -ocan --append smiles" % (first_file, second_file)
    output, error = run_exec(command)
    for line in output.rstrip().split("\n"):
        result, correct_answer = line.split()
        self.assertEqual(result, correct_answer)
def testFindDups(self):
    """Look for duplicates using --unique"""
    # Maps position -> (argument given to --unique, expected converted count)
    unique_args = ["", "/formula", "/connect", "/nostereo", "/nosp3",
                   "/noEZ", "/nochg", "/noiso", "cansmi", "cansmiNS"]
    converted_counts = [13, 5, 6, 9, 11, 11, 12, 11, 13, 7]
    for unique_arg, n_converted in zip(unique_args, converted_counts):
        output, error = run_exec(
            self.smiles, "babel -ismi -osmi --unique %s" % unique_arg)
        self.assertConverted(error, n_converted)
def test2DMDLto0D_more(self):
    """Test various combinations of stereobonds in 2D perception"""
    names = ("testsym_2Dtests_more.sdf", "testsym_2Dtests_threeligands.sdf")
    paths = tuple([self.getTestFile(name) for name in names])

    # The test files have the correct canonical SMILES string
    # stored in the data field "smiles"; --append adds it to each
    # output line, giving "<canonical SMILES> <reference SMILES>".
    output, error = run_exec(
        "obabel -isdf %s %s -ocan --append smiles" % paths)
    for line in output.rstrip().split("\n"):
        result, correct_answer = line.split()
        self.assertEqual(result, correct_answer)
def testXYZtoXYZ(self):
    """PR#2956135- crash in kekulize"""
    self.canFindExecutable("babel")

    # XYZ input: atom count, title line, then one atom record per line
    self.xyz = """39
crash.gamout
C -0.31501 -0.05904 0.00332
C 0.47846 1.04480 0.28483
N 1.83248 1.00566 0.38129
C 2.42024 -0.19701 0.19943
C 1.71953 -1.35983 -0.07408
C 0.33143 -1.28624 -0.17253
H -0.24386 -2.18519 -0.38442
H 2.23907 -2.30258 -0.20635
H 3.50341 -0.20251 0.28526
H 0.06258 2.02989 0.46235
C -1.79310 -0.00135 -0.09779
O -2.46156 -1.02575 -0.18756
N -2.41033 1.21816 -0.10797
H -1.94649 2.11169 -0.16198
H -3.41687 1.20930 -0.22381
C 0.26924 0.47947 -3.35313
C -0.44373 -0.03140 -4.37204
H -0.48212 0.45752 -5.34214
H -1.00864 -0.95087 -4.26294
C 1.00998 1.73774 -3.59280
O 1.09874 2.34143 -4.64607
O 1.61155 2.19132 -2.48091
H 2.04906 3.01525 -2.77997
C 0.34525 -0.16868 -2.00668
H -0.05656 0.49552 -1.23462
H 1.38253 -0.41344 -1.75510
H -0.22965 -1.10058 -1.97049
C 0.26203 0.28311 3.26498
C 1.38529 0.78393 2.72190
H 1.56449 0.74957 1.65029
H 2.15564 1.24022 3.33413
C -0.73494 -0.32012 2.35309
O -0.68045 -0.35957 1.13769
O -1.76945 -0.85942 3.01857
H -2.33520 -1.21968 2.30453
C -0.01692 0.31604 4.73465
H -0.11977 -0.69915 5.13149
H -0.93979 0.86887 4.93917
H 0.78936 0.80651 5.29109
"""
    # XYZ in, XYZ out: exactly one molecule must be reported as converted
    command = "babel -ixyz -oxyz"
    output, error = run_exec(self.xyz, command)
    self.assertConverted(error, 1)
def testInChItoSMI(self):
    """Verify that the InChI is read correctly"""
    # self.inchi read as InChI must give self.cansmi as canonical SMILES
    command = "babel -iinchi -ocan"
    canonical, error = run_exec(self.inchi, command)
    self.assertEqual(canonical.rstrip(), self.cansmi)
def checkmatch(query, molecules):
    """Return, for each molecule, whether an ``-s<query>`` search produced output.

    One obabel process is run per molecule; a molecule counts as matched
    when the command's output is non-empty after stripping whitespace.
    """
    def produces_output(smi):
        # Run the search for a single molecule and test for any output
        output, error = run_exec("obabel -:%s -s%s -osmi" % (smi, query))
        return output.strip() != ""

    return [produces_output(smi) for smi in molecules]
def testInsertionCodes(self):
    """Check that PDB insertion codes keep residues distinct in FASTA output.

    The fixture lists residues VAL 29, SER 30, SER 30A, SER 31 and TYR 32
    of chain L.  Residue 30A shares its number with residue 30 and differs
    only by the insertion code "A".  The text after the last newline of
    the converted output must be "VSSSY", i.e. five residues.

    NOTE(review): another method with this same name appears elsewhere in
    this file; if both sit in the same class the later definition replaces
    the earlier one -- confirm and drop the duplicate.
    """
    # Helper defined elsewhere in the file; presumably it checks that the
    # "babel" binary is available -- TODO confirm.
    self.canFindExecutable("babel")
    # NOTE(review): PDB is a fixed-column format.  The literal below is
    # reproduced exactly as it appears here (single spaces, few line breaks)
    # -- confirm the column layout against the upstream fixture.
    self.entryPDBwithInsertioncodes = """ATOM 406 N VAL L 29 58.041 17.797 48.254 1.00 0.00 N ATOM 407 CA VAL L 29 57.124 18.088 47.170 1.00 0.00 C ATOM 408 C VAL L 29 55.739 17.571 47.538 1.00 0.00 C ATOM 409 O VAL L 29 55.535 16.362 47.550 1.00 0.00 O ATOM 410 CB VAL L 29 57.580 17.456 45.842 1.00 0.00 C ATOM 411 CG1 VAL L 29 56.571 17.743 44.741 1.00 0.00 C ATOM 412 CG2 VAL L 29 58.957 17.973 45.450 1.00 0.00 C ATOM 413 H VAL L 29 58.603 16.959 48.212 1.00 0.00 H ATOM 414 HA VAL L 29 57.012 19.163 47.024 1.00 0.00 H ATOM 415 HB VAL L 29 57.674 16.378 45.977 1.00 0.00 H ATOM 416 1HG1 VAL L 29 56.909 17.289 43.809 1.00 0.00 H ATOM 417 2HG1 VAL L 29 55.603 17.327 45.016 1.00 0.00 H ATOM 418 3HG1 VAL L 29 56.479 18.821 44.604 1.00 0.00 H ATOM 419 1HG2 VAL L 29 59.263 17.515 44.510 1.00 0.00 H ATOM 420 2HG2 VAL L 29 58.917 19.055 45.331 1.00 0.00 H ATOM 421 3HG2 VAL L 29 59.676 17.719 46.229 1.00 0.00 H ATOM 422 N SER L 30 54.838 18.500 47.837 1.00 0.00 N ATOM 423 CA SER L 30 53.494 18.162 48.273 1.00 0.00 C ATOM 424 C SER L 30 52.725 17.364 47.221 1.00 0.00 C ATOM 425 O SER L 30 52.723 17.697 46.056 1.00 0.00 O ATOM 426 CB SER L 30 52.734 19.429 48.610 1.00 0.00 C ATOM 427 OG SER L 30 51.403 19.143 48.941 1.00 0.00 O ATOM 428 H SER L 30 55.100 19.472 47.757 1.00 0.00 H ATOM 429 HA SER L 30 53.471 17.585 49.199 1.00 0.00 H ATOM 430 1HB SER L 30 53.219 19.934 49.445 1.00 0.00 H ATOM 431 2HB SER L 30 52.761 20.107 47.758 1.00 0.00 H ATOM 432 HG SER L 30 50.919 19.965 48.828 1.00 0.00 H ATOM 433 N SER L 30A 52.170 16.303 47.698 1.00 0.00 N ATOM 434 CA SER L 30A 51.329 15.409 46.920 1.00 0.00 C ATOM 435 C SER L 30A 52.015 14.812 45.685 1.00 0.00 C ATOM 436 O SER L 30A 51.350 14.366 44.764 1.00 0.00 O ATOM 437 CB SER L 30A 50.082 16.156 46.488 1.00 0.00 C ATOM 438 OG SER L 30A 49.348 16.592 47.599 1.00 0.00 O ATOM 439 
H SER L 30A 52.421 16.046 48.642 1.00 0.00 H ATOM 440 HA SER L 30A 50.943 14.567 47.497 1.00 0.00 H ATOM 441 1HB SER L 30A 50.364 17.013 45.876 1.00 0.00 H ATOM 442 2HB SER L 30A 49.463 15.505 45.873 1.00 0.00 H ATOM 443 HG SER L 30A 49.931 17.176 48.090 1.00 0.00 H ATOM 444 N SER L 31 53.347 14.792 45.683 1.00 0.00 N ATOM 445 CA SER L 31 54.094 14.259 44.549 1.00 0.00 C ATOM 446 C SER L 31 53.734 14.959 43.242 1.00 0.00 C ATOM 447 O SER L 31 53.703 14.356 42.179 1.00 0.00 O ATOM 448 CB SER L 31 53.835 12.771 44.418 1.00 0.00 C ATOM 449 OG SER L 31 54.240 12.087 45.572 1.00 0.00 O ATOM 450 H SER L 31 53.852 15.150 46.480 1.00 0.00 H ATOM 451 HA SER L 31 55.175 14.292 44.689 1.00 0.00 H ATOM 452 1HB SER L 31 52.774 12.600 44.243 1.00 0.00 H ATOM 453 2HB SER L 31 54.375 12.383 43.555 1.00 0.00 H ATOM 454 HG SER L 31 53.773 11.248 45.560 1.00 0.00 H ATOM 455 N TYR L 32 53.460 16.259 43.402 1.00 0.00 N ATOM 456 CA TYR L 32 53.176 17.161 42.301 1.00 0.00 C ATOM 457 C TYR L 32 54.489 17.641 41.668 1.00 0.00 C ATOM 458 O TYR L 32 54.910 18.762 41.892 1.00 0.00 O ATOM 459 CB TYR L 32 52.342 18.352 42.780 1.00 0.00 C ATOM 460 CG TYR L 32 50.880 18.031 42.990 1.00 0.00 C ATOM 461 CD1 TYR L 32 50.294 16.936 42.371 1.00 0.00 C ATOM 462 CD2 TYR L 32 50.089 18.824 43.807 1.00 0.00 C ATOM 463 CE1 TYR L 32 48.958 16.639 42.559 1.00 0.00 C ATOM 464 CE2 TYR L 32 48.751 18.535 44.002 1.00 0.00 C ATOM 465 CZ TYR L 32 48.190 17.441 43.376 1.00 0.00 C ATOM 466 OH TYR L 32 46.859 17.150 43.569 1.00 0.00 O ATOM 467 H TYR L 32 53.456 16.618 44.347 1.00 0.00 H ATOM 468 HA TYR L 32 52.651 16.625 41.509 1.00 0.00 H ATOM 469 1HB TYR L 32 52.778 18.693 43.721 1.00 0.00 H ATOM 470 2HB TYR L 32 52.439 19.136 42.030 1.00 0.00 H ATOM 471 HD1 TYR L 32 50.908 16.305 41.727 1.00 0.00 H ATOM 472 HD2 TYR L 32 50.537 19.687 44.299 1.00 0.00 H ATOM 473 HE1 TYR L 32 48.512 15.775 42.066 1.00 0.00 H ATOM 474 HE2 TYR L 32 48.145 19.172 44.648 1.00 0.00 H ATOM 475 HH TYR L 32 46.462 17.658 44.280 1.00 0.00 H 
"""
    output, error = run_exec(self.entryPDBwithInsertioncodes, "babel -ipdb -ofasta")
    # rsplit("\n", 1)[1] is the text after the last newline of the stripped
    # output, i.e. its final line (presumably the sequence line that follows
    # a FASTA header -- verify).  It raises IndexError if the output holds no
    # newline at all.
    self.assertEqual(output.rstrip().rsplit("\n", 1)[1], "VSSSY")
def testSMILESto2D(self):
    """Round-trip each SMILES through --gen2d MDL output and back to canonical SMILES.

    For every entry of ``self.data`` the input SMILES is converted to MDL
    with ``--gen2d``; that MDL text is then converted to canonical SMILES,
    which must equal the entry's expected canonical form.
    """
    for smiles, expected_can, _inchi in self.data:
        mdl_text, _stderr = run_exec(smiles, "obabel -ismi --gen2d -omdl")
        roundtrip, _stderr = run_exec(mdl_text.rstrip(), "obabel -imdl -ocan")
        self.assertEqual(expected_can, roundtrip.rstrip())
def testSMItoCAN(self):
    """Check that every SMILES in ``self.smiles`` canonicalises to ``self.cansmi``.

    All inputs are sent to babel in one newline-separated batch.  Each
    output line is right-stripped before comparison, so trailing
    whitespace on a line does not matter.
    """
    batch_input = "\n".join(self.smiles)
    stdout, _stderr = run_exec(batch_input, "babel -ismi -ocan")
    stripped_lines = [line.rstrip() for line in stdout.split("\n")]
    actual = "\n".join(stripped_lines).rstrip()
    # One copy of the canonical SMILES is expected per input molecule.
    expected = "\n".join([self.cansmi] * len(self.smiles))
    self.assertEqual(actual, expected)
def testInsertionCodes(self):
    """Check that PDB insertion codes keep residues distinct in FASTA output.

    The fixture lists residues VAL 29, SER 30, SER 30A, SER 31 and TYR 32
    of chain L.  Residue 30A shares its number with residue 30 and differs
    only by the insertion code "A".  The text after the last newline of
    the converted output must be "VSSSY", i.e. five residues.

    NOTE(review): another method with this same name appears earlier in
    this file; if both sit in the same class this later definition is the
    one that takes effect -- confirm and drop the duplicate.
    """
    # Helper defined elsewhere in the file; presumably it checks that the
    # "babel" binary is available -- TODO confirm.
    self.canFindExecutable("babel")
    # NOTE(review): PDB is a fixed-column format.  The literal below is
    # reproduced exactly as it appears here (single spaces, few line breaks)
    # -- confirm the column layout against the upstream fixture.
    self.entryPDBwithInsertioncodes="""ATOM 406 N VAL L 29 58.041 17.797 48.254 1.00 0.00 N ATOM 407 CA VAL L 29 57.124 18.088 47.170 1.00 0.00 C ATOM 408 C VAL L 29 55.739 17.571 47.538 1.00 0.00 C ATOM 409 O VAL L 29 55.535 16.362 47.550 1.00 0.00 O ATOM 410 CB VAL L 29 57.580 17.456 45.842 1.00 0.00 C ATOM 411 CG1 VAL L 29 56.571 17.743 44.741 1.00 0.00 C ATOM 412 CG2 VAL L 29 58.957 17.973 45.450 1.00 0.00 C ATOM 413 H VAL L 29 58.603 16.959 48.212 1.00 0.00 H ATOM 414 HA VAL L 29 57.012 19.163 47.024 1.00 0.00 H ATOM 415 HB VAL L 29 57.674 16.378 45.977 1.00 0.00 H ATOM 416 1HG1 VAL L 29 56.909 17.289 43.809 1.00 0.00 H ATOM 417 2HG1 VAL L 29 55.603 17.327 45.016 1.00 0.00 H ATOM 418 3HG1 VAL L 29 56.479 18.821 44.604 1.00 0.00 H ATOM 419 1HG2 VAL L 29 59.263 17.515 44.510 1.00 0.00 H ATOM 420 2HG2 VAL L 29 58.917 19.055 45.331 1.00 0.00 H ATOM 421 3HG2 VAL L 29 59.676 17.719 46.229 1.00 0.00 H ATOM 422 N SER L 30 54.838 18.500 47.837 1.00 0.00 N ATOM 423 CA SER L 30 53.494 18.162 48.273 1.00 0.00 C ATOM 424 C SER L 30 52.725 17.364 47.221 1.00 0.00 C ATOM 425 O SER L 30 52.723 17.697 46.056 1.00 0.00 O ATOM 426 CB SER L 30 52.734 19.429 48.610 1.00 0.00 C ATOM 427 OG SER L 30 51.403 19.143 48.941 1.00 0.00 O ATOM 428 H SER L 30 55.100 19.472 47.757 1.00 0.00 H ATOM 429 HA SER L 30 53.471 17.585 49.199 1.00 0.00 H ATOM 430 1HB SER L 30 53.219 19.934 49.445 1.00 0.00 H ATOM 431 2HB SER L 30 52.761 20.107 47.758 1.00 0.00 H ATOM 432 HG SER L 30 50.919 19.965 48.828 1.00 0.00 H ATOM 433 N SER L 30A 52.170 16.303 47.698 1.00 0.00 N ATOM 434 CA SER L 30A 51.329 15.409 46.920 1.00 0.00 C ATOM 435 C SER L 30A 52.015 14.812 45.685 1.00 0.00 C ATOM 436 O SER L 30A 51.350 14.366 44.764 1.00 0.00 O ATOM 437 CB SER L 30A 50.082 16.156 46.488 1.00 0.00 C ATOM 438 OG SER L 30A 49.348 16.592 47.599 1.00 0.00 O ATOM 439 H 
SER L 30A 52.421 16.046 48.642 1.00 0.00 H ATOM 440 HA SER L 30A 50.943 14.567 47.497 1.00 0.00 H ATOM 441 1HB SER L 30A 50.364 17.013 45.876 1.00 0.00 H ATOM 442 2HB SER L 30A 49.463 15.505 45.873 1.00 0.00 H ATOM 443 HG SER L 30A 49.931 17.176 48.090 1.00 0.00 H ATOM 444 N SER L 31 53.347 14.792 45.683 1.00 0.00 N ATOM 445 CA SER L 31 54.094 14.259 44.549 1.00 0.00 C ATOM 446 C SER L 31 53.734 14.959 43.242 1.00 0.00 C ATOM 447 O SER L 31 53.703 14.356 42.179 1.00 0.00 O ATOM 448 CB SER L 31 53.835 12.771 44.418 1.00 0.00 C ATOM 449 OG SER L 31 54.240 12.087 45.572 1.00 0.00 O ATOM 450 H SER L 31 53.852 15.150 46.480 1.00 0.00 H ATOM 451 HA SER L 31 55.175 14.292 44.689 1.00 0.00 H ATOM 452 1HB SER L 31 52.774 12.600 44.243 1.00 0.00 H ATOM 453 2HB SER L 31 54.375 12.383 43.555 1.00 0.00 H ATOM 454 HG SER L 31 53.773 11.248 45.560 1.00 0.00 H ATOM 455 N TYR L 32 53.460 16.259 43.402 1.00 0.00 N ATOM 456 CA TYR L 32 53.176 17.161 42.301 1.00 0.00 C ATOM 457 C TYR L 32 54.489 17.641 41.668 1.00 0.00 C ATOM 458 O TYR L 32 54.910 18.762 41.892 1.00 0.00 O ATOM 459 CB TYR L 32 52.342 18.352 42.780 1.00 0.00 C ATOM 460 CG TYR L 32 50.880 18.031 42.990 1.00 0.00 C ATOM 461 CD1 TYR L 32 50.294 16.936 42.371 1.00 0.00 C ATOM 462 CD2 TYR L 32 50.089 18.824 43.807 1.00 0.00 C ATOM 463 CE1 TYR L 32 48.958 16.639 42.559 1.00 0.00 C ATOM 464 CE2 TYR L 32 48.751 18.535 44.002 1.00 0.00 C ATOM 465 CZ TYR L 32 48.190 17.441 43.376 1.00 0.00 C ATOM 466 OH TYR L 32 46.859 17.150 43.569 1.00 0.00 O ATOM 467 H TYR L 32 53.456 16.618 44.347 1.00 0.00 H ATOM 468 HA TYR L 32 52.651 16.625 41.509 1.00 0.00 H ATOM 469 1HB TYR L 32 52.778 18.693 43.721 1.00 0.00 H ATOM 470 2HB TYR L 32 52.439 19.136 42.030 1.00 0.00 H ATOM 471 HD1 TYR L 32 50.908 16.305 41.727 1.00 0.00 H ATOM 472 HD2 TYR L 32 50.537 19.687 44.299 1.00 0.00 H ATOM 473 HE1 TYR L 32 48.512 15.775 42.066 1.00 0.00 H ATOM 474 HE2 TYR L 32 48.145 19.172 44.648 1.00 0.00 H ATOM 475 HH TYR L 32 46.462 17.658 44.280 1.00 0.00 H 
"""
    output, error = run_exec(self.entryPDBwithInsertioncodes, "babel -ipdb -ofasta")
    # rsplit("\n",1)[1] is the text after the last newline of the stripped
    # output, i.e. its final line (presumably the sequence line that follows
    # a FASTA header -- verify).  It raises IndexError if the output holds no
    # newline at all.
    self.assertEqual(output.rstrip().rsplit("\n",1)[1], "VSSSY")