def testChiralMorgans(self):
    """Build a MorganChiral3Counts store and check its first ten rows.

    Writes ``many_smiles`` to a temporary file, builds a store from it and
    compares rows 0-9 of the descriptor table with ``expected_chiral_data``.
    """
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            handle.write(many_smiles)

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="MorganChiral3Counts",
                index_inchikey=True,
            )
        )

        # Lookup kept from the original: raises KeyError when the generator
        # is not present in the registry.
        DescriptorGenerator.REGISTRY["MorganChiral3Counts".lower()]

        with contextlib.closing(DescriptaStore(store_path)) as store:
            for row in range(10):
                actual = store.descriptors().get(row)
                self.assertEqual(actual, expected_chiral_data[row])
    finally:
        # Remove whatever was actually created on disk.
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testCanonicalSmiles2(self):
    """Build a "Canonicalize" store and check the first field of each row.

    The first element of rows 0-9, once sorted, must equal
    ``list(range(8, 18))``.
    """
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            handle.write(many_smiles)

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="Canonicalize",
                index_inchikey=False,
            )
        )

        with contextlib.closing(DescriptaStore(store_path)) as store:
            first_fields = sorted(
                store.descriptors().get(row)[0] for row in range(10))
            self.assertEqual(first_fields, list(range(8, 18)))
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testRawNones(self):
    """Building a "NANDescriptors" store must raise a descriptive TypeError.

    The error message is expected to contain both "For column" and
    "can't convert".
    """
    try:
        fname = tempfile.mktemp() + ".smi"
        storefname = tempfile.mktemp() + ".store"
        with open(fname, 'w') as f:
            f.write(one_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="NANDescriptors",
            index_inchikey=True,
        )

        # assertRaises reports "TypeError not raised" instead of the opaque
        # "True is not false" produced by assertFalse(True).
        with self.assertRaises(TypeError) as raised:
            make_store.make_store(opts)

        message = str(raised.exception)
        self.assertIn("For column", message)
        self.assertIn("can't convert", message)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
def testNonesWithCalcFlags(self):
    """Build a "NANDescriptorsWithCalcFlags" store from ``one_smiles``.

    The first field of row 0 must be falsy.
    """
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            handle.write(one_smiles)

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="NANDescriptorsWithCalcFlags",
                index_inchikey=True,
            )
        )

        with contextlib.closing(DescriptaStore(store_path)) as store:
            first_row = store.descriptors().get(0)
            self.assertFalse(first_row[0])
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testRDKitFPBits(self):
    """Build an "RDKitFPBits" store from ``testSmiles`` and check four rows.

    Each ``testSmiles`` item is written as "<value> <key>" on its own line;
    rows 0-3 are then compared with ``expected_RDKFP_data``.
    """
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            lines = ['{0} {1}'.format(value, key)
                     for key, value in testSmiles.items()]
            handle.write("\n".join(lines))

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="RDKitFPBits",
                index_inchikey=False,
            )
        )

        with contextlib.closing(DescriptaStore(store_path)) as store:
            for row in range(4):
                actual = store.descriptors().get(row)
                self.assertEqual(actual, expected_RDKFP_data[row])
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testNormalized(self):
    """Build an "RDKit2DNormalized" store and compare its first ten rows.

    Rows 0-9 are checked against ``expected`` through ``compare_results``.
    """
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            handle.write(many_smiles)

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="RDKit2DNormalized",
                index_inchikey=True,
            )
        )

        # Lookup kept from the original: raises KeyError when the generator
        # is not present in the registry.
        DescriptorGenerator.REGISTRY["RDKit2DNormalized".lower()]

        with contextlib.closing(DescriptaStore(store_path)) as store:
            for row in range(10):
                actual = store.descriptors().get(row)
                compare_results(self, actual, expected[row])
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testAppend(self):
    """Create a one-row store, append a second file and re-check lookups.

    After the append the store must hold two rows, both resolving to the
    same InChIKey and the same descriptor tuple.
    """
    inchikey = "UHOVQNZJYSORNB-UHFFFAOYSA-N"
    descriptor_row = (True, 78.046950192, 0.0, 1.0, 0.0, 1.0)
    try:
        first_path = tempfile.mktemp() + ".smi"
        second_path = tempfile.mktemp() + "-2.smi"
        store_path = tempfile.mktemp() + ".store"
        with open(first_path, 'w') as handle:
            handle.write(one_smiles)

        opts = make_store.MakeStorageOptions(
            storage=store_path,
            smilesfile=first_path,
            hasHeader=False,
            batchsize=1,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(store_path)) as store:
            self.assertEqual(store.lookupName("0"), 0)
            self.assertEqual(store.lookupInchiKey(inchikey), [0])
            self.assertEqual(store.descriptors().get(0), descriptor_row)

        # now append some junk
        with open(second_path, 'w') as handle:
            handle.write(two_smiles)
        opts.smilesfile = second_path
        append_store.append_store(opts)

        with contextlib.closing(DescriptaStore(store_path)) as store:
            self.assertEqual(len(store), 2)
            for row in (0, 1):
                self.assertEqual(store.lookupName(str(row)), row)
                self.assertEqual(store.lookupInchiKey(inchikey), [0, 1])
                self.assertEqual(store.descriptors().get(row),
                                 descriptor_row)
    finally:
        for path, remove in ((first_path, os.unlink),
                             (second_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testManyNoInchi(self):
    """Build an RDKit2DSubset store without an InChIKey index.

    Checks name lookups, the descriptor rows 0-10 and that the molecule
    index returns the SMILES and name written in ``many_smiles``.
    """
    # Second field of the expected descriptor tuple for rows 0-9.
    expected_second = [
        78.046950192, 92.062600256, 106.07825032, 120.093900384,
        134.109550448, 148.125200512, 162.140850576, 176.15650064,
        190.172150704, 204.187800768,
    ]
    try:
        smiles_path = tempfile.mktemp() + ".smi"
        store_path = tempfile.mktemp() + ".store"
        with open(smiles_path, 'w') as handle:
            handle.write(many_smiles)

        make_store.make_store(
            make_store.MakeStorageOptions(
                storage=store_path,
                smilesfile=smiles_path,
                hasHeader=False,
                batchsize=1,
                smilesColumn=0,
                nameColumn=1,
                seperator=" ",
                descriptors="RDKit2DSubset",
                index_inchikey=False,
            )
        )

        source_lines = many_smiles.split("\n")
        with contextlib.closing(DescriptaStore(store_path)) as store:
            for row in range(10):
                self.assertEqual(store.lookupName(str(row)), row)

            for row, value in enumerate(expected_second):
                self.assertEqual(store.descriptors().get(row),
                                 (True, value, 0.0, 1.0, 0.0, 1.0))
            # Row 10 is expected to be the all-false/zero tuple.
            self.assertEqual(store.descriptors().get(10),
                             (False, 0.0, 0.0, 0.0, 0.0, 0.0))

            for row in range(10):
                # Result is unused; the call itself is kept from the original.
                store.molIndex().getRDMol(row)
                smiles, name = store.molIndex().get(row)
                self.assertEqual(name, str(row))
                self.assertEqual(smiles, source_lines[row].split()[0])
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def testColCache(self):
    """Check the column cache of a store's underlying db.

    Steps:
      1. read every column without the cache,
      2. call ``cacheColumns()`` and check one cache file per column exists,
      3. re-read every column and compare with step 1,
      4. swap the cache files of columns 0 and 1 and expect reading
         column 0 to raise ``struct.error``.
    """
    try:
        fname = tempfile.mktemp() + ".smi"
        storefname = tempfile.mktemp() + ".store"
        with open(fname, 'w') as f:
            f.write(many_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            # get normal data
            cols = [list(store.db.getColByIdx(idx))
                    for idx, _ in enumerate(store.db.colnames)]

            # cache the columns
            store.db.cacheColumns()

            # make sure the datafiles are written
            for idx, _ in enumerate(store.db.colnames):
                fn = os.path.join(store.db.colCacheDir, str(idx))
                self.assertTrue(os.path.exists(fn), fn)

            for idx, _ in enumerate(store.db.colnames):
                col = list(store.db.getColByIdx(idx))
                self.assertEqual(col, cols[idx])

            # swap the data files of columns 0 and 1
            fn0 = os.path.join(store.db.colCacheDir, str(0))
            fn1 = os.path.join(store.db.colCacheDir, str(1))
            shutil.move(fn0, fn0 + ".bak")
            shutil.move(fn1, fn0)
            shutil.move(fn0 + ".bak", fn1)

            # The original assigned the undefined name `false` here, which
            # raised NameError instead of reaching the assertion below.
            try:
                list(store.db.getColByIdx(0))
            except struct.error:
                caught = True
            else:
                caught = False
            self.assertTrue(
                caught, "moving cache file should have broken the cache")
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
def main():
    """Command-line entry point: create a store, or append to one.

    With ``--append`` the parsed options are forwarded unchanged to
    ``append_store``; otherwise the ``append`` key is removed and the
    remaining options are used to build a new store.
    """
    opts = parser.parse_args()
    if opts.verbose:
        logging.getLogger().setLevel(logging.INFO)

    if opts.append:
        append_options = append_store.AppendStorageOptions(**vars(opts))
        append_store.append_store(append_options)
        return

    # vars() exposes the namespace's own dict, so popping here also removes
    # the attribute from ``opts`` (same effect as the original ``del``).
    make_kwargs = vars(opts)
    make_kwargs.pop('append')
    make_store.make_store(make_store.MakeStorageOptions(**make_kwargs))
def testMakeStore(self):
    """Build a store from parsed command-line arguments and verify it.

    Checks name lookups, descriptor rows 0-10, ``getDict`` for row 7, and
    that each of the first ten molecules maps back to its own InChIKey and
    to freshly computed descriptors.
    """
    # Second field of the expected descriptor tuple for rows 0-9.
    expected_second = [
        78.046950192, 92.062600256, 106.07825032, 120.093900384,
        134.109550448, 148.125200512, 162.140850576, 176.15650064,
        190.172150704, 204.187800768,
    ]
    smiles_path = tempfile.mktemp() + ".smi"
    store_path = tempfile.mktemp() + ".store"
    with open(smiles_path, 'w') as handle:
        handle.write(many_smiles)

    argv = [
        "--index-inchikey",
        "--smilesColumn", "0",
        "--nameColumn", "1",
        "--seperator", " ",
        "--numprocs", "1",
        "--descriptors", "RDKit2DSubset",
        smiles_path, store_path,
    ]
    try:
        opts = storus.parser.parse_args(argv)
        logging.error(repr(opts))
        make_store.make_store(make_store.MakeStorageOptions(**vars(opts)))

        with contextlib.closing(DescriptaStore(store_path)) as store:
            for row in range(10):
                self.assertEqual(store.lookupName(str(row)), row)

            for row, value in enumerate(expected_second):
                self.assertEqual(store.descriptors().get(row),
                                 (True, value, 0.0, 1.0, 0.0, 1.0))
            self.assertEqual(store.descriptors().get(10),
                             (False, 0.0, 0.0, 0.0, 0.0, 0.0))
            self.assertEqual(
                store.descriptors().getDict(7),
                toDict((True, 176.15650064, 0.0, 1.0, 0.0, 1.0)))

            calc = store.getDescriptorCalculator()
            for row in range(10):
                mol = store.molIndex().getRDMol(row)
                smiles = AllChem.MolToSmiles(mol)
                key = AllChem.InchiToInchiKey(AllChem.MolToInchi(mol))
                self.assertEqual(store.lookupInchiKey(key), [row])
                stored = store.descriptors().get(row)
                computed = tuple(calc.process(smiles))
                self.assertEqual(stored, computed)
    finally:
        for path, remove in ((smiles_path, os.unlink),
                             (store_path, shutil.rmtree)):
            if os.path.exists(path):
                remove(path)
def main():
    """Command-line entry point: create a store or append to an existing one.

    * ``--append``       -> ``append_store.append_smiles``
    * ``--append-store`` -> ``append_store.append_store``
    * neither            -> ``make_store.make_store``

    Raises:
        SystemExit: with status 1 when both ``--append`` and
            ``--append-store`` are given.
    """
    opts = parser.parse_args()
    if opts.verbose:
        logging.getLogger().setLevel(logging.INFO)

    if opts.append and opts.append_store:
        logging.error("Use one of --append --append-store")
        # The original logged the conflict and then carried on with the
        # --append branch; stop here instead of modifying the store.
        raise SystemExit(1)

    if opts.append:
        append_store.append_smiles(
            append_store.AppendStorageOptions(**vars(opts)))
    elif opts.append_store:
        append_store.append_store(
            append_store.AppendStorageOptions(**vars(opts)))
    else:
        d = vars(opts)
        del d['append']
        # NOTE(review): 'append_store' is still passed on to
        # MakeStorageOptions here -- confirm it accepts that keyword.
        make_store.make_store(make_store.MakeStorageOptions(**d))
def testOffByOne(self):
    """Build a one-row store (batchsize=1) and check its lookups.

    Row 0 must be reachable by name and by InChIKey, and looking up an
    unknown InChIKey must raise ``KeyError``.
    """
    try:
        fname = tempfile.mktemp() + ".smi"
        storefname = tempfile.mktemp() + ".store"
        with open(fname, 'w') as f:
            f.write(one_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            batchsize=1,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            self.assertEqual(store.lookupName("0"), 0)
            self.assertEqual(
                store.lookupInchiKey("UHOVQNZJYSORNB-UHFFFAOYSA-N"), [0])
            self.assertEqual(store.descriptors().get(0),
                             (True, 78.046950192, 0.0, 1.0, 0.0, 1.0))

            # assertRaises reports "KeyError not raised" rather than the
            # opaque "False is not true" of assertTrue(False).
            with self.assertRaises(KeyError):
                store.lookupInchiKey("MY DOG HAS FLEAS")
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
def testAppendStore(self):
    """Build a store from CLI-style args, append a second file, verify both.

    After the append, every original row ``i`` (0-9) must share its
    InChIKey with row ``i + 11``, and descriptor rows 0-10 and 11-21 must
    hold the same expected tuples.
    """
    # Second field of the expected descriptor tuple for the ten valid rows.
    expected_second = [
        78.046950192, 92.062600256, 106.07825032, 120.093900384,
        134.109550448, 148.125200512, 162.140850576, 176.15650064,
        190.172150704, 204.187800768,
    ]

    def check_descriptor_rows(store, offset):
        # Ten valid rows followed by one all-false/zero row, from `offset`.
        for row, value in enumerate(expected_second):
            self.assertEqual(store.descriptors().get(offset + row),
                             (True, value, 0.0, 1.0, 0.0, 1.0))
        self.assertEqual(store.descriptors().get(offset + 10),
                         (False, 0.0, 0.0, 0.0, 0.0, 0.0))

    fname = tempfile.mktemp() + ".smi"
    fname2 = tempfile.mktemp() + "2.smi"
    storefname = tempfile.mktemp() + ".store"
    with open(fname, 'w') as f:
        f.write(many_smiles)
    with open(fname2, 'w') as f:
        f.write(many_smiles2)

    shared_args = ["--index-inchikey",
                   "--smilesColumn", "0",
                   "--nameColumn", "1",
                   "--seperator", " ",
                   "--numprocs", "1",
                   "--descriptors", "RDKit2DSubset"]
    # make the first store
    args = shared_args + [fname, storefname]
    args2 = ["--append"] + shared_args + [fname2, storefname]

    try:
        opts = storus.parser.parse_args(args)
        make_store.make_store(make_store.MakeStorageOptions(**vars(opts)))

        opts = storus.parser.parse_args(args2)
        append_store.append_store(
            append_store.AppendStorageOptions(**vars(opts)))

        with contextlib.closing(DescriptaStore(storefname)) as store:
            # Log what ended up in the store to help diagnose failures.
            for i in range(20):
                m = store.molIndex().getRDMol(i)
                if m:
                    sm = AllChem.MolToSmiles(m)
                    inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    logging.info("%s: %s %s", i, sm, inchi)
                else:
                    logging.info("%s: nostruct", i)

            for i in range(10):
                self.assertEqual(store.lookupName(str(i)), i)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(store.lookupInchiKey(inchi), [i, i + 11])

            check_descriptor_rows(store, 0)
            self.assertEqual(
                store.descriptors().getDict(7),
                toDict((True, 176.15650064, 0.0, 1.0, 0.0, 1.0)))

            calc = store.getDescriptorCalculator()
            for i in range(10):
                m = store.molIndex().getRDMol(i)
                if m:
                    sm = AllChem.MolToSmiles(m)
                    inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                    logging.info("%s: %s", i, inchi)
                    self.assertEqual(store.lookupInchiKey(inchi),
                                     [i, i + 11])
                    v = store.descriptors().get(i)
                    sv = tuple(calc.process(sm))
                    self.assertEqual(v, sv)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                if not m:
                    continue
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                m = store.molIndex().getRDMol(i + 11)
                self.assertIsNotNone(m)
                inchi2 = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(inchi, inchi2)
                self.assertEqual(store.lookupInchiKey(inchi), [i, i + 11])

            # The original ran the appended-row checks twice; kept as-is.
            for _ in range(2):
                check_descriptor_rows(store, 11)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        # The original never removed the second input file.
        if os.path.exists(fname2):
            os.unlink(fname2)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)
def testAppend(self):
    """Build a store from ``many_smiles``, append ``many_smiles2``, verify.

    Before the append each of rows 0-9 maps to its own InChIKey; after it,
    row ``i`` shares its InChIKey with row ``i + 11`` and descriptor rows
    11-21 hold the same expected tuples as rows 0-10.
    """
    # Second field of the expected descriptor tuple for the ten valid rows.
    expected_second = [
        78.046950192, 92.062600256, 106.07825032, 120.093900384,
        134.109550448, 148.125200512, 162.140850576, 176.15650064,
        190.172150704, 204.187800768,
    ]

    def check_descriptor_rows(store, offset):
        # Ten valid rows followed by one all-false/zero row, from `offset`.
        for row, value in enumerate(expected_second):
            self.assertEqual(store.descriptors().get(offset + row),
                             (True, value, 0.0, 1.0, 0.0, 1.0))
        self.assertEqual(store.descriptors().get(offset + 10),
                         (False, 0.0, 0.0, 0.0, 0.0, 0.0))

    try:
        fname = tempfile.mktemp() + ".smi"
        # Separate name for the second input: the original rebound `fname`,
        # so the first temp file was never deleted.
        fname2 = tempfile.mktemp() + ".smi"
        storefname = tempfile.mktemp() + ".store"
        with open(fname, 'w') as f:
            f.write(many_smiles)

        opts = make_store.MakeStorageOptions(
            storage=storefname,
            smilesfile=fname,
            hasHeader=False,
            smilesColumn=0,
            nameColumn=1,
            seperator=" ",
            descriptors="RDKit2DSubset",
            index_inchikey=True,
        )
        make_store.make_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            for i in range(10):
                self.assertEqual(store.lookupName(str(i)), i)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(store.lookupInchiKey(inchi), [i])

            check_descriptor_rows(store, 0)

        with open(fname2, 'w') as f:
            f.write(many_smiles2)
        opts.smilesfile = fname2
        append_store.append_store(opts)

        with contextlib.closing(DescriptaStore(storefname)) as store:
            for i in range(10):
                self.assertEqual(store.lookupName(str(i)), i)

            for i in range(10):
                m = store.molIndex().getRDMol(i)
                inchi = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                m = store.molIndex().getRDMol(i + 11)
                self.assertIsNotNone(m)
                inchi2 = AllChem.InchiToInchiKey(AllChem.MolToInchi(m))
                self.assertEqual(inchi, inchi2)
                self.assertEqual(store.lookupInchiKey(inchi), [i, i + 11])

            # The original ran the appended-row checks twice; kept as-is.
            for _ in range(2):
                check_descriptor_rows(store, 11)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
        if os.path.exists(fname2):
            os.unlink(fname2)
        if os.path.exists(storefname):
            shutil.rmtree(storefname)