def popc_tanimoto(fp1, fp2):
    """Tanimoto similarity of two bit vectors using C wrapped bitwise operations.

    Code from:
    http://www.dalkescientific.com/writings/diary/archive/2020/09/28/simple_fps_fingerprint_search.html

    Requires you to run `python popc.py` to compile the ``_popc`` helper first.

    Both fingerprints are serialized with ``DataStructs.BitVectToBinaryText``
    and the two byte strings are handed to ``_popc.lib.byte_tanimoto_256``;
    whatever that routine returns is passed back unchanged.
    """
    # NOTE(review): the "_256" suffix suggests the C routine expects
    # fixed-size 256-byte inputs -- confirm against popc.py.
    packed_first = DataStructs.BitVectToBinaryText(fp1)
    packed_second = DataStructs.BitVectToBinaryText(fp2)
    return _popc.lib.byte_tanimoto_256(packed_first, packed_second)
def smiles_to_fingerprint_bin(smiles, trust_smiles=False):
    """Convert a SMILES string into a binary-text Morgan fingerprint.

    Args:
        smiles: SMILES string to parse.
        trust_smiles: When true, RDKit sanitization is skipped at parse time
            and only the property cache and ring information are refreshed
            afterwards.

    Returns:
        The output of ``DataStructs.BitVectToBinaryText`` for a radius-2
        Morgan bit vector of ``BITCOUNT`` bits, or ``None`` when
        ``Chem.MolFromSmiles`` returns ``None``.
    """
    do_sanitize = not trust_smiles
    molecule = Chem.MolFromSmiles(smiles, sanitize=do_sanitize)
    if molecule is None:
        return None

    if not do_sanitize:
        # Sanitization was skipped, so refresh the pieces explicitly;
        # presumably the fingerprinter needs valences and ring info.
        molecule.UpdatePropertyCache()
        Chem.FastFindRings(molecule)

    bit_vector = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        molecule, 2, BITCOUNT
    )
    return DataStructs.BitVectToBinaryText(bit_vector)
def smiles_to_fingerprint_bin(smiles, trust_smiles=False):
    """Convert a SMILES string into a fingerprint plus its canonical SMILES.

    Args:
        smiles: SMILES string to parse.
        trust_smiles: When true, RDKit sanitization is skipped at parse time
            and only the property cache and ring information are refreshed
            afterwards.

    Returns:
        A 2-tuple ``(fingerprint, canonical_smiles)`` where ``fingerprint`` is
        the output of ``DataStructs.BitVectToBinaryText`` for a radius-2
        Morgan bit vector of ``BITCOUNT`` bits and ``canonical_smiles`` is the
        ``Chem.MolToSmiles`` output encoded to ``bytes``.

    Raises:
        RuntimeError: If ``Chem.MolFromSmiles`` returns ``None``.
    """
    do_sanitize = not trust_smiles
    molecule = Chem.MolFromSmiles(smiles, sanitize=do_sanitize)
    if molecule is None:
        raise RuntimeError("Bad structure")

    if not do_sanitize:
        # Sanitization was skipped, so refresh the pieces explicitly;
        # presumably the fingerprinter needs valences and ring info.
        molecule.UpdatePropertyCache()
        Chem.FastFindRings(molecule)

    bit_vector = rdMolDescriptors.GetMorganFingerprintAsBitVect(
        molecule, 2, BITCOUNT
    )
    encoded_smiles = Chem.MolToSmiles(molecule).encode()
    return DataStructs.BitVectToBinaryText(bit_vector), encoded_smiles
def test8BinText(self):
    """Round-trip a bit vector through binary text and check the empty input case."""
    original = DataStructs.ExplicitBitVect(32)
    for bit_index in (0, 1, 17, 23, 31):
        original.SetBit(bit_index)

    # Serializing and deserializing must yield an equal vector.
    serialized = DataStructs.BitVectToBinaryText(original)
    restored = DataStructs.CreateFromBinaryText(serialized)
    self.assertEqual(original, restored)

    # An empty payload must produce a zero-length vector.
    from_empty = DataStructs.CreateFromBinaryText("")
    self.assertEqual(from_empty.GetNumBits(), 0)
def test9MultiFPBReaderEdges(self):
    """A MultiFPBReader with no readers added returns no Tanimoto neighbors."""
    # No AddReader() calls on purpose: this exercises the empty-reader edge case.
    mfpbr = DataStructs.MultiFPBReader()
    mfpbr.Init()

    fps = (
        "0000000000404000100000001000040000300040222000002004000240000020000000"
        "8200010200000090000024040860070044003214820000220401054008018000226000"
        "4800800140000042000080008008020482400000200410800000300430200800400000"
        "0000080a0000800400010c800200648818100010880040"
    )
    ebv = DataStructs.CreateFromFPSText(fps)
    # Named query_bytes rather than `bytes` so the builtin is not shadowed.
    query_bytes = DataStructs.BitVectToBinaryText(ebv)

    nbrs = mfpbr.GetTanimotoNeighbors(query_bytes, threshold=0.6)
    self.assertEqual(len(nbrs), 0)
def test8MultiFPBReaderContainsInitOnSearch(self):
    """Containment search over four FPB files with lazy (init-on-search) setup."""
    data_dir = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs', 'testData')
    mfpbr = DataStructs.MultiFPBReader(initOnSearch=True)

    fpb_names = (
        "zinc_random200.1.patt.fpb",
        "zinc_random200.2.patt.fpb",
        "zinc_random200.3.patt.fpb",
        "zinc_random200.4.patt.fpb",
    )
    # AddReader() is expected to return the running number of readers.
    for reader_count, fpb_name in enumerate(fpb_names, start=1):
        reader = DataStructs.FPBReader(os.path.join(data_dir, fpb_name))
        self.assertEqual(mfpbr.AddReader(reader), reader_count)

    fps = (
        "40081010824820021000500010110410003000402b20285000a4040240010030050000"
        "080001420040009000003d04086007080c03b31d920004220400074008098010206080"
        "00488001080000c64002a00080000200024c2000602410049200340820200002400010"
        "02200106090401056801080182006088101000088a0048"
    )
    ebv = DataStructs.CreateFromFPSText(fps)
    query_bytes = DataStructs.BitVectToBinaryText(ebv)
    nbrs = mfpbr.GetContainingNeighbors(query_bytes, numThreads=4)

    # Expected (nbrs[i][0], nbrs[i][1]) pairs, in result order; presumably
    # (fingerprint index, reader index) -- confirm against the RDKit docs.
    expected_pairs = (
        (160, 0),
        (163, 0),
        (170, 0),
        (180, 2),
        (182, 3),
        (185, 0),
        (189, 0),
        (192, 3),
        (193, 0),
    )
    self.assertEqual(len(nbrs), 9)
    for position, (first_value, second_value) in enumerate(expected_pairs):
        self.assertEqual(nbrs[position][0], first_value)
        self.assertEqual(nbrs[position][1], second_value)
def test6MultiFPBReaderTani(self):
    """Tanimoto search over four FPB files, single-threaded and with four threads."""
    data_dir = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs', 'testData')
    mfpbr = DataStructs.MultiFPBReader()

    fpb_names = (
        "zinc_random200.1.patt.fpb",
        "zinc_random200.2.patt.fpb",
        "zinc_random200.3.patt.fpb",
        "zinc_random200.4.patt.fpb",
    )
    # AddReader() is expected to return the running number of readers.
    for reader_count, fpb_name in enumerate(fpb_names, start=1):
        reader = DataStructs.FPBReader(os.path.join(data_dir, fpb_name))
        self.assertEqual(mfpbr.AddReader(reader), reader_count)

    mfpbr.Init()
    self.assertEqual(mfpbr.GetNumBits(), 1024)
    self.assertEqual(len(mfpbr), 4)

    fps = (
        "0000000000404000100000001000040000300040222000002004000240000020000000"
        "8200010200000090000024040860070044003214820000220401054008018000226000"
        "4800800140000042000080008008020482400000200410800000300430200800400000"
        "0000080a0000800400010c800200648818100010880040"
    )
    ebv = DataStructs.CreateFromFPSText(fps)
    query_bytes = DataStructs.BitVectToBinaryText(ebv)

    # Expected (nbrs[i][0], nbrs[i][1], nbrs[i][2]) triples, in result order.
    # The first element is the similarity (checked to 4 places); the other two
    # are presumably (fingerprint index, reader index) -- confirm in RDKit docs.
    expected_hits = (
        (0.66412, 0, 3),
        (0.65289, 1, 2),
        (0.64341, 2, 1),
        (0.61940, 1, 0),
        (0.61905, 0, 0),
        (0.61344, 0, 1),
    )

    # First pass: default threading. Second pass:
    # test multi-threaded (won't do anything if the RDKit isn't compiled with threads support)
    for extra_kwargs in ({}, {'numThreads': 4}):
        nbrs = mfpbr.GetTanimotoNeighbors(query_bytes, threshold=0.6, **extra_kwargs)
        self.assertEqual(len(nbrs), 6)
        for position, (similarity, second_value, third_value) in enumerate(expected_hits):
            self.assertAlmostEqual(nbrs[position][0], similarity, 4)
            self.assertEqual(nbrs[position][1], second_value)
            self.assertEqual(nbrs[position][2], third_value)
def get_prep_value(self, value):
    """Convert *value* to the representation used by the db driver.

    An ``ExplicitBitVect`` instance is serialized with
    ``DataStructs.BitVectToBinaryText`` and wrapped in ``six.memoryview``;
    any other value is returned untouched.
    """
    if not isinstance(value, ExplicitBitVect):
        return value
    binary_text = DataStructs.BitVectToBinaryText(value)
    return six.memoryview(binary_text)
def process(value):
    """Wrap an ``ExplicitBitVect`` as a ``memoryview`` of its binary text.

    Values of any other type are returned unchanged.
    """
    return (
        memoryview(DataStructs.BitVectToBinaryText(value))
        if isinstance(value, ExplicitBitVect)
        else value
    )