def base64_to_bfp(b64):
    """Convert a base64-encoded, bit-packed fingerprint to an ExplicitBitVect.

    Every decoded byte contributes eight bits, most-significant bit first,
    so bit ``8 * k`` of the result corresponds to the top bit of byte ``k``.

    Args:
        b64: base64 text (``str`` or ``bytes``) holding the packed bits.

    Returns:
        An ``ExplicitBitVect`` of length ``8 * <number of decoded bytes>``
        with a bit set for every 1 bit in the decoded data.
    """
    # Local import keeps this function independent of the file's import block.
    import base64

    # ``b64.decode("base64")`` only exists on Python 2; ``b64decode`` works on
    # both major versions. Wrapping the result in ``bytearray`` makes iteration
    # yield integers regardless of the interpreter version.
    packed = bytearray(base64.b64decode(b64))
    n = 8 * len(packed)
    on_bits = [
        8 * byte_idx + bit_idx
        for byte_idx, byte in enumerate(packed)
        for bit_idx in range(8)
        if byte & (0x80 >> bit_idx)  # 0x80 >> 0 selects the MSB first
    ]
    bfp = ExplicitBitVect(n)
    bfp.SetBitsFromList(on_bits)
    return bfp
def test3(self):
    """Check that list-based and bit-vector-based NB models agree.

    A model is built from plain list examples and a second one (with
    ``useSigs=True``) from the same data packed into ``ExplicitBitVect``
    objects. Each model must misclassify exactly one training example, and
    both must return the same prediction and classification details for
    every example.
    """
    examples = [
        ['a', 1, 0, 1, 0, 1],
        ['b', 1, 0, 0, 0, 1],
        ['c', 1, 0, 1, 0, 0],
        ['d', 0, 1, 1, 0, 0],
        ['e', 0, 1, 1, 1, 0],
    ]

    # First column is the name, last column is the label.
    nvars = len(examples[0]) - 2
    attrs = list(range(1, nvars + 1))
    npvals = [0] + [2] * nvars + [2]
    qBounds = [0] + [0] * nvars + [0]

    mdl = CrossValidate.makeNBClassificationModel(examples, attrs, npvals, qBounds)
    nWrong = 0
    for eg in examples:
        p = mdl.ClassifyExample(eg)
        if p != eg[-1]:
            nWrong += 1
    self.assertEqual(nWrong, 1)

    # Same data, with the variables packed into a bit vector at position 1.
    bitEx = []
    for eg in examples:
        newEg = [eg[0], None, eg[-1]]
        bv = ExplicitBitVect(nvars)
        for i in range(nvars):
            if eg[i + 1]:
                bv.SetBit(i)
        newEg[1] = bv
        bitEx.append(newEg)

    # Bit indices are zero-based, unlike the column indices used above.
    attrs = list(range(nvars))
    mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds,
                                                   useSigs=True)
    nWrong = 0
    for eg in bitEx:
        p = mdl2.ClassifyExample(eg)
        if p != eg[-1]:
            nWrong += 1
    self.assertEqual(nWrong, 1)

    # now compare:
    for eg, bitEg in zip(examples, bitEx):
        p1 = mdl.ClassifyExample(eg)
        p2 = mdl2.ClassifyExample(bitEg)
        self.assertEqual(p1, p2)
        v1 = mdl.GetClassificationDetails()
        # Read the details from the second model; reading them from ``mdl``
        # twice made the comparison below trivially true.
        v2 = mdl2.GetClassificationDetails()
        self.assertAlmostEqual(v1, v2, 4)
def test1(self):
    """Classify one example repeatedly while switching bits on in its vector.

    The example shares a single 5-bit vector, so each ``SetBit`` call changes
    what the tree sees on the next classification.
    """
    tree = self.tree
    bits = ExplicitBitVect(5)
    example = ['nm', bits]

    # Nothing set yet.
    self.assertFalse(tree.ClassifyExample(example))

    # Bits are switched on cumulatively, in this order.
    for bit, expected in ((1, True), (0, True), (2, False)):
        bits.SetBit(bit)
        check = self.assertTrue if expected else self.assertFalse
        check(tree.ClassifyExample(example))
def test1(self):
    """Classify one example repeatedly while switching bits on in its vector.

    The example holds a reference to ``bv``, so every ``SetBit`` call changes
    the input seen by the next ``ClassifyExample`` call.
    """
    t1 = self.tree
    bv = ExplicitBitVect(5)
    ex = ['nm', bv]

    # ``failIf``/``failUnless`` are deprecated unittest aliases that newer
    # Python versions no longer provide; use the supported assertion names.
    self.assertFalse(t1.ClassifyExample(ex))
    bv.SetBit(1)
    self.assertTrue(t1.ClassifyExample(ex))

    bv.SetBit(0)
    self.assertTrue(t1.ClassifyExample(ex))

    bv.SetBit(2)
    self.assertFalse(t1.ClassifyExample(ex))
def test4(self):
    """Check that the ``attrs`` argument restricts which bits the model uses.

    The bit vectors are wider (10 bits) than the original data (4 variables)
    and carry an extra bit that simply mirrors the label. Only the original
    variables are listed in ``attrs``, so the model is still expected to
    misclassify exactly one training example.
    """
    examples = [
        ['a', 1, 0, 1, 0, 1],
        ['b', 1, 0, 0, 0, 1],
        ['c', 1, 0, 1, 0, 0],
        ['d', 0, 1, 1, 0, 0],
        ['e', 0, 1, 1, 1, 0],
    ]

    # First column is the name, last column is the label.
    origNVars = len(examples[0]) - 2
    nvars = 10
    npvals = [0] + [2] * nvars + [2]
    qBounds = [0] + [0] * nvars + [0]

    bitEx = []
    for eg in examples:
        newEg = [eg[0], None, eg[-1]]
        bv = ExplicitBitVect(nvars)
        for i in range(origNVars):
            if eg[i + 1]:
                bv.SetBit(i)

        # this bit will yield perfect accuracy if
        # the attrs argument isn't being used properly:
        if eg[-1]:
            bv.SetBit(origNVars)
        newEg[1] = bv
        bitEx.append(newEg)

    # A real list, as in the sibling tests, rather than a lazy ``range``.
    attrs = list(range(origNVars))
    mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds,
                                                   useSigs=True)
    nWrong = 0
    for eg in bitEx:
        p = mdl2.ClassifyExample(eg)
        if p != eg[-1]:
            nWrong += 1
    self.assertEqual(nWrong, 1)
def _test5(self):  # disabled because CMIM was removed  # pragma: nocover
    """Build a bit-vector NB model with ``useCMIM=2`` and count its errors.

    The leading underscore keeps the test runner from collecting this method.
    """
    examples = [
        ['a', 1, 0, 1, 0, 1, 1, 0, 1],
        ['b', 1, 0, 0, 0, 1, 0, 0, 1],
        ['c', 1, 0, 1, 0, 1, 1, 0, 0],
        ['d', 0, 1, 1, 0, 1, 0, 0, 0],
        ['e', 0, 1, 1, 1, 0, 1, 0, 0],
    ]

    # Each row is: name, <nvars values>, label.
    nvars = len(examples[0]) - 2
    npvals = [0] + [2] * nvars + [2]
    qBounds = [0] + [0] * nvars + [0]

    # Pack the values of every row into a bit vector stored at position 1.
    bitEx = []
    for row in examples:
        sig = ExplicitBitVect(nvars)
        for pos, flag in enumerate(row[1:nvars + 1]):
            if flag:
                sig.SetBit(pos)
        bitEx.append([row[0], sig, row[-1]])

    attrs = list(range(nvars))
    mdl2 = CrossValidate.makeNBClassificationModel(bitEx, attrs, npvals, qBounds,
                                                   useSigs=True, useCMIM=2)

    nWrong = sum(1 for eg in bitEx if mdl2.ClassifyExample(eg) != eg[-1])
    self.assertEqual(nWrong, 1)
def test2(self):
    """Classify examples whose signature is a ``VectCollection``.

    Two bit vectors are added to the collection under ids 1 and 2 and the
    collection is classified; the vector under id 1 is then supplied again
    with different bits and the collection is classified a second time.
    """
    t1 = self.tree
    vc = VectCollection()

    bv = ExplicitBitVect(5)
    bv.SetBitsFromList([0])
    vc.AddVect(1, bv)

    bv = ExplicitBitVect(5)
    bv.SetBitsFromList([1, 2])
    vc.AddVect(2, bv)

    # Classify the collection itself. Passing the last bit vector here, as
    # the code used to, left ``vc`` built but never exercised.
    ex = ['nm', vc, 1]
    self.assertTrue(t1.ClassifyExample(ex))

    bv = ExplicitBitVect(5)
    bv.SetBitsFromList([0, 2])
    # NOTE(review): presumably replaces the vector stored under id 1 - confirm.
    vc.AddVect(1, bv)
    ex = ['nm', vc, 1]
    self.assertFalse(t1.ClassifyExample(ex))
def build_from_mongo(biochem_modelseed):
    """Build a ``FingerprintMatcher`` from the documents of a collection.

    Every document returned by ``biochem_modelseed.find()`` that has both a
    'diff' and a 'stru' field contributes one entry keyed by its '_id';
    documents missing either field are skipped.

    Args:
        biochem_modelseed: object whose ``find()`` yields mapping-like
            documents (presumably a MongoDB collection - confirm with caller).

    Returns:
        A ``FingerprintMatcher`` whose ``fingerprints`` attribute maps each
        document id to a ``(structural, difference)`` fingerprint tuple.
    """
    fingerprints_by_id = {}
    for doc in biochem_modelseed.find():
        # Read the id first so a document without '_id' fails loudly even
        # when it would otherwise be skipped.
        doc_id = doc['_id']
        if 'diff' not in doc or 'stru' not in doc:
            continue

        raw_diff = doc['diff']
        raw_stru = doc['stru']
        diff_fp = UIntSparseIntVect(raw_diff)
        stru_fp = ExplicitBitVect(raw_stru)
        fingerprints_by_id[doc_id] = (stru_fp, diff_fp)

    result = FingerprintMatcher()
    result.fingerprints = fingerprints_by_id
    return result
def pandas_series_to_bv(s):
    """Convert a sequence of flags into an ``ExplicitBitVect``.

    Args:
        s: sized object supporting ``s >= True`` element-wise (presumably a
            pandas Series of booleans or 0/1 values - confirm with caller).

    Returns:
        An ``ExplicitBitVect`` of length ``len(s)`` in which position ``i`` is
        set whenever element ``i`` compares ``>= True``.
    """
    result = ExplicitBitVect(len(s))
    # Same as the one-argument form of ``np.where``: positions of true entries.
    mask = np.asarray(s >= True)
    set_positions = mask.nonzero()[0].tolist()
    result.SetBitsFromList(set_positions)
    return result
def test3(self):
    """Build a signature tree from VectCollection examples and inspect it.

    Seven examples over 2-bit vectors are used to grow a tree of depth two;
    the test checks the root and both children, then classifies a collection
    that holds two single-bit vectors.
    """
    from BuildSigTree import BuildSigTree

    def make_collection(*bit_groups):
        # One 2-bit vector per group, stored under ids 1, 2, ...
        coll = VectCollection()
        for vect_id, on_bits in enumerate(bit_groups, 1):
            vect = ExplicitBitVect(2)
            for bit in on_bits:
                vect.SetBit(bit)
            coll.AddVect(vect_id, vect)
        return coll

    # (name, bits set in each vector of the collection, label)
    spec = (
        ('a', [()], 1),
        ('c', [(1,)], 0),
        ('c2', [(1,)], 0),
        ('d', [(0,)], 0),
        ('d2', [(0,), (1,)], 0),
        ('d', [(0, 1)], 1),
        ('e', [(0, 1)], 1),
    )
    examples = [[name, make_collection(*groups), label]
                for name, groups, label in spec]

    tree = BuildSigTree(examples, 2, metric=InfoTheory.InfoType.ENTROPY,
                        maxDepth=2, verbose=0)

    self.assertEqual(tree.GetName(), 'Bit-0')
    self.assertEqual(tree.GetLabel(), 0)

    # Both children of the root are expected to look the same.
    for child_idx in (0, 1):
        child = tree.GetChildren()[child_idx]
        self.assertEqual(child.GetName(), 'Bit-1')
        self.assertEqual(child.GetLabel(), 1)

    query = make_collection((0,), (1,))
    outcome = tree.ClassifyExample(['t', query, 0])
    self.assertEqual(outcome, 0)
def hex_to_stru_fingerprints(hex_str):
    """Decode a hex string and build an ``ExplicitBitVect`` from the bytes.

    Args:
        hex_str: hexadecimal text accepted by ``binascii.unhexlify``.

    Returns:
        The ``ExplicitBitVect`` constructed from the decoded bytes.
    """
    return ExplicitBitVect(binascii.unhexlify(hex_str))
def test3(self):
    """Build a signature tree verbosely and inspect the resulting nodes.

    Seven examples over 2-bit vectors are used to grow a tree of depth two
    while stdout is captured; the test checks that something containing
    'Build' was printed, checks the root and both children, then classifies
    a collection that holds two single-bit vectors.
    """

    def collection_of(bit_groups):
        # One 2-bit vector per entry, stored under ids 1, 2, ...
        coll = VectCollection()
        vect_id = 0
        for on_bits in bit_groups:
            vect_id += 1
            vect = ExplicitBitVect(2)
            for bit in on_bits:
                vect.SetBit(bit)
            coll.AddVect(vect_id, vect)
        return coll

    # (name, bits set in each vector of the collection, label)
    layout = [
        ('a', [[]], 1),
        ('c', [[1]], 0),
        ('c2', [[1]], 0),
        ('d', [[0]], 0),
        ('d2', [[0], [1]], 0),
        ('d', [[0, 1]], 1),
        ('e', [[0, 1]], 1),
    ]
    examples = []
    for name, bit_groups, label in layout:
        examples.append([name, collection_of(bit_groups), label])

    captured = StringIO()
    with redirect_stdout(captured):
        tree = BuildSigTree(examples, 2, metric=InfoTheory.InfoType.ENTROPY,
                            maxDepth=2, verbose=True)
    self.assertIn('Build', captured.getvalue())

    self.assertEqual(tree.GetName(), 'Bit-0')
    self.assertEqual(tree.GetLabel(), 0)

    left = tree.GetChildren()[0]
    self.assertEqual(left.GetName(), 'Bit-1')
    self.assertEqual(left.GetLabel(), 1)

    right = tree.GetChildren()[1]
    self.assertEqual(right.GetName(), 'Bit-1')
    self.assertEqual(right.GetLabel(), 1)

    probe = collection_of([[0], [1]])
    self.assertEqual(tree.ClassifyExample(['t', probe, 0]), 0)