Ejemplo n.º 1
0
def base64_to_bfp(b64):
    """Decode a base64-encoded packed bit string into an ``ExplicitBitVect``.

    Every decoded byte contributes eight bits, most significant bit first,
    so the resulting vector holds ``8 * number_of_decoded_bytes`` bits.

    Args:
        b64: Base64 payload (text or bytes) containing the packed bits.

    Returns:
        An ``ExplicitBitVect`` whose on bits are the positions of the ``1``
        digits in the unpacked binary string.
    """
    # ``str.decode("base64")`` exists only on Python 2; the base64 module
    # behaves the same on Python 2 and 3.
    import base64

    # A bytearray iterates as ints on both major versions, so no ord() call
    # is needed (ord() on an int raises TypeError on Python 3).
    packed = bytearray(base64.b64decode(b64))
    unpacked = ''.join('{:08b}'.format(byte) for byte in packed)
    n = len(unpacked)
    on_bits = [idx for idx, val in enumerate(unpacked) if val == '1']
    bfp = ExplicitBitVect(n)
    bfp.SetBitsFromList(on_bits)
    return bfp
Ejemplo n.º 2
0
    def test3(self):
        """Check that list-based and bit-vector-based NB models agree.

        A model is built from plain examples (name, attribute values, label)
        and a second one from the same data packed into ``ExplicitBitVect``
        objects with ``useSigs=True``. Each model must misclassify exactly
        one training example, and both must return the same prediction and
        the same classification details for every example.
        """
        examples = [
            ['a', 1, 0, 1, 0, 1],
            ['b', 1, 0, 0, 0, 1],
            ['c', 1, 0, 1, 0, 0],
            ['d', 0, 1, 1, 0, 0],
            ['e', 0, 1, 1, 1, 0],
        ]

        # First column is the name and last column is the label.
        nvars = len(examples[0]) - 2
        # Attribute indices here are column positions inside each example.
        attrs = list(range(1, nvars + 1))
        npvals = [0] + [2] * nvars + [2]
        qBounds = [0] + [0] * nvars + [0]
        mdl = CrossValidate.makeNBClassificationModel(examples, attrs, npvals,
                                                      qBounds)
        nWrong = 0
        for eg in examples:
            p = mdl.ClassifyExample(eg)
            if p != eg[-1]:
                nWrong += 1
        self.assertEqual(nWrong, 1)

        # Re-encode every example as [name, bit vector, label].
        bitEx = []
        for eg in examples:
            newEg = [eg[0], None, eg[-1]]
            bv = ExplicitBitVect(nvars)
            for i in range(nvars):
                if eg[i + 1]:
                    bv.SetBit(i)
            newEg[1] = bv
            bitEx.append(newEg)

        # Attribute indices are now bit positions inside the vector.
        attrs = list(range(nvars))
        mdl2 = CrossValidate.makeNBClassificationModel(bitEx,
                                                       attrs,
                                                       npvals,
                                                       qBounds,
                                                       useSigs=True)
        nWrong = 0
        for eg in bitEx:
            p = mdl2.ClassifyExample(eg)
            if p != eg[-1]:
                nWrong += 1
        self.assertEqual(nWrong, 1)

        # now compare:
        for eg, bitEg in zip(examples, bitEx):
            p1 = mdl.ClassifyExample(eg)
            p2 = mdl2.ClassifyExample(bitEg)
            self.assertEqual(p1, p2)
            v1 = mdl.GetClassificationDetails()
            # Read the details from the bit-vector model; querying ``mdl``
            # twice would compare a value with itself and always pass.
            v2 = mdl2.GetClassificationDetails()
            self.assertAlmostEqual(v1, v2, 4)
Ejemplo n.º 3
0
  def test1(self):
    """Classify one example repeatedly while bits 1, 0 and 2 get switched on."""
    tree = self.tree
    bits = ExplicitBitVect(5)
    example = ['nm', bits]

    # With no bits set the tree must return a falsy result.
    self.assertFalse(tree.ClassifyExample(example))

    # The example shares the vector, so each SetBit changes what is classified.
    steps = (
      (1, self.assertTrue),
      (0, self.assertTrue),
      (2, self.assertFalse),
    )
    for bit, check in steps:
      bits.SetBit(bit)
      check(tree.ClassifyExample(example))
Ejemplo n.º 4
0
    def test1(self):
        """Classify one example while bits 1, 0 and 2 are set in turn.

        The example list holds a reference to ``bv``, so every ``SetBit``
        call changes the input seen by the next ``ClassifyExample`` call.
        """
        t1 = self.tree
        bv = ExplicitBitVect(5)

        ex = ['nm', bv]
        # assertFalse/assertTrue replace failIf/failUnless, which were
        # deprecated aliases and no longer exist on Python 3.12+.
        self.assertFalse(t1.ClassifyExample(ex))
        bv.SetBit(1)
        self.assertTrue(t1.ClassifyExample(ex))

        bv.SetBit(0)
        self.assertTrue(t1.ClassifyExample(ex))

        bv.SetBit(2)
        self.assertFalse(t1.ClassifyExample(ex))
Ejemplo n.º 5
0
    def test4(self):
        """Check that the ``attrs`` argument restricts which bits are used.

        The bit vectors are wider (10 bits) than the original data, and one
        extra bit is set to mirror the label. Only the original bit positions
        are passed as ``attrs``, and the test expects exactly one
        misclassified training example.
        """
        examples = [
            ['a', 1, 0, 1, 0, 1],
            ['b', 1, 0, 0, 0, 1],
            ['c', 1, 0, 1, 0, 0],
            ['d', 0, 1, 1, 0, 0],
            ['e', 0, 1, 1, 1, 0],
        ]

        # First column is the name and last column is the label.
        origNVars = len(examples[0]) - 2
        nvars = 10
        npvals = [0] + [2] * nvars + [2]
        qBounds = [0] + [0] * nvars + [0]

        bitEx = []
        for eg in examples:
            newEg = [eg[0], None, eg[-1]]
            bv = ExplicitBitVect(nvars)
            for i in range(origNVars):
                if eg[i + 1]:
                    bv.SetBit(i)

            # this bit will yield perfect accuracy if
            #  the attrs argument isn't being used properly:
            if eg[-1]:
                bv.SetBit(origNVars)
            newEg[1] = bv
            bitEx.append(newEg)

        # Pass a concrete list, as the sibling tests do, instead of a lazy
        # range object.
        attrs = list(range(origNVars))
        mdl2 = CrossValidate.makeNBClassificationModel(bitEx,
                                                       attrs,
                                                       npvals,
                                                       qBounds,
                                                       useSigs=True)
        nWrong = 0
        for eg in bitEx:
            p = mdl2.ClassifyExample(eg)
            if p != eg[-1]:
                nWrong += 1
        self.assertEqual(nWrong, 1)
Ejemplo n.º 6
0
    def _test5(self):  # disabled because CMIM was removed # pragma: nocover
        """Build a bit-vector NB model with ``useCMIM=2`` and count its errors.

        The leading underscore keeps the test runner from collecting this
        method. It expects exactly one misclassified training example.
        """
        examples = [
            ['a', 1, 0, 1, 0, 1, 1, 0, 1],
            ['b', 1, 0, 0, 0, 1, 0, 0, 1],
            ['c', 1, 0, 1, 0, 1, 1, 0, 0],
            ['d', 0, 1, 1, 0, 1, 0, 0, 0],
            ['e', 0, 1, 1, 1, 0, 1, 0, 0],
        ]

        # First column is the name and last column is the label.
        nvars = len(examples[0]) - 2
        npvals = [0] + [2] * nvars + [2]
        qBounds = [0] + [0] * nvars + [0]

        # Re-encode every example as [name, bit vector, label].
        bitEx = []
        for row in examples:
            vect = ExplicitBitVect(nvars)
            for bit, flag in enumerate(row[1:nvars + 1]):
                if flag:
                    vect.SetBit(bit)
            bitEx.append([row[0], vect, row[-1]])

        attrs = list(range(nvars))
        mdl2 = CrossValidate.makeNBClassificationModel(bitEx,
                                                       attrs,
                                                       npvals,
                                                       qBounds,
                                                       useSigs=True,
                                                       useCMIM=2)
        nWrong = sum(1 for row in bitEx if mdl2.ClassifyExample(row) != row[-1])
        self.assertEqual(nWrong, 1)
Ejemplo n.º 7
0
  def test2(self):
    """Classify bit vectors that are also registered in a VectCollection."""
    tree = self.tree
    collection = VectCollection()

    first = ExplicitBitVect(5)
    first.SetBitsFromList([0])
    collection.AddVect(1, first)

    second = ExplicitBitVect(5)
    second.SetBitsFromList([1, 2])
    collection.AddVect(2, second)

    # The example carries the most recent bit vector itself, not the
    # collection it was added to.
    self.assertTrue(tree.ClassifyExample(['nm', second, 1]))

    third = ExplicitBitVect(5)
    third.SetBitsFromList([0, 2])
    # Id 1 is reused here, as in the first AddVect call above.
    collection.AddVect(1, third)
    self.assertFalse(tree.ClassifyExample(['nm', third, 1]))
Ejemplo n.º 8
0
def build_from_mongo(biochem_modelseed):
    """Build a ``FingerprintMatcher`` from the documents of a collection.

    Every document returned by ``biochem_modelseed.find()`` that has both a
    ``'diff'`` and a ``'stru'`` field contributes one entry, keyed by the
    document's ``'_id'``, holding a ``(structural, difference)`` fingerprint
    pair. Documents missing either field are skipped.

    Args:
        biochem_modelseed: Object exposing ``find()`` that yields mapping-like
            documents (presumably a MongoDB collection; confirm with caller).

    Returns:
        A ``FingerprintMatcher`` whose ``fingerprints`` attribute is the
        dictionary described above.
    """
    fingerprints_by_rxn = {}

    for doc in biochem_modelseed.find():
        key = doc['_id']
        if 'diff' not in doc or 'stru' not in doc:
            continue
        difference = UIntSparseIntVect(doc['diff'])
        structural = ExplicitBitVect(doc['stru'])
        fingerprints_by_rxn[key] = (structural, difference)

    result = FingerprintMatcher()
    result.fingerprints = fingerprints_by_rxn
    return result
Ejemplo n.º 9
0
def pandas_series_to_bv(s):
    """Convert a sequence of flags into an ``ExplicitBitVect`` of equal length.

    Args:
        s: Array-like supporting element-wise ``>=`` (the name suggests a
            pandas Series; confirm with callers).

    Returns:
        An ``ExplicitBitVect`` of ``len(s)`` bits with a bit set at every
        position where ``s >= True`` holds.
    """
    # True compares as 1, so any entry that is >= 1 counts as "on".
    mask = np.asarray(s >= True)
    positions = mask.nonzero()[0].tolist()
    result = ExplicitBitVect(len(s))
    result.SetBitsFromList(positions)
    return result
Ejemplo n.º 10
0
  def test3(self):
    """Build a depth-2 signature tree from VectCollections and inspect it."""
    from BuildSigTree import BuildSigTree

    def make_collection(*bit_lists):
      # One 2-bit vector per entry, registered under ids 1, 2, ...
      coll = VectCollection()
      for vect_id, bits in enumerate(bit_lists, 1):
        vect = ExplicitBitVect(2)
        for bit in bits:
          vect.SetBit(bit)
        coll.AddVect(vect_id, vect)
      return coll

    # (name, on-bits of each vector in the collection, label)
    layout = [
      ('a', [()], 1),
      ('c', [(1,)], 0),
      ('c2', [(1,)], 0),
      ('d', [(0,)], 0),
      ('d2', [(0,), (1,)], 0),
      ('d', [(0, 1)], 1),
      ('e', [(0, 1)], 1),
    ]
    examples = [[name, make_collection(*vects), label]
                for name, vects, label in layout]

    tree = BuildSigTree(examples,2,metric=InfoTheory.InfoType.ENTROPY,
                        maxDepth=2,verbose=0)

    # The root is expected to split on bit 0 and both children on bit 1.
    self.assertEqual(tree.GetName(),'Bit-0')
    self.assertEqual(tree.GetLabel(),0)
    for child_idx in (0, 1):
      child = tree.GetChildren()[child_idx]
      self.assertEqual(child.GetName(),'Bit-1')
      self.assertEqual(child.GetLabel(),1)

    # A collection holding one vector with bit 0 and one with bit 1.
    probe = make_collection((0,), (1,))
    prediction = tree.ClassifyExample(['t',probe,0])
    self.assertEqual(prediction,0)
Ejemplo n.º 11
0
def hex_to_stru_fingerprints(hex_str):
    """Build an ``ExplicitBitVect`` from a hex-encoded byte string.

    Args:
        hex_str: Hexadecimal text accepted by ``binascii.unhexlify``.

    Returns:
        The ``ExplicitBitVect`` constructed from the decoded bytes.
    """
    return ExplicitBitVect(binascii.unhexlify(hex_str))
Ejemplo n.º 12
0
    def test3(self):
        """Build a depth-2 signature tree verbosely and inspect the result."""

        def make_collection(*bit_lists):
            # One 2-bit vector per entry, registered under ids 1, 2, ...
            coll = VectCollection()
            for vect_id, bits in enumerate(bit_lists, 1):
                vect = ExplicitBitVect(2)
                for bit in bits:
                    vect.SetBit(bit)
                coll.AddVect(vect_id, vect)
            return coll

        # (name, on-bits of each vector in the collection, label)
        layout = [
            ('a', [()], 1),
            ('c', [(1,)], 0),
            ('c2', [(1,)], 0),
            ('d', [(0,)], 0),
            ('d2', [(0,), (1,)], 0),
            ('d', [(0, 1)], 1),
            ('e', [(0, 1)], 1),
        ]
        examples = [[name, make_collection(*vects), label]
                    for name, vects, label in layout]

        # verbose=True is expected to print progress; capture it to keep the
        # test output clean and to check that something was written.
        captured = StringIO()
        with redirect_stdout(captured):
            tree = BuildSigTree(examples,
                                2,
                                metric=InfoTheory.InfoType.ENTROPY,
                                maxDepth=2,
                                verbose=True)
        self.assertIn('Build', captured.getvalue())

        # The root is expected to split on bit 0 and both children on bit 1.
        self.assertEqual(tree.GetName(), 'Bit-0')
        self.assertEqual(tree.GetLabel(), 0)
        for child_idx in (0, 1):
            child = tree.GetChildren()[child_idx]
            self.assertEqual(child.GetName(), 'Bit-1')
            self.assertEqual(child.GetLabel(), 1)

        # A collection holding one vector with bit 0 and one with bit 1.
        probe = make_collection((0,), (1,))
        prediction = tree.ClassifyExample(['t', probe, 0])
        self.assertEqual(prediction, 0)