예제 #1
0
파일: testBV.py 프로젝트: lmmentel/rdkit
  def test7FPS(self):
    """Round-trip an explicit bit vector through its FPS text encoding."""
    fps_text = "03008280"
    original = DataStructs.ExplicitBitVect(32)
    for bit in (0, 1, 17, 23, 31):
      original.SetBit(bit)

    # encoding and then decoding must give back an equal vector
    self.assertEqual(DataStructs.BitVectToFPSText(original), fps_text)
    restored = DataStructs.CreateFromFPSText(fps_text)
    self.assertEqual(original, restored)

    # the same text with one extra character appended must be rejected
    with self.assertRaises(ValueError):
      DataStructs.CreateFromFPSText("030082801")

    # an empty string decodes to a vector with zero bits
    empty = DataStructs.CreateFromFPSText("")
    self.assertEqual(empty.GetNumBits(), 0)
예제 #2
0
    def testBitVectorMaxMin4(self):
        """Check MaxMinPicker.LazyBitVectorPickWithThreshold with two cutoffs.

        The fingerprints come from the ``chembl_cyps.head.fps`` test file;
        both calls ask for 20 picks with ``seed=42``.
        """
        # threshold tests
        fps_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers',
                                'Wrap', 'test_data', 'chembl_cyps.head.fps')
        with open(fps_path) as handle:
            fps = [DataStructs.CreateFromFPSText(row.strip()) for row in handle]

        expected = [
            374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630,
            881, 516, 497, 412, 718, 869, 407
        ]
        picker = rdSimDivPickers.MaxMinPicker()

        # threshold of -1.0: all 20 requested picks are expected back
        picked, reached = picker.LazyBitVectorPickWithThreshold(
            fps, len(fps), 20, -1.0, seed=42)
        self.assertEqual(list(picked), expected)

        self.assertAlmostEqual(reached, 0.8977, 4)

        # threshold of 0.91: only the first 13 of those picks are expected
        picked, reached = picker.LazyBitVectorPickWithThreshold(
            fps, len(fps), 20, 0.91, seed=42)
        self.assertEqual(list(picked), expected[:13])
        self.assertTrue(reached >= 0.91)
예제 #3
0
    def testBitVectorMaxMin3(self):
        """Check MaxMinPicker.LazyBitVectorPick with and without ``firstPicks``.

        Both calls request 20 picks with ``seed=42`` and must yield the same
        list of indices.
        """
        fps_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers',
                                'Wrap', 'test_data', 'chembl_cyps.head.fps')
        with open(fps_path) as handle:
            fps = [DataStructs.CreateFromFPSText(row.strip()) for row in handle]

        expected = [
            374, 720, 690, 339, 875, 842, 404, 725, 120, 385, 115, 868, 630,
            881, 516, 497, 412, 718, 869, 407
        ]
        picker = rdSimDivPickers.MaxMinPicker()

        # plain pick
        picked = list(picker.LazyBitVectorPick(fps, len(fps), 20, seed=42))
        self.assertEqual(picked, expected)

        # seeding the picker with the first five expected ids must not
        # change the overall result
        seeded = picker.LazyBitVectorPick(fps,
                                          len(fps),
                                          20,
                                          firstPicks=[374, 720, 690, 339, 875],
                                          seed=42)
        picked = list(seeded)
        self.assertEqual(picked, expected)
예제 #4
0
    def test9MultiFPBReaderEdges(self):
        """Search a MultiFPBReader that has no readers added.

        The reader is initialised without any ``AddReader`` call, so a
        Tanimoto search is expected to return an empty result.
        """
        mfpbr = DataStructs.MultiFPBReader()
        mfpbr.Init()

        fps = ("0000000000404000100000001000040000300040222000002004000240000020000000"
               "8200010200000090000024040860070044003214820000220401054008018000226000"
               "4800800140000042000080008008020482400000200410800000300430200800400000"
               "0000080a0000800400010c800200648818100010880040")
        ebv = DataStructs.CreateFromFPSText(fps)
        # named `query` rather than `bytes` so the builtin is not shadowed
        query = DataStructs.BitVectToBinaryText(ebv)
        nbrs = mfpbr.GetTanimotoNeighbors(query, threshold=0.6)
        self.assertEqual(len(nbrs), 0)
예제 #5
0
    def test8MultiFPBReaderContainsInitOnSearch(self):
        """Run a containment search on a MultiFPBReader built with initOnSearch.

        Four FPB files are registered without an explicit ``Init()`` call;
        the search itself is run with ``numThreads=4``.
        """
        basen = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs',
                             'testData')
        mfpbr = DataStructs.MultiFPBReader(initOnSearch=True)

        # AddReader is expected to return the running number of readers
        fpb_names = (
            "zinc_random200.1.patt.fpb",
            "zinc_random200.2.patt.fpb",
            "zinc_random200.3.patt.fpb",
            "zinc_random200.4.patt.fpb",
        )
        for count, fpb_name in enumerate(fpb_names, 1):
            reader = DataStructs.FPBReader(os.path.join(basen, fpb_name))
            self.assertEqual(mfpbr.AddReader(reader), count)

        fps = ("40081010824820021000500010110410003000402b20285000a4040240010030050000"
               "080001420040009000003d04086007080c03b31d920004220400074008098010206080"
               "00488001080000c64002a00080000200024c2000602410049200340820200002400010"
               "02200106090401056801080182006088101000088a0048")
        ebv = DataStructs.CreateFromFPSText(fps)
        query = DataStructs.BitVectToBinaryText(ebv)
        nbrs = mfpbr.GetContainingNeighbors(query, numThreads=4)

        # expected (element 0, element 1) of every hit, in result order;
        # presumably (fingerprint index, reader index) - confirm against the
        # MultiFPBReader documentation
        expected = (
            (160, 0),
            (163, 0),
            (170, 0),
            (180, 2),
            (182, 3),
            (185, 0),
            (189, 0),
            (192, 3),
            (193, 0),
        )
        self.assertEqual(len(nbrs), 9)
        for pos, (first, second) in enumerate(expected):
            self.assertEqual(nbrs[pos][0], first)
            self.assertEqual(nbrs[pos][1], second)
예제 #6
0
    def testBitVectorLeader1(self):
        """Check LeaderPicker.LazyBitVectorPick at two thresholds and a pickSize.

        For every result, each pair of picked fingerprints must be at least
        ``thresh`` apart, measured as ``1 - TanimotoSimilarity``.
        """
        # threshold tests
        fps_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'SimDivPickers',
                                'Wrap', 'test_data', 'chembl_cyps.head.fps')
        with open(fps_path) as handle:
            fps = [DataStructs.CreateFromFPSText(row.strip()) for row in handle]

        def check_pairwise_distance(picked, min_dist):
            # compare every pick with all picks that precede it
            for i in range(len(picked)):
                for j in range(i):
                    sim = DataStructs.TanimotoSimilarity(
                        fps[picked[i]], fps[picked[j]])
                    self.assertGreaterEqual(1 - sim, min_dist)

        picker = rdSimDivPickers.LeaderPicker()

        thresh = 0.8
        ids = picker.LazyBitVectorPick(fps, len(fps), thresh)
        self.assertEqual(len(ids), 146)
        check_pairwise_distance(ids, thresh)

        thresh = 0.9
        ids = picker.LazyBitVectorPick(fps, len(fps), thresh)
        self.assertEqual(len(ids), 14)
        check_pairwise_distance(ids, thresh)

        # same threshold, but the number of picks is capped at 10
        ids = picker.LazyBitVectorPick(fps, len(fps), thresh, pickSize=10)
        self.assertEqual(len(ids), 10)
        check_pairwise_distance(ids, thresh)
예제 #7
0
    def test6MultiFPBReaderTani(self):
        """Run Tanimoto searches on a MultiFPBReader holding four FPB files.

        The same query is searched with the default settings and with
        ``numThreads=4``; both searches must return identical hits.
        """
        basen = os.path.join(RDConfig.RDBaseDir, 'Code', 'DataStructs',
                             'testData')
        mfpbr = DataStructs.MultiFPBReader()

        # AddReader is expected to return the running number of readers
        fpb_names = (
            "zinc_random200.1.patt.fpb",
            "zinc_random200.2.patt.fpb",
            "zinc_random200.3.patt.fpb",
            "zinc_random200.4.patt.fpb",
        )
        for count, fpb_name in enumerate(fpb_names, 1):
            reader = DataStructs.FPBReader(os.path.join(basen, fpb_name))
            self.assertEqual(mfpbr.AddReader(reader), count)
        mfpbr.Init()
        self.assertEqual(mfpbr.GetNumBits(), 1024)
        self.assertEqual(len(mfpbr), 4)

        fps = ("0000000000404000100000001000040000300040222000002004000240000020000000"
               "8200010200000090000024040860070044003214820000220401054008018000226000"
               "4800800140000042000080008008020482400000200410800000300430200800400000"
               "0000080a0000800400010c800200648818100010880040")
        ebv = DataStructs.CreateFromFPSText(fps)
        query = DataStructs.BitVectToBinaryText(ebv)

        # expected (element 0, element 1, element 2) of every hit, in result
        # order; element 0 is compared to 4 decimal places
        expected = (
            (0.66412, 0, 3),
            (0.65289, 1, 2),
            (0.64341, 2, 1),
            (0.61940, 1, 0),
            (0.61905, 0, 0),
            (0.61344, 0, 1),
        )

        # first pass: default search; second pass: multi-threaded
        # (won't do anything if the RDKit isn't compiled with threads support)
        for extra_args in ({}, {'numThreads': 4}):
            nbrs = mfpbr.GetTanimotoNeighbors(query, threshold=0.6,
                                              **extra_args)
            self.assertEqual(len(nbrs), 6)
            for pos, (score, second, third) in enumerate(expected):
                self.assertAlmostEqual(nbrs[pos][0], score, 4)
                self.assertEqual(nbrs[pos][1], second)
                self.assertEqual(nbrs[pos][2], third)
예제 #8
0
def transform(infile, outfile, outformat, delimiter, numericID, numlines,
              verbose, debug):
    """Convert an RDKit FPS fingerprint file into a bit-string text file.

    Every line of ``infile`` that is neither a comment (starts with ``#``)
    nor blank is split on ``delimiter``.  The first field is decoded with
    ``DataStructs.CreateFromFPSText`` and written to ``outfile`` as a bit
    string together with an identifier.

    Args:
        infile: path of the fingerprint file to read.
        outfile: path of the file to write (opened with mode ``'w'``).
        outformat: if truthy, write a four-line tagged record per
            fingerprint (``$SMI<C>``, ``PCN<id>``, ``FPRDK<bits>``, ``|``);
            otherwise write a single ``"<bits> <id>"`` line.
        delimiter: separator handed to ``str.split`` for each line.
        numericID: first identifier to assign; it is incremented by one for
            each fingerprint written.  The sentinel value ``-99999999``
            means the identifier is read from the second field of each
            line instead.
        numlines: with ``verbose`` set, a progress line is printed every
            ``numlines`` fingerprints; a falsy value disables progress
            output.
        verbose: print the start, progress and summary messages.
        debug: print comment lines and bit-string lengths, plus the start
            and summary messages.

    Returns:
        None.
    """
    # sentinel meaning "no numeric id requested, use the id in the file"
    no_numeric_id = -99999999

    # The context manager closes both files, so the output is flushed even
    # when decoding a line raises.
    with open(infile) as ins, open(outfile, 'w') as outs:
        # inform
        if debug or verbose:
            print(
                "About to read rdkit fingerprint file {} and transform to bivector file ... {}"
                .format(infile, outfile))

        # run
        nid = numericID
        ctr = 0  # number of fingerprints written so far
        for line in ins:
            # if comment line ignore
            if line.startswith('#'):
                if debug:
                    print(line)
                continue

            # drop the line terminator (LF or CRLF) before splitting
            record = line.rstrip('\r\n')
            if not record.strip():
                # a blank line carries no fingerprint
                continue

            # split line
            chunky_line = record.split(delimiter)

            # get the fp needed
            fp = DataStructs.CreateFromFPSText(chunky_line[0])
            bs = fp.ToBitString()

            if nid != no_numeric_id:
                pcn = nid
                nid += 1
            else:
                try:
                    pcn = chunky_line[1]
                except IndexError:
                    # best effort: report the record and carry on; ctr is
                    # the count of fingerprints written, not a line number
                    print("Line " + str(ctr) + " missing id element...")
                    continue

            # write
            if outformat:
                outs.write("$SMI<C>\n")
                outs.write("PCN<" + str(pcn) + ">\n")
                outs.write("FPRDK<" + bs + ">\n")
                outs.write("|\n")
            else:
                outs.write(bs + " " + str(pcn) + "\n")

            if debug:
                print(len(bs))

            # the `numlines` guard avoids a ZeroDivisionError for 0
            if verbose and numlines and ctr % numlines == 0:
                print("Processed {} fingerprints...".format(ctr))

            ctr += 1

    # inform
    if debug or verbose:
        print("\nRead/transformed/wrote {} fingerprints.".format(ctr))