예제 #1
1
def SBDFP_Calc(DF, FP="ECFP4", FORMAT="RDKit"):
    """Compute the statistics-based database fingerprint (SB-DFP) of a dataset.

    For every bit position, the number of rows in ``DF`` with that bit set is
    compared with a reference count read from the file ``<FP>.counts`` using a
    one-sided two-proportion z-test (``alternative='smaller'``, i.e. the
    alternative hypothesis is that the reference proportion is smaller than
    the dataset proportion). The output bit is 1 when the p-value is below
    0.01 and 0 otherwise.

    Args:
        DF: DataFrame with a column named ``ECFP4FP`` or ``MACCSFP`` holding
            the respective fingerprints as RDKit objects. According to the
            original notes it can be taken from the LoadDatasetFromCSV
            function.
        FP: ``"ECFP4"`` or ``"MACCS"``. Selects the column ``<FP>FP`` and the
            reference file ``<FP>.counts``, which is opened by relative path
            and must contain the comma-separated "1" bit counts on its first
            line.
        FORMAT: ``"RDKit"`` to get an RDKit object, ``"TEXT"`` to get the bit
            string.

    Returns:
        The SB-DFP as an RDKit bit vector (``FORMAT="RDKit"``) or as a string
        of ``0``/``1`` characters (``FORMAT="TEXT"``). Any other ``FORMAT``
        yields ``None``.

    Raises:
        ValueError: if ``FP`` is neither ``"ECFP4"`` nor ``"MACCS"``.
        IndexError: if ``DF`` has no rows, or if the reference file lists
            more counts than the fingerprints have bits.
    """
    # Fail early with a clear message instead of an UnboundLocalError at the
    # return statement.
    if FP not in ("ECFP4", "MACCS"):
        raise ValueError('FP must be "ECFP4" or "MACCS", got {!r}'.format(FP))

    # Per-bit count of rows in DF that have the bit switched on.
    bit_rows = [DataStructs.BitVectToText(fp) for fp in DF[FP + "FP"]]
    dataset_counts = [0] * len(bit_rows[0])
    for bits in bit_rows:
        dataset_counts = [total + int(bit) for total, bit in zip(dataset_counts, bits)]

    # The context manager guarantees the reference file is closed again.
    with open(FP + ".counts") as ref_file:
        ref_counts = [int(field) for field in ref_file.readline().split(",")]

    # NOTE(review): 15403690 is presumably the number of molecules behind the
    # reference counts -- confirm against the source of the .counts files.
    ref_size = 15403690
    n_rows = DF.shape[0]

    bit_chars = []
    for i, ref_count in enumerate(ref_counts):
        _stat, pval = proportions_ztest(
            [ref_count, dataset_counts[i]], [ref_size, n_rows], alternative='smaller'
        )
        bit_chars.append("1" if pval < 0.01 else "0")
    sbdfp_text = "".join(bit_chars)

    if FORMAT == "RDKit":
        return DataStructs.CreateFromBitString(sbdfp_text)
    if FORMAT == "TEXT":
        return sbdfp_text
    # Unrecognised FORMAT values have always produced None; kept for callers.
    return None
예제 #2
0
def DFP_Calc(DF, FP="ECFP4", FORMAT="RDKit"):
    """Compute the database fingerprint (DFP) of a dataset.

    A bit of the DFP is 1 when more than half of the rows in ``DF`` have that
    bit set in their fingerprint, and 0 otherwise.

    Args:
        DF: DataFrame with a column named ``ECFP4FP`` or ``MACCSFP`` holding
            the respective fingerprints as RDKit objects. According to the
            original notes it can be taken from the LoadDatasetFromCSV
            function.
        FP: ``"ECFP4"`` or ``"MACCS"``. Selects the column ``<FP>FP``.
        FORMAT: ``"RDKit"`` to get an RDKit object, ``"TEXT"`` to get the bit
            string.

    Returns:
        The DFP as an RDKit bit vector (``FORMAT="RDKit"``) or as a string of
        ``0``/``1`` characters (``FORMAT="TEXT"``). Any other ``FORMAT``
        yields ``None``.

    Raises:
        ValueError: if ``FP`` is neither ``"ECFP4"`` nor ``"MACCS"``.
        IndexError: if ``DF`` has no rows.
    """
    # Fail early with a clear message instead of an UnboundLocalError at the
    # return statement.
    if FP not in ("ECFP4", "MACCS"):
        raise ValueError('FP must be "ECFP4" or "MACCS", got {!r}'.format(FP))

    # Per-bit count of rows in DF that have the bit switched on.
    bit_rows = [DataStructs.BitVectToText(fp) for fp in DF[FP + "FP"]]
    dataset_counts = [0] * len(bit_rows[0])
    for bits in bit_rows:
        dataset_counts = [total + int(bit) for total, bit in zip(dataset_counts, bits)]

    # Strict majority: a bit set in exactly half of the rows stays 0.
    n_rows = DF.shape[0]
    dfp_text = "".join(
        "1" if float(count) / n_rows > 0.5 else "0" for count in dataset_counts
    )

    if FORMAT == "RDKit":
        return DataStructs.CreateFromBitString(dfp_text)
    if FORMAT == "TEXT":
        return dfp_text
    # Unrecognised FORMAT values have always produced None; kept for callers.
    return None
예제 #3
0
def testPattern(ptfile, bit=False):
    """ Test how to reload PatternFingerprint

    Loads the arrays stored under the keys 'x' (fingerprints) and 'y' from
    ``ptfile``, optionally rebuilds bit vectors from bit strings, and runs one
    bulk Tanimoto similarity of the first fingerprint against all of them as
    a smoke test.

    Args:
        ptfile: path (or file object) passed to ``np.load``.
        bit: when true, every entry of 'x' is treated as a bit string and
            converted with ``DataStructs.CreateFromBitString``.

    Returns:
        The fingerprints: a list of converted bit vectors when ``bit`` is
        true, otherwise the loaded 'x' array unchanged.
    """
    print('Validating fingerprint...')
    # NOTE(security): allow_pickle=True lets a crafted file run arbitrary code
    # while loading -- only use this on trusted fingerprint files.
    data = np.load(ptfile, allow_pickle=True)
    try:
        fps = data['x']
        # Not used below; read so that a file without names still fails here.
        _fp_names = data['y']
    finally:
        # np.load returns a closable archive for .npz files but a plain array
        # for .npy files, so only close when there is something to close.
        close = getattr(data, 'close', None)
        if close is not None:
            close()

    fp = [DataStructs.CreateFromBitString(z) for z in fps] if bit else fps
    # The similarity values are discarded; the call succeeding is the check.
    DataStructs.BulkTanimotoSimilarity(fp[0], list(fp))
    return fp
예제 #4
0
def loadFingerprint(datadir, fpid):
    """Load the stored fingerprints registered under ``fpid``.

    The entry ``fingerprint()[fpid]`` is indexed as: [0] the file name stem
    (``.npz`` is appended), [1] a parameter value returned unchanged, [2] a
    flag telling whether the stored fingerprints are bit strings, and [3] an
    object returned unchanged (presumably the fingerprint function -- confirm
    against ``fingerprint()``).

    Args:
        datadir: directory containing the ``.npz`` file.
        fpid: key into the mapping/sequence returned by ``fingerprint()``.

    Returns:
        Tuple ``(fp, fpn, fpparam, fpfun)``: the fingerprints (array 'x', or a
        list of bit vectors when the bit-string flag is set), the array 'y',
        and entries [1] and [3] of the registry record.
    """
    fpi = fingerprint()[fpid]
    fpfile = os.path.join(datadir, fpi[0] + '.npz')
    # The with-block closes the archive even if reading one of the arrays
    # raises, which a trailing data.close() would not.
    with np.load(fpfile, allow_pickle=True) as data:
        fp = data['x']
        fpn = data['y']
    fpparam = fpi[1]
    # Some fingerprints are stored as bit strings.
    # The explicit equality test is kept on purpose: the type of the flag in
    # the registry is not visible here, and truthiness would accept more values.
    if fpi[2] == True:
        fp = [DataStructs.CreateFromBitString(z) for z in fp]
    fpfun = fpi[3]
    return fp, fpn, fpparam, fpfun
예제 #5
0
  def testBitVectorMaxMin2(self):
    # Picks 5 of 100 twenty-bit vectors with a fixed seed, checks the expected
    # indices, and checks that passing useCache=False yields the same picks.
    bit_strings = [
      "11110010101000000000", "00000000000010010000", "11001010000000000001",
      "00100110101000001000", "01010110000100011001", "11000110101001000011",
      "00000000001100001111", "00011110110000001101", "00000011011110100010",
      "11000010110001000000", "00000100010000010000", "10000001000010110010",
      "00010010000000010100", "00011100100110101000", "10001001100110100000",
      "10000110100110010000", "00101110000101000000", "11011101100011100000",
      "10000110000100101000", "00101000100000010001", "01000001000010000000",
      "00101101010100000110", "10001000100110110001", "00011000010100000001",
      "00101000001000100011", "00010000100010011001", "01100001000100010001",
      "10000101000001101101", "00001000011001011000", "11110000100100100000",
      "10100110000000011010", "00110100010110010010", "00000000000001010010",
      "00100000000010100001", "11110011000010001000", "10110001010100001000",
      "00001100100110011011", "00010010100100001110", "10100101100010100010",
      "01100100010100000001", "10101110011100000000", "01011000000001000001",
      "00000011100110100010", "01100001010001001001", "00001000000001001100",
      "10011001110000000100", "10110000001001100100", "00011000000001001011",
      "11001011010001100010", "10010000000001001011", "00010000100111100000",
      "00001000001110001000", "11010000010001100110", "01101001100000111000",
      "01001000001110111000", "10000000000100010010", "11001000010010000000",
      "01010010000100110001", "00010001010100100001", "01110010000000010000",
      "10001010000011000001", "00000110000000100100", "00010000010001000000",
      "11101100011010000011", "00000010100001010001", "00010000110010000101",
      "00010001001000111001", "01000010001100100110", "00110110000000100001",
      "00100010010110110010", "01000000110011001111", "00011000001000110010",
      "01111010101000110100", "00001010000010110110", "00110011000011011010",
      "00111010111010000110", "00010011101010000011", "00000001011000010000",
      "00011011101110110000", "00010001101000000001", "00010000001010011010",
      "00000010100100100010", "00000010001011000100", "11010000000001011100",
      "00001000110101000001", "00000010000000110010", "10000000010011000001",
      "11110110100100010000", "10001111000110001001", "00100110000110000100",
      "00000100100000100100", "00110000101100010100", "00001010100000100000",
      "01011000000011000111", "00010000100001010001", "10000010100000010000",
      "00001000000000110010", "00001000101011010001", "00011110000100100000",
      "11001001010001010100"
    ]
    n_picks = 5
    fingerprints = list(map(DataStructs.CreateFromBitString, bit_strings))
    pool_size = len(fingerprints)
    picker = rdSimDivPickers.MaxMinPicker()

    # First run: picker called without the useCache argument.
    default_picks = picker.LazyBitVectorPick(fingerprints, pool_size, n_picks, seed=42)
    self.assertEqual(len(default_picks), n_picks)
    self.assertEqual(list(default_picks), [37, 1, 43, 38, 16])

    # Second run: same seed with useCache=False must select the same indices.
    uncached_picks = picker.LazyBitVectorPick(fingerprints, pool_size, n_picks,
                                              useCache=False, seed=42)
    self.assertEqual(len(uncached_picks), n_picks)
    self.assertEqual(list(default_picks), list(uncached_picks))
예제 #6
0
파일: testBV.py 프로젝트: dahuilangda/rdkit
 def test5FromBitString(self):
     # Build a bit vector from the text '1010' and check its length and which
     # bits are on.
     # assertEqual replaces the failUnless alias, which no longer exists in
     # unittest on Python 3.12+, and reports both values when a check fails.
     s1 = '1010'
     bv = DataStructs.CreateFromBitString(s1)
     self.assertEqual(len(bv), 4)
     self.assertEqual(list(bv.GetOnBits()), [0, 2])
예제 #7
0
def fpList_to_bit(fp_list):
    """Join the string items of ``fp_list`` and build a bit vector from the result."""
    bit_string = "".join(fp_list)
    return DataStructs.CreateFromBitString(bit_string)
예제 #8
0
 def convert_to_fp(bitstring):
     """Build a bit vector from ``bitstring`` via DataStructs.CreateFromBitString."""
     bit_vector = DataStructs.CreateFromBitString(bitstring)
     return bit_vector