Example #1
0
def convertdata(positives, negatives, bitsize, radius):
    """Turn two SMILES collections into combined Morgan + Avalon bit arrays.

    Every SMILES string is parsed with RDKit and entries that fail to parse
    are dropped.  For each remaining molecule the Morgan bit string (built
    with ``int(radius)`` and ``int(bitsize)``) is concatenated with the
    Avalon bit string, and the result is converted into a numpy array of
    integers.

    Returns a tuple ``(positive_arrays, negative_arrays)``; each element is a
    list with one array per successfully parsed molecule.
    """

    def featurize(smiles_batch):
        # Parse the whole batch first, then discard what RDKit rejected.
        parsed = [Chem.MolFromSmiles(smi) for smi in smiles_batch]
        mols = [mol for mol in parsed if mol is not None]

        morgan_bits = [
            AllChem.GetMorganFingerprintAsBitVect(
                mol, int(radius), nBits=int(bitsize)).ToBitString()
            for mol in mols
        ]
        avalon_bits = [AvT.GetAvalonFP(mol).ToBitString() for mol in mols]

        # Morgan bits come first; the Avalon bits are appended behind them.
        merged = [
            morgan + avalon
            for morgan, avalon in zip(morgan_bits, avalon_bits)
        ]
        return [numpy.array([int(bit) for bit in bits]) for bits in merged]

    positive_arrays = featurize(positives)
    negative_arrays = featurize(negatives)
    return positive_arrays, negative_arrays
Example #2
0
def BuildAvalonFP(mol, smiles=None):
    """Return the Avalon fingerprint for ``mol`` or, when given, for ``smiles``.

    A non-``None`` ``smiles`` takes precedence: it is passed to
    ``GetAvalonFP`` together with ``True`` as second positional argument and
    ``mol`` is ignored.  Otherwise the fingerprint is computed from ``mol``.
    """
    from rdkit.Avalon import pyAvalonTools

    if smiles is not None:
        return pyAvalonTools.GetAvalonFP(smiles, True)
    return pyAvalonTools.GetAvalonFP(mol)
Example #3
0
def rd_kit(dir_sdf = "../data/sdf/"):
    """Plot how similar every SDF file in ``dir_sdf`` is to the first one.

    The first molecule of the alphabetically first file is the baseline.
    For the first molecule of every file, Morgan, RDKit, Avalon and layered
    fingerprints are computed and their Tanimoto similarity to the matching
    baseline fingerprint is collected.  The Morgan and RDKit similarities
    are shown as histograms; the mean RDKit similarity and the number of
    files processed are printed.

    Parameters
    ----------
    dir_sdf : str
        Directory containing the SDF files.

    Raises
    ------
    IndexError
        If the directory contains no (non-hidden) entries.
    FileNotFoundError
        If the directory does not exist.
    """
    # List the directory directly rather than through a shell "ls" command:
    # no command string is built from the argument and file names that
    # contain whitespace stay intact.  Hidden entries are skipped and the
    # result is sorted, as "ls" did.
    sdf_files = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))
    bit_length = 1024

    sim_matrix_morgan = []
    sim_matrix_rdk = []
    sim_matrix_aval = []
    sim_matrix_layer = []

    baseline = SDMolSupplier(os.path.join(dir_sdf, sdf_files[0]))

    # NOTE(review): the baseline uses Morgan radius 2 / maxPath 2 while the
    # loop below uses radius 3 / maxPath 3 - confirm whether comparing
    # fingerprints built with different settings is intended.
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(baseline[0], 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(baseline[0], maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(baseline[0], 128)
    baseline_layer = AllChem.LayeredFingerprint(baseline[0])

    tanimoto = DataStructs.TanimotoSimilarity
    count = 0
    for item in sdf_files:
        suppl = SDMolSupplier(os.path.join(dir_sdf, item))
        count += 1

        fp_bit = AllChem.GetMorganFingerprintAsBitVect(suppl[0], 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], 128)
        fp_layer = AllChem.LayeredFingerprint(suppl[0])

        sim_matrix_morgan.append(
            DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=tanimoto))
        sim_matrix_rdk.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk, metric=tanimoto))
        sim_matrix_aval.append(
            DataStructs.FingerprintSimilarity(baseline_aval, fp_aval, metric=tanimoto))
        sim_matrix_layer.append(
            DataStructs.FingerprintSimilarity(baseline_layer, fp_layer, metric=tanimoto))

    sim_matrix_morgan = np.array(sim_matrix_morgan)
    sim_matrix_rdk = np.array(sim_matrix_rdk)
    sim_matrix_aval = np.array(sim_matrix_aval)
    sim_matrix_layer = np.array(sim_matrix_layer)

    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label = label_morgan)
    plt.hist(sim_matrix_rdk, label = "rdk2")
    # The Avalon and layered similarities are collected but not plotted.
    #plt.hist(sim_matrix_aval, label = "avalon128")
    #plt.hist(sim_matrix_layer, label = "layer")
    print(np.mean(sim_matrix_rdk))
    print(count)
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()
Example #4
0
  def test3(self):
    """Avalon fingerprints of small aromatic rings have the expected size and on-bit counts."""
    # The default fingerprint length is checked once, on the first molecule.
    bv = pyAvalonTools.GetAvalonFP(Chem.MolFromSmiles('c1ccccn1'))
    self.assertEqual(len(bv), 512)
    self.assertEqual(bv.GetNumOnBits(), 20)

    expected_on_bits = (
      ('c1ccccc1', 8),
      ('c1nnccc1', 30),
      ('c1ncncc1', 27),
    )
    for smiles, on_bits in expected_on_bits:
      bv = pyAvalonTools.GetAvalonFP(Chem.MolFromSmiles(smiles))
      self.assertEqual(bv.GetNumOnBits(), on_bits)

    # Requesting a longer fingerprint must set more bits than the default did.
    wide = pyAvalonTools.GetAvalonFP(Chem.MolFromSmiles('c1ncncc1'), nBits=1024)
    self.assertEqual(len(wide), 1024)
    self.assertTrue(wide.GetNumOnBits() > 27)
def pipe_sim_filter(stream,
                    query,
                    cutoff=80,
                    summary=None,
                    comp_id="pipe_sim_filter"):
    """Filter for compounds that have a similarity greater or equal
    than `cutoff` (in percent) to the `query` Smiles.
    If the field `FP_b64` (e.g. pre-calculated) is present, this will be used,
    otherwise the fingerprint of the Murcko scaffold will be generated on-the-fly (much slower).

    This is a generator: records that pass get their similarity (percent,
    rounded to two decimals) stored under ``"Sim"`` and are yielded.  Records
    without a ``"mol"`` entry are skipped.  When `summary` is given,
    ``summary[comp_id]`` is set to the number of records yielded so far.
    If `query` cannot be parsed, an error is printed and nothing is yielded."""

    def scaffold_fp(mol):
        """Fingerprint the Murcko scaffold of `mol` with the method selected by USE_FP."""
        murcko_mol = MurckoScaffold.GetScaffoldForMol(mol)
        if USE_FP == "morgan":
            return Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                murcko_mol, 2)
        if USE_FP == "avalon":
            return pyAv.GetAvalonFP(murcko_mol, 1024)
        return FingerprintMols.FingerprintMol(murcko_mol)

    rec_counter = 0

    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        print("* {} ERROR: could not generate query from SMILES.".format(
            comp_id))
        return None

    query_fp = scaffold_fp(query_mol)

    for rec in stream:
        if "mol" not in rec: continue

        if "FP_b64" in rec:  # use the pre-defined fingerprint if it is present in the stream
            # SECURITY: unpickling executes arbitrary code embedded in the
            # data - only feed this pipeline records from a trusted source.
            mol_fp = pickle.loads(b64.b64decode(rec["FP_b64"]))
        else:
            mol_fp = scaffold_fp(rec["mol"])

        sim = DataStructs.FingerprintSimilarity(query_fp, mol_fp)
        if sim * 100 >= cutoff:
            rec_counter += 1
            rec["Sim"] = np.round(sim * 100, 2)

            if summary is not None:
                summary[comp_id] = rec_counter

            yield rec
 def __init__(self):
     """Set up the fingerprint generator tables and the empty result containers.

     ``binaryfp_names`` / ``binaryfp`` and ``countfp_names`` / ``countfp`` are
     parallel lists: the generator at index ``i`` is a one-argument callable
     (taking a molecule) that belongs to the name at index ``i``.
     """
     # All containers start out empty.  The count-fingerprint generators
     # below look up ``self.bit_info`` each time they are called.
     self.bit_info = {}
     self.bit_infos = {}
     self.vectors = []
     self.all_bit_info_keys = {}
     self.mols = []

     def morgan_bits(radius, n_bits, **extra):
         # Bit-vector Morgan generator with fixed radius and length.
         return lambda mol: AllChem.GetMorganFingerprintAsBitVect(
             mol, radius, nBits=n_bits, **extra
         )

     def morgan_counts(radius, use_features):
         # Count-based Morgan generator that passes ``self.bit_info`` as bitInfo.
         return lambda mol: AllChem.GetMorganFingerprint(
             mol, radius=radius, bitInfo=self.bit_info, useFeatures=use_features
         )

     binary_table = [
         ("MACCSkeys", lambda mol: MACCSkeys.GenMACCSKeys(mol)),
         ("Avalon", lambda mol: pyAvalonTools.GetAvalonFP(mol)),
         ("Morgan2(1024bits)", morgan_bits(2, 1024)),
         ("Morgan2F(1024bits)", morgan_bits(2, 1024, useFeatures=True)),
         ("Morgan4(2048bits)", morgan_bits(4, 2048)),
         ("Morgan4F(2048bits)", morgan_bits(4, 2048, useFeatures=True)),
         # Disabled entries:
         # "AtomPair": Pairs.GetAtomPairFingerprintAsBitVect(mol)  -- crashes
         # "Topological": FingerprintMols.FingerprintMol(mol)  -- produces NaN
         # "TopologicalTortion": Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
         #     -- result has no ToBitString
     ]
     self.binaryfp_names = [label for label, _ in binary_table]
     self.binaryfp = [generator for _, generator in binary_table]

     count_table = [
         ("ECFP2", 1, False),
         ("FCFP2", 1, True),
         ("ECFP4", 2, False),
         ("FCFP4", 2, True),
         ("ECFP6", 3, False),
         ("FCFP6", 3, True),
     ]
     self.countfp_names = [label for label, _, _ in count_table]
     self.countfp = [
         morgan_counts(radius, use_features)
         for _, radius, use_features in count_table
     ]
Example #7
0
    def calculate_fp(smi: str, fp_type: str):
        """Calculate the ``fp_type`` fingerprint of a SMILES string.

        Supported ``fp_type`` values: "ECFP4", "ECFP6", "Avalon",
        "MACCSkeys", "hashAP", "hashTT", "RDK5", "RDK6" and "RDK7".

        Returns the fingerprint as a numpy array reshaped to ``(1, -1)``, or
        ``None`` when RDKit cannot parse ``smi``.

        Raises
        ------
        ValueError
            If ``fp_type`` is not one of the supported names (previously this
            surfaced as an ``UnboundLocalError`` on the return line).
        """
        mol = Chem.MolFromSmiles(smi)
        if not mol:
            return None

        # Circular fingerprints
        if fp_type == "ECFP4":
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=1024)
        elif fp_type == "ECFP6":
            fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=3, nBits=1024)

        # Structural fingerprints
        elif fp_type == "Avalon":
            fp = pyAvalonTools.GetAvalonFP(mol, nBits=1024)
        elif fp_type == "MACCSkeys":
            fp = rdkit.Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)

        # Path-based fingerprints
        elif fp_type == "hashAP":
            fp = rdkit.Chem.rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(mol, nBits=1024)
        elif fp_type == "hashTT":
            fp = rdkit.Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(mol, nBits=1024)
        elif fp_type == "RDK5":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=5, fpSize=1024, nBitsPerHash=2)
        elif fp_type == "RDK6":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=6, fpSize=1024, nBitsPerHash=2)
        elif fp_type == "RDK7":
            fp = rdkit.Chem.rdmolops.RDKFingerprint(mol, maxPath=7, fpSize=1024, nBitsPerHash=2)
        else:
            raise ValueError("Unsupported fingerprint type: {!r}".format(fp_type))

        return np.asarray(fp).reshape(1, -1)
Example #8
0
def latent_space(smiles, N_BITS=512):
    """Encode a SMILES string as a Morgan (radius 2) bit-vector array.

    The fingerprint is requested with ``nBits=N_BITS`` and converted with
    ``np.array``.  Returns ``None`` when RDKit cannot parse ``smiles``.
    """
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        return None
    return np.array(AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=N_BITS))
Example #9
0
 def _avalon(self, molecules: List, parameters: dict):
     """Compute Avalon fingerprints for ``molecules`` as numpy arrays.

     ``parameters['size']`` (default 512) is the requested fingerprint
     length.  It is now passed to ``GetAvalonFP`` as ``nBits``; before, it
     only sized the numpy buffer while the fingerprint itself always used
     the library default length.

     Returns a list with one ``int32`` array per molecule.
     """
     size = parameters.get('size', 512)
     fingerprints = []
     for mol in molecules:
         fp = pyAvalonTools.GetAvalonFP(mol, nBits=size)
         fp_np = np.zeros((1, size), dtype=np.int32)
         # NOTE(review): ConvertToNumpyArray fills fp_np in place and may
         # reshape it to the fingerprint length - confirm the final shape.
         DataStructs.ConvertToNumpyArray(fp, fp_np)
         fingerprints.append(fp_np)
     return fingerprints
Example #10
0
def rd_kit_aval(dir_sdf = "../data/sdf/"):
    """Collect the 128-bit Avalon fingerprint of the first molecule of every SDF file.

    Files are read from ``dir_sdf`` (previously the argument was only used
    for listing, while loading always used the hard-coded default path).
    Hidden entries are skipped and files are processed in sorted order.

    Returns a numpy array built from the list of fingerprints; the array is
    empty when the directory holds no files.

    Raises ``FileNotFoundError`` if ``dir_sdf`` does not exist.
    """
    # os.listdir avoids building a shell command from the argument and
    # keeps file names containing whitespace intact.
    sdf_files = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith("."))

    fingerprints = []
    for name in sdf_files:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], 128))
    return np.array(fingerprints)
Example #11
0
def aval(dir="../data/sdf/DB/", bit_length=128):
    """Collect the Avalon fingerprint of the first molecule of every SDF file in ``dir``.

    ``bit_length`` is passed to ``GetAvalonFP`` as the fingerprint length.
    Files are now loaded from ``dir`` itself (previously loading always used
    the hard-coded default path, whatever ``dir`` was).  Hidden entries are
    skipped and files are processed in sorted order.

    Returns a numpy array built from the list of fingerprints; the array is
    empty when the directory holds no files.
    """
    # os.listdir avoids building a shell command from the argument and
    # keeps file names containing whitespace intact.
    sdf_files = sorted(
        name for name in os.listdir(dir) if not name.startswith("."))

    avalon = []
    for name in sdf_files:
        suppl = SDMolSupplier(os.path.join(dir, name))
        avalon.append(pyAvalonTools.GetAvalonFP(suppl[0], bit_length))
    return np.array(avalon)
Example #12
0
def Fingerprints(mols, fingerprint):
    """Compute the fingerprint named ``fingerprint`` for every molecule in ``mols``.

    The name is checked, in order, against the module-level groups
    ``indigofps``, ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps``.
    Returns a list with one fingerprint per molecule, or ``None`` when the
    name is not handled by any group.
    """
    # Indigo molecules compute their own fingerprints.
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # RDKit bit-vector fingerprints.
    if fingerprint in rdkitfps:
        bit_makers = {
            "atompair": lambda m: Pairs.GetAtomPairFingerprintAsBitVect(m),
            "avalon": lambda m: pyAvalonTools.GetAvalonFP(m),
            "daylight": lambda m: Chem.RDKFingerprint(m, fpSize=2048),
            "maccs": lambda m: MACCSkeys.GenMACCSKeys(m),
            "morgan": lambda m: AllChem.GetMorganFingerprintAsBitVect(
                m, 2, nBits=1024),
            "pharm2d": lambda m: Generate.Gen2DFingerprint(
                m, Gobbi_Pharm2D.factory),
            "topological": lambda m: FingerprintMols.FingerprintMol(m),
        }
        maker = bit_makers.get(fingerprint)
        if maker is not None:
            return [maker(mol) for mol in mols]

    # RDKit fingerprints that are not bit vectors (integer or float valued).
    if fingerprint in rdkitnonbitfps:
        nonbit_makers = {
            "sheridan": lambda m: Sheridan.GetBPFingerprint(m),
            "topotorsion": lambda m: Torsions.GetTopologicalTorsionFingerprint(m),
        }
        maker = nonbit_makers.get(fingerprint)
        if maker is not None:
            return [maker(mol) for mol in mols]

    # E-state fingerprints: the name selects which element of the result is kept.
    if fingerprint in rdkitestatefps:
        estate_index = {"estate1": 0, "estate2": 1}.get(fingerprint)
        if estate_index is not None:
            return [
                Fingerprinter.FingerprintMol(mol)[estate_index] for mol in mols
            ]

    # Nothing matched.
    return None
Example #13
0
def pipe_calc_fp_b64(stream, summary=None, comp_id="pipe_calc_fp"):
    """Add a pre-computed fingerprint to every record that carries a mol.

    Useful as a separate pipeline step ahead of a similarity search that
    consumes the fingerprint.  The fingerprint is calculated from the Murcko
    scaffold of ``rec["mol"]`` with the method selected by ``USE_FP``, then
    pickled, base64-encoded and stored as text under ``"FP_b64"``.

    Records without a ``"mol"`` entry are dropped.  When ``summary`` is
    given, ``summary[comp_id]`` holds the number of records yielded so far.
    """
    processed = 0
    for rec in stream:
        if "mol" in rec:
            scaffold = MurckoScaffold.GetScaffoldForMol(rec["mol"])

            if USE_FP == "morgan":
                fingerprint = Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                    scaffold, 2)
            elif USE_FP == "avalon":
                fingerprint = pyAv.GetAvalonFP(scaffold, 1024)
            else:
                fingerprint = FingerprintMols.FingerprintMol(scaffold)

            processed += 1
            pickled = pickle.dumps(fingerprint)
            rec["FP_b64"] = b64.b64encode(pickled).decode()

            if summary is not None:
                summary[comp_id] = processed
            yield rec
Example #14
0
def aval(dir="../data/sdf/DB/", bit_length=256):
    """Pair Avalon fingerprints of SDF files with their tabulated values.

    ``merge_dir_and_data(dir=dir)`` supplies the file names and a parallel
    list of ``"name:value1:value2"`` strings.  For each file the fingerprint
    of its first molecule is computed; the file is kept when its name
    (without the 4-character extension) equals the name part of the entry at
    the same index or, failing that, of the next entry.

    Returns ``(names, fingerprints, homo, homo1, diff)`` where
    ``fingerprints`` is a numpy array and ``diff`` holds ``homo - homo1``
    for each kept file.  Files that cannot be read or parsed are skipped.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []
    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)

    def keep(item, fp_aval, entry):
        """Parse the two values of `entry` and store the file in all result lists."""
        fields = entry.split(":")
        # Parse before appending so a malformed entry cannot leave the
        # result lists with different lengths.
        homo_temp = float(fields[1])
        homo1_temp = float(fields[2])
        fingerprints.append(fp_aval)
        names.append(item)
        homo.append(homo_temp)
        homo1.append(homo1_temp)
        diff.append(homo_temp - homo1_temp)

    #---------------------------------------------------------------------------
    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)
            stem = item[0:-4]

            if stem == list_to_sort[tmp].split(":")[0]:
                keep(item, fp_aval, list_to_sort[tmp])
            else:
                try:
                    if stem == list_to_sort[tmp + 1].split(":")[0]:
                        keep(item, fp_aval, list_to_sort[tmp + 1])
                except Exception:
                    # No usable neighbouring entry: report the mismatch.
                    print(list_to_sort[tmp].split(":")[0], stem)
            sys.stdout.write("\r %s /" % tmp + str(len(dir_fl_names)))
            sys.stdout.flush()
        except Exception:
            # Best effort: skip files that cannot be read or matched.
            continue
    return names, np.array(fingerprints), homo, homo1, diff
Example #15
0
    def test4(self):
        """Avalon fingerprints built from SMILES strings and from mol blocks.

        Uses ``assertEqual`` / ``assertTrue``; the old ``failUnless*``
        aliases are no longer available in current Python releases.
        """
        # Second positional argument True: the input string is a SMILES.
        bv = pyAvalonTools.GetAvalonFP('c1ccccn1', True)
        self.assertEqual(bv.GetNumOnBits(), 20)
        bv = pyAvalonTools.GetAvalonFP('c1ccccc1', True)
        self.assertEqual(bv.GetNumOnBits(), 8)
        bv = pyAvalonTools.GetAvalonFP('c1nnccc1', True)
        self.assertEqual(bv.GetNumOnBits(), 30)
        bv = pyAvalonTools.GetAvalonFP('c1ncncc1', True)
        self.assertEqual(bv.GetNumOnBits(), 27)
        bv = pyAvalonTools.GetAvalonFP('c1ncncc1', True, nBits=1024)
        self.assertEqual(len(bv), 1024)
        self.assertTrue(bv.GetNumOnBits() > 27)

        # Second positional argument False: the input string is a mol block.
        bv = pyAvalonTools.GetAvalonFP(
            Chem.MolToMolBlock(Chem.MolFromSmiles('c1ccccn1')), False)
        self.assertEqual(len(bv), 512)
        self.assertEqual(bv.GetNumOnBits(), 20)
        bv = pyAvalonTools.GetAvalonFP(
            Chem.MolToMolBlock(Chem.MolFromSmiles('c1ccccc1')), False)
        self.assertEqual(bv.GetNumOnBits(), 8)
Example #16
0
# Register fingerprint generators in FPDICT.  Every value is a one-argument
# callable taking a molecule `m`; names such as `nbits` are looked up when
# the callable runs, not when it is registered.

# MACCS keys.
FPDICT['maccs'] = lambda m: MACCSkeys.GenMACCSKeys(m)
# Atom-pair and topological-torsion fingerprints.
FPDICT['ap'] = lambda m: Pairs.GetAtomPairFingerprint(m)
FPDICT['tt'] = lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
# Hashed atom-pair / topological-torsion bit vectors with `nbits` bits.
FPDICT['hashap'] = lambda m: Desc.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=nbits)
FPDICT[
    'hashtt'] = lambda m: Desc.GetHashedTopologicalTorsionFingerprintAsBitVect(
        m, nBits=nbits)
# RDKit fingerprints with maxPath 5, 6 and 7.
FPDICT['rdk5'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=5, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk6'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk7'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=7, fpSize=nbits, nBitsPerHash=2)
# Avalon fingerprints are only registered when USE_AVALON is truthy;
# 'avalon_l' uses `nbits_long` instead of `nbits`.
if USE_AVALON:
    FPDICT['avalon'] = lambda m: pyAv.GetAvalonFP(m, nbits)
    FPDICT['avalon_l'] = lambda m: pyAv.GetAvalonFP(m, nbits_long)


def mpl_hist(data,
             bins=10,
             xlabel="values",
             ylabel="Occurrence",
             show=False,
             save=True,
             **kwargs):
    """Useful kwargs: size (tuple<int>), dpi (int), fn (filename, str), title (str)

    Defaults read below: ``dpi`` falls back to 96, ``size`` to (300, 350)
    and ``title`` to None.
    """
    my_dpi = kwargs.get("dpi", 96)
    size = kwargs.get("size", (300, 350))
    title = kwargs.get("title", None)
    # Both components of `size` are divided by the dpi
    # (presumably pixels -> inches for the figure - TODO confirm).
    figsize = (size[0] / my_dpi, size[1] / my_dpi)
    def avalon_fp(self):
        """
        Build a de-duplicated feature/label matrix of 512-bit Avalon fingerprints.

        The CSV file at ``self.csv_path`` is read; its ``Smiles`` column
        provides the compounds and its ``Label`` column the classes.  Rows
        whose fingerprint cannot be computed are dropped.  Labels are encoded
        with ``LabelEncoder`` and appended as the last column.  Rows with a
        duplicate fingerprint are removed (``np.unique`` on the fingerprint
        columns decides which row of each group is kept and in what order).

        Returns
        -------
        np.ndarray
            ``float32`` array with one row per unique fingerprint: the
            fingerprint bits followed by the encoded label.
        """
        df = pd.read_csv(self.csv_path)
        smiles_list = df['Smiles'].tolist()

        fingerprints = []
        not_found = []

        for i, smiles in enumerate(tqdm(smiles_list)):
            try:
                mol = Chem.MolFromSmiles(smiles)
                fp = pyAvalonTools.GetAvalonFP(mol, nBits=512)
                # np.frombuffer replaces the deprecated binary mode of
                # np.fromstring; subtracting ord('0') maps '0'/'1' to 0/1.
                bits_array = (np.frombuffer(fp.ToBitString().encode('ascii'),
                                            dtype='u1') - ord('0'))
            except Exception:
                # Unparsable SMILES (or any fingerprint failure): remember
                # the row so it can be removed from the labels as well.
                not_found.append(i)
                continue
            fingerprints.append(bits_array)

        df.drop(not_found, axis=0, inplace=True)

        print('Number of FPs not found: {}'.format(len(not_found)))

        df.reset_index(drop=True, inplace=True)
        labelencoder = LabelEncoder()
        Y = labelencoder.fit_transform(df['Label'].values)
        Y = Y.reshape(Y.shape[0], 1)

        print('Output shape: {}'.format(Y.shape))

        # Only successful fingerprints were collected, so no placeholder
        # rows have to be deleted before stacking.
        X = np.vstack(fingerprints).astype(np.float32)

        print('Input shape: {}'.format(X.shape))

        final_array = np.concatenate((X, Y), axis=1)

        # Removing rows, from final_array, where duplicate FPs are present
        # (the label column is excluded from the comparison).
        final_array_slice = final_array[:, :-1]
        _, unq_row_indices = np.unique(final_array_slice,
                                       return_index=True,
                                       axis=0)
        final_array_unique = final_array[unq_row_indices]

        print(
            'Number of Duplicate FPs: {}'.format(final_array.shape[0] -
                                                 final_array_unique.shape[0]))

        print('Final Numpy array shape: {}'.format(final_array_unique.shape))
        print('Type of final array: {}'.format(type(final_array_unique)))
        final_numpy_array = np.asarray((final_array_unique), dtype=np.float32)

        return final_numpy_array
# Fingerprint generators keyed by name.  Every value is a one-argument
# callable taking a molecule `m`.  Keys ending in "_cas_length" use
# `n_cas_bits` as the fingerprint length where the plain key uses `nbits`.

# Hashed atom-pair fingerprint.
fpdict[
    "hashap_cas_length"
] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=n_cas_bits
)
# Hashed topological-torsion fingerprints.
fpdict[
    "hashtt"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=nbits
)
fpdict[
    "hashtt_cas_length"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=n_cas_bits
)
# Avalon fingerprints; "laval" uses `longbits` as its length.
fpdict["avalon"] = lambda m: fpAvalon.GetAvalonFP(m, nbits)
fpdict["avalon_cas_length"] = lambda m: fpAvalon.GetAvalonFP(m, n_cas_bits)
fpdict["laval"] = lambda m: fpAvalon.GetAvalonFP(m, longbits)
# RDKit fingerprints with maxPath 5, 6 and 7.
fpdict["rdk5"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=5, fpSize=nbits, nBitsPerHash=2
)
fpdict["rdk6"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=nbits, nBitsPerHash=2
)
fpdict["rdk6_cas_length"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=n_cas_bits, nBitsPerHash=2
)
fpdict["rdk7"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=7, fpSize=nbits, nBitsPerHash=2
)
# Delegates to create_cas_fp, which is defined outside this excerpt.
fpdict["cas"] = lambda m: create_cas_fp(m)
Example #19
0
# Dictionary mapping a fingerprint name to a one-argument callable that
# computes it for a molecule `m`.  `nbits` / `longbits` are looked up when
# the callable runs.
fpFunc_dict = {}
# Morgan bit vectors with radius 0-3 ("ecfp*") ...
fpFunc_dict['ecfp0'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits)
fpFunc_dict['ecfp2'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits)
fpFunc_dict['ecfp4'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits)
fpFunc_dict['ecfp6'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits)
# ... the same with useFeatures=True ("fcfp*") ...
fpFunc_dict['fcfp2'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, useFeatures=True, nBits=nbits)
fpFunc_dict['fcfp4'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=nbits)
fpFunc_dict['fcfp6'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=nbits)
# ... and the "l"-prefixed variants, which use `longbits` as the length.
fpFunc_dict['lecfp4'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=longbits)
fpFunc_dict['lecfp6'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=longbits)
fpFunc_dict['lfcfp4'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=longbits)
fpFunc_dict['lfcfp6'] = lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=longbits)
# MACCS keys, hashed atom-pair / topological-torsion, and Avalon fingerprints.
fpFunc_dict['maccs'] = lambda m: MACCSkeys.GenMACCSKeys(m)
fpFunc_dict['hashap'] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(m, nBits=nbits)
fpFunc_dict['hashtt'] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=nbits)
fpFunc_dict['avalon'] = lambda m: fpAvalon.GetAvalonFP(m, nbits)
fpFunc_dict['laval'] = lambda m: fpAvalon.GetAvalonFP(m, longbits)
# RDKit fingerprints with maxPath 5, 6 and 7.
fpFunc_dict['rdk5'] = lambda m: Chem.RDKFingerprint(m, maxPath=5, fpSize=nbits, nBitsPerHash=2)
fpFunc_dict['rdk6'] = lambda m: Chem.RDKFingerprint(m, maxPath=6, fpSize=nbits, nBitsPerHash=2)
fpFunc_dict['rdk7'] = lambda m: Chem.RDKFingerprint(m, maxPath=7, fpSize=nbits, nBitsPerHash=2)
# Delegates to get_tpatf and calc.CalcDescriptors, defined outside this excerpt.
fpFunc_dict['tpatf'] = lambda m: get_tpatf(m)
fpFunc_dict['rdkDes'] = lambda m: calc.CalcDescriptors(m)

# Names of the fingerprints that use `longbits`.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}
# Subset of fpFunc_dict keys (presumably the ones actually computed - verify against usage).
fps_to_generate = ['fcfp4', 'rdkDes', 'tpatf', 'rdk5', 'hashap', 'avalon', 'laval', 'rdk7']

# Starts empty; judging by the name it maps model file names to loaded models - TODO confirm.
ModFileName_LoadedModel_dict = {}

Example #20
0
 def _fingerprinter(x, y):
     """Return the Avalon fingerprint of ``x`` built with the ``avalonSSSBits`` flags.

     ``y`` is forwarded to ``GetAvalonFP`` as its ``isQuery`` argument.
     """
     fingerprint = pyAvalonTools.GetAvalonFP(
         x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits)
     return fingerprint
Example #21
0
    print(
        "* holoviews could not be imported. scatter() and struct_hover() are not available."
    )

# True when nbt reports an interactive IPython session.
IPYTHON = nbt.is_interactive_ipython()

# HTML is only imported when running under IPython.
if IPYTHON:
    from IPython.core.display import HTML

DEBUG = False
# Length used by the bit-vector fingerprints registered below.
nbits = 1024
# Maps a fingerprint name to a one-argument callable taking a molecule `m`.
FPDICT = {}

# The Avalon entry is optional: it is skipped when the Avalon tools cannot
# be imported.
try:
    import rdkit.Avalon.pyAvalonTools as pyAv
    FPDICT["avalon"] = lambda m: pyAv.GetAvalonFP(m)
except ImportError:
    pass

# "ecfp*": Morgan bit vectors with radius 0-3 and `nbits` bits.
FPDICT["ecfp0"] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 0, nBits=nbits)
FPDICT["ecfp2"] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 1, nBits=nbits)
FPDICT["ecfp4"] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 2, nBits=nbits)
FPDICT["ecfp6"] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 3, nBits=nbits)
# "ecfc*": the same radii via GetMorganFingerprint (no nBits argument).
FPDICT["ecfc0"] = lambda m: Chem.GetMorganFingerprint(m, 0)
FPDICT["ecfc2"] = lambda m: Chem.GetMorganFingerprint(m, 1)
FPDICT["ecfc4"] = lambda m: Chem.GetMorganFingerprint(m, 2)
FPDICT["ecfc6"] = lambda m: Chem.GetMorganFingerprint(m, 3)
Example #22
0
    import pandas as pd
    sio = StringIO()
    x.save(sio, format='PNG')
    s = b64encode(sio.getvalue())
    pd.set_option('display.max_columns', len(s) + 1000)
    pd.set_option('display.max_rows', len(s) + 1000)
    if len(s) + 100 > pd.get_option("display.max_colwidth"):
        pd.set_option("display.max_colwidth", len(s) + 1000)
    return s


from rdkit import DataStructs

# Choose the substructure-screening fingerprinter: `_fingerprinter(x, y)`
# fingerprints molecule `x`.  With the Avalon tools available, `y` is passed
# on as `isQuery`; the RDKit pattern-fingerprint fallback ignores `y`.
try:
    from rdkit.Avalon import pyAvalonTools as pyAvalonTools
    _fingerprinter = lambda x, y: pyAvalonTools.GetAvalonFP(
        x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits)
except ImportError:
    _fingerprinter = lambda x, y: Chem.PatternFingerprint(x, fpSize=2048)


def _molge(x, y):
    """Allows for substructure check using the >= operator (X has substructure Y -> X >= Y) by
    monkey-patching the __ge__ function.
    This has the effect that the pandas/numpy rowfilter can be used for substructure filtering
    (filtered = dframe[dframe['RDKitColumn'] >= SubstructureMolecule])
    """
    # A missing molecule on either side never matches.
    if x is None or y is None: return False
    # Fingerprint pre-screen, only when x carries a cached `_substructfp`.
    if hasattr(x, '_substructfp'):
        # The query fingerprint is computed on first use and cached on y.
        if not hasattr(y, '_substructfp'):
            y._substructfp = _fingerprinter(y, True)
        # Fingerprint screen failed: report no match.
        if not DataStructs.AllProbeBitsMatch(y._substructfp, x._substructfp):
            return False
    # NOTE(review): no further statements are visible here - confirm whether
    # a full substructure match is meant to follow.
Example #23
0
<<<<<<< HEAD:source/test_files/descriptor_test.py
temp = os.popen("ls ../data/sdf/").read()
temp = str(temp).split()

bit_length = 256

sim_matrix_morgan = []
sim_matrix_rdk = []
sim_matrix_aval = []
sim_matrix_layer = []

baseline = SDMolSupplier("../data/sdf/" + temp[0])

baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(baseline[0], 2, nBits=bit_length)
baseline_rdk = AllChem.RDKFingerprint(baseline[0], maxPath=2)
baseline_aval = pyAvalonTools.GetAvalonFP(baseline[0], 128)
baseline_layer = AllChem.LayeredFingerprint(baseline[0])

for item in temp:
    suppl = SDMolSupplier("../data/sdf/" + item)

    fp = AllChem.GetMorganFingerprint(suppl[0], 2)

    fp_bit = AllChem.GetMorganFingerprintAsBitVect(suppl[0], 2, nBits=bit_length)
    fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2)
    fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], 128)
    fp_layer = AllChem.LayeredFingerprint(suppl[0])

    sim_matrix_morgan.append(
        DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=DataStructs.TanimotoSimilarity))
    sim_matrix_rdk.append(