def convertdata(positives, negatives, bitsize, radius):
    """Turn two SMILES collections into combined Morgan + Avalon bit arrays.

    Every SMILES is parsed with ``Chem.MolFromSmiles``; entries that fail to
    parse (``None``) are dropped.  For each remaining molecule the Morgan bit
    string (``int(radius)``, ``int(bitsize)`` bits) is concatenated with the
    Avalon bit string, and the result is turned into a numpy array of ints.

    Returns:
        tuple: ``(positive_arrays, negative_arrays)``, two lists of numpy
        arrays in input order (minus the unparsable entries).
    """

    def featurize(smiles_collection):
        # Parse first, then discard anything RDKit could not read.
        parsed = [Chem.MolFromSmiles(smi) for smi in smiles_collection]
        mols = [mol for mol in parsed if mol is not None]

        morgan_bits = [
            AllChem.GetMorganFingerprintAsBitVect(
                mol, int(radius), nBits=int(bitsize)).ToBitString()
            for mol in mols
        ]
        avalon_bits = [AvT.GetAvalonFP(mol).ToBitString() for mol in mols]

        # Morgan bits come first, Avalon bits are appended after them.
        return [
            numpy.array([int(bit) for bit in morgan + avalon])
            for morgan, avalon in zip(morgan_bits, avalon_bits)
        ]

    positive_arrays = featurize(positives)
    negative_arrays = featurize(negatives)
    return positive_arrays, negative_arrays
def BuildAvalonFP(mol, smiles=None):
    """Return the Avalon fingerprint for a molecule or for a SMILES string.

    Args:
        mol: Molecule object, used only when ``smiles`` is None.
        smiles: Optional SMILES text.  When given it takes precedence over
            ``mol`` and is handed to Avalon directly.

    Returns:
        Whatever ``pyAvalonTools.GetAvalonFP`` returns for the chosen input.
    """
    # Imported lazily so the Avalon toolkit is only required when this is called.
    from rdkit.Avalon import pyAvalonTools

    if smiles is not None:
        # The second positional argument presumably marks the text as SMILES
        # (as opposed to a mol block) -- confirm against the Avalon API.
        return pyAvalonTools.GetAvalonFP(smiles, True)
    return pyAvalonTools.GetAvalonFP(mol)
def rd_kit(dir_sdf="../data/sdf/"):
    """Plot fingerprint similarity of every SDF file against the first one.

    The first molecule of the first file in ``dir_sdf`` is the baseline.  For
    the first molecule of every file, Tanimoto similarities to the baseline
    are computed for Morgan, RDK, Avalon and layered fingerprints.  The Morgan
    and RDK similarities are shown as histograms; the mean RDK similarity and
    the number of files are printed.

    Args:
        dir_sdf: Directory containing the SDF files.

    Raises:
        IndexError: If the directory contains no (non-hidden) entries.
    """
    # The directory is listed with os.listdir instead of a string-built `ls`
    # shell command: no shell involved, and names containing whitespace are
    # no longer split into pieces.  Hidden entries are skipped as `ls` did;
    # sorting is by code point, so it no longer depends on the locale.
    sdf_files = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )
    bit_length = 1024
    tanimoto = DataStructs.TanimotoSimilarity

    baseline_mol = SDMolSupplier(os.path.join(dir_sdf, sdf_files[0]))[0]
    baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(
        baseline_mol, 2, nBits=bit_length)
    baseline_rdk = AllChem.RDKFingerprint(baseline_mol, maxPath=2)
    baseline_aval = pyAvalonTools.GetAvalonFP(baseline_mol, 128)
    baseline_layer = AllChem.LayeredFingerprint(baseline_mol)

    sim_matrix_morgan = []
    sim_matrix_rdk = []
    sim_matrix_aval = []
    sim_matrix_layer = []
    for item in sdf_files:
        mol = SDMolSupplier(os.path.join(dir_sdf, item))[0]
        # NOTE(review): the baseline uses Morgan radius 2 / RDK maxPath 2 but
        # the per-file fingerprints use 3 -- the baseline compared with
        # itself is therefore not guaranteed to score 1.0.  Kept as found;
        # confirm whether this is intentional.
        fp_bit = AllChem.GetMorganFingerprintAsBitVect(mol, 3, nBits=bit_length)
        fp_rdk = AllChem.RDKFingerprint(mol, maxPath=3)
        fp_aval = pyAvalonTools.GetAvalonFP(mol, 128)
        fp_layer = AllChem.LayeredFingerprint(mol)

        sim_matrix_morgan.append(
            DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=tanimoto))
        sim_matrix_rdk.append(
            DataStructs.FingerprintSimilarity(baseline_rdk, fp_rdk, metric=tanimoto))
        sim_matrix_aval.append(
            DataStructs.FingerprintSimilarity(baseline_aval, fp_aval, metric=tanimoto))
        sim_matrix_layer.append(
            DataStructs.FingerprintSimilarity(baseline_layer, fp_layer, metric=tanimoto))

    sim_matrix_morgan = np.array(sim_matrix_morgan)
    sim_matrix_rdk = np.array(sim_matrix_rdk)
    sim_matrix_aval = np.array(sim_matrix_aval)
    sim_matrix_layer = np.array(sim_matrix_layer)

    label_morgan = "morgan" + str(bit_length)
    plt.hist(sim_matrix_morgan, label=label_morgan)
    plt.hist(sim_matrix_rdk, label="rdk2")
    # Avalon and layered histograms are currently switched off:
    # plt.hist(sim_matrix_aval, label = "avalon128")
    # plt.hist(sim_matrix_layer, label = "layer")
    print(np.mean(sim_matrix_rdk))
    print(len(sdf_files))
    plt.xlabel("Similarity to Baseline")
    plt.ylabel("Counts")
    plt.title("Different Fingerprinting Methods, Similarity to Baseline")
    plt.legend()
    plt.show()
def test3(self):
    """Check Avalon fingerprints built from molecules parsed from SMILES.

    Covers the default fingerprint length, the on-bit counts of a few small
    aromatic rings, and the effect of requesting 1024 bits.
    """

    def fingerprint(smiles, **options):
        return pyAvalonTools.GetAvalonFP(Chem.MolFromSmiles(smiles), **options)

    # Pyridine: default length and on-bit count.
    pyridine_fp = fingerprint('c1ccccn1')
    self.assertEqual(len(pyridine_fp), 512)
    self.assertEqual(pyridine_fp.GetNumOnBits(), 20)

    # On-bit counts with default settings.
    expected_on_bits = (
        ('c1ccccc1', 8),
        ('c1nnccc1', 30),
        ('c1ncncc1', 27),
    )
    for smiles, on_bits in expected_on_bits:
        self.assertEqual(fingerprint(smiles).GetNumOnBits(), on_bits)

    # A longer fingerprint sets more bits than the default-length one (27).
    wide_fp = fingerprint('c1ncncc1', nBits=1024)
    self.assertEqual(len(wide_fp), 1024)
    self.assertTrue(wide_fp.GetNumOnBits() > 27)
def pipe_sim_filter(stream, query, cutoff=80, summary=None, comp_id="pipe_sim_filter"):
    """Yield the records whose similarity to `query` is at least `cutoff`.

    `query` is a SMILES string; `cutoff` is given in percent.  Fingerprints
    are taken from the Murcko scaffold, using the type selected by the
    module-level `USE_FP`.  A record that already carries a `FP_b64` field
    (e.g. pre-calculated) uses that fingerprint; otherwise the scaffold
    fingerprint is generated on the fly, which is much slower.

    Records without a `mol` field are skipped.  Each passing record gets a
    `Sim` field (similarity in percent, rounded to two decimals), and
    `summary[comp_id]`, if a summary is given, is set to the running number
    of passing records.  When the query SMILES cannot be parsed, an error is
    printed and nothing is yielded.
    """

    def scaffold_fingerprint(mol):
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
        if USE_FP == "morgan":
            return Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                scaffold, 2)
        if USE_FP == "avalon":
            return pyAv.GetAvalonFP(scaffold, 1024)
        return FingerprintMols.FingerprintMol(scaffold)

    hits = 0
    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        print("* {} ERROR: could not generate query from SMILES.".format(
            comp_id))
        return None
    query_fp = scaffold_fingerprint(query_mol)

    for rec in stream:
        if "mol" not in rec:
            continue

        if "FP_b64" in rec:
            # Prefer the fingerprint shipped with the record.
            # NOTE(review): pickle.loads can execute arbitrary code -- only
            # safe when the stream comes from a trusted source.
            encoded = rec["FP_b64"]
            mol_fp = pickle.loads(b64.b64decode(encoded))
        else:
            mol_fp = scaffold_fingerprint(rec["mol"])

        sim = DataStructs.FingerprintSimilarity(query_fp, mol_fp)
        if sim * 100 >= cutoff:
            hits += 1
            rec["Sim"] = np.round(sim * 100, 2)
            if summary is not None:
                summary[comp_id] = hits
            yield rec
def __init__(self):
    """Register the fingerprint generators and create empty result holders.

    ``binaryfp_names`` / ``binaryfp`` and ``countfp_names`` / ``countfp`` are
    parallel lists: the callable at position *i* takes a molecule and
    produces the fingerprint named at position *i*.  The count generators
    pass ``self.bit_info`` as ``bitInfo``; the attribute is looked up when a
    generator is called, not when it is created.
    """

    def morgan_bits(radius, n_bits, **extra):
        return lambda mol: AllChem.GetMorganFingerprintAsBitVect(
            mol, radius, nBits=n_bits, **extra
        )

    def morgan_counts(radius, use_features):
        return lambda mol: AllChem.GetMorganFingerprint(
            mol, radius=radius, bitInfo=self.bit_info, useFeatures=use_features
        )

    binary_generators = [
        ("MACCSkeys", lambda mol: MACCSkeys.GenMACCSKeys(mol)),
        ("Avalon", lambda mol: pyAvalonTools.GetAvalonFP(mol)),
        ("Morgan2(1024bits)", morgan_bits(2, 1024)),
        ("Morgan2F(1024bits)", morgan_bits(2, 1024, useFeatures=True)),
        ("Morgan4(2048bits)", morgan_bits(4, 2048)),
        ("Morgan4F(2048bits)", morgan_bits(4, 2048, useFeatures=True)),
        # Disabled generators, kept for reference:
        #   "AtomPair": Pairs.GetAtomPairFingerprintAsBitVect(mol)
        #       -> crashes
        #   "Topological": FingerprintMols.FingerprintMol(mol)
        #       -> topological fingerprint; produces NaN
        #   "TopologicalTortion":
        #       Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
        #       -> has no ToBitString
    ]
    self.binaryfp_names = [name for name, _ in binary_generators]
    self.binaryfp = [generator for _, generator in binary_generators]

    # The name encodes the diameter, i.e. twice the radius passed to RDKit.
    count_generators = [
        ("ECFP2", morgan_counts(1, False)),
        ("FCFP2", morgan_counts(1, True)),
        ("ECFP4", morgan_counts(2, False)),
        ("FCFP4", morgan_counts(2, True)),
        ("ECFP6", morgan_counts(3, False)),
        ("FCFP6", morgan_counts(3, True)),
    ]
    self.countfp_names = [name for name, _ in count_generators]
    self.countfp = [generator for _, generator in count_generators]

    self.bit_info = {}
    self.bit_infos = {}
    self.vectors = []
    self.all_bit_info_keys = {}
    self.mols = []
def calculate_fp(smi: str, fp_type: str):
    """Calculate the fingerprint of type ``fp_type`` for a SMILES string.

    Args:
        smi: SMILES of the molecule.
        fp_type: One of ``"ECFP4"``, ``"ECFP6"``, ``"Avalon"``,
            ``"MACCSkeys"``, ``"hashAP"``, ``"hashTT"``, ``"RDK5"``,
            ``"RDK6"``, ``"RDK7"``.

    Returns:
        The fingerprint as a numpy array reshaped to a single row, or
        ``None`` when ``smi`` cannot be parsed.

    Raises:
        ValueError: If ``fp_type`` is not a supported name.  Previously an
            unknown name surfaced as an ``UnboundLocalError`` on the return
            statement; it is now rejected up front, before the SMILES is
            parsed.
    """
    # Lambdas keep every RDKit lookup lazy: only the selected one is evaluated.
    generators = {
        # Circular fingerprints
        "ECFP4": lambda mol: rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol, radius=2, nBits=1024),
        "ECFP6": lambda mol: rdMolDescriptors.GetMorganFingerprintAsBitVect(
            mol, radius=3, nBits=1024),
        # Structural fingerprints
        "Avalon": lambda mol: pyAvalonTools.GetAvalonFP(mol, nBits=1024),
        "MACCSkeys": lambda mol: rdkit.Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol),
        # Path-based fingerprints
        "hashAP": lambda mol: rdkit.Chem.rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
            mol, nBits=1024),
        "hashTT": lambda mol: rdkit.Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
            mol, nBits=1024),
        "RDK5": lambda mol: rdkit.Chem.rdmolops.RDKFingerprint(
            mol, maxPath=5, fpSize=1024, nBitsPerHash=2),
        "RDK6": lambda mol: rdkit.Chem.rdmolops.RDKFingerprint(
            mol, maxPath=6, fpSize=1024, nBitsPerHash=2),
        "RDK7": lambda mol: rdkit.Chem.rdmolops.RDKFingerprint(
            mol, maxPath=7, fpSize=1024, nBitsPerHash=2),
    }
    try:
        generate = generators[fp_type]
    except KeyError:
        raise ValueError(
            "Unknown fingerprint type {!r}; expected one of {}".format(
                fp_type, ", ".join(generators))
        ) from None

    mol = Chem.MolFromSmiles(smi)
    if not mol:
        return None
    return np.asarray(generate(mol)).reshape(1, -1)
def latent_space(smiles, N_BITS=512):
    """Return the Morgan (radius 2) bit fingerprint of a SMILES as an array.

    Args:
        smiles: SMILES string of the molecule.
        N_BITS: Length of the fingerprint in bits.

    Returns:
        ``np.array`` built from the fingerprint, or ``None`` when the SMILES
        cannot be converted to an RDKit molecule.
    """
    m = Chem.MolFromSmiles(smiles)
    if m is None:
        # Unparsable input is reported with None; the ValueError that used to
        # follow this return (and a second, Avalon-based return after the
        # Morgan one) could never execute and has been removed.
        return None
    return np.array(AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=N_BITS))
def _avalon(self, molecules: List, parameters: dict):
    """Compute Avalon fingerprints for ``molecules`` as numpy arrays.

    Args:
        molecules: Molecules accepted by ``pyAvalonTools.GetAvalonFP``.
        parameters: Options; ``'size'`` is the fingerprint length in bits
            (512 when absent).

    Returns:
        list: One ``np.int32`` array per molecule, in input order.
    """
    size = parameters.get('size', 512)
    fingerprints = []
    for mol in molecules:
        # The requested size is now handed to Avalon as well.  Before, the
        # fingerprint was generated without a length argument, so a
        # non-default 'size' only affected the buffer allocated below and not
        # the fingerprint itself.
        fp = pyAvalonTools.GetAvalonFP(mol, nBits=size)
        fp_np = np.zeros((1, size), dtype=np.int32)
        DataStructs.ConvertToNumpyArray(fp, fp_np)
        fingerprints.append(fp_np)
    return fingerprints
def rd_kit_aval(dir_sdf="../data/sdf/"):
    """Collect the 128-bit Avalon fingerprint of every SDF file in a directory.

    Only the first molecule of each file is used.

    Args:
        dir_sdf: Directory containing the SDF files.  It is now used both for
            listing and for opening the files; previously the files were
            always opened from the hard-coded ``"../data/sdf/"``, so any
            other directory was listed but never actually read.

    Returns:
        ``np.array`` built from the list of fingerprints, one entry per file
        in sorted file-name order (an empty array for an empty directory).
    """
    # os.listdir replaces the former string-built `ls` shell command: no
    # shell involved and names with whitespace stay intact.  Hidden entries
    # are skipped as `ls` did; sorting is by code point.
    sdf_files = sorted(
        name for name in os.listdir(dir_sdf) if not name.startswith(".")
    )
    fingerprints = []
    for name in sdf_files:
        suppl = SDMolSupplier(os.path.join(dir_sdf, name))
        fingerprints.append(pyAvalonTools.GetAvalonFP(suppl[0], 128))
    return np.array(fingerprints)
def aval(dir="../data/sdf/DB/", bit_length=128):
    """Collect the Avalon fingerprint of every SDF file in a directory.

    Only the first molecule of each file is used.

    Args:
        dir: Directory containing the SDF files.  (The name shadows the
            builtin but is part of the public signature.)  It is now used for
            opening the files too; previously they were always opened from
            the hard-coded ``"../data/sdf/DB/"`` regardless of this argument.
        bit_length: Fingerprint length passed to ``GetAvalonFP``.

    Returns:
        ``np.array`` built from the list of fingerprints, one entry per file
        in sorted file-name order (an empty array for an empty directory).
    """
    # os.listdir replaces the former string-built `ls` shell command: no
    # shell involved and names with whitespace stay intact.  Hidden entries
    # are skipped as `ls` did; sorting is by code point.
    sdf_files = sorted(
        name for name in os.listdir(dir) if not name.startswith(".")
    )
    avalon = []
    for name in sdf_files:
        suppl = SDMolSupplier(os.path.join(dir, name))
        avalon.append(pyAvalonTools.GetAvalonFP(suppl[0], bit_length))
    return np.array(avalon)
def Fingerprints(mols, fingerprint):
    """Compute the named fingerprint for every molecule in ``mols``.

    The name is matched against the module-level families ``indigofps``,
    ``rdkitfps``, ``rdkitnonbitfps`` and ``rdkitestatefps`` in that order.
    A name that is listed in a family but has no generator there falls
    through to the next family.

    Returns:
        list of fingerprints (one per molecule), or ``None`` when the name
        is not recognised.
    """
    # Indigo fingerprints: the molecule objects compute them themselves.
    if fingerprint in indigofps:
        return [mol.fingerprint(fingerprint) for mol in mols]

    # RDKit bit-vector fingerprints.
    if fingerprint in rdkitfps:
        bit_generators = {
            "atompair": lambda mol: Pairs.GetAtomPairFingerprintAsBitVect(mol),
            "avalon": lambda mol: pyAvalonTools.GetAvalonFP(mol),
            "daylight": lambda mol: Chem.RDKFingerprint(mol, fpSize=2048),
            "maccs": lambda mol: MACCSkeys.GenMACCSKeys(mol),
            "morgan": lambda mol: AllChem.GetMorganFingerprintAsBitVect(
                mol, 2, nBits=1024),
            "pharm2d": lambda mol: Generate.Gen2DFingerprint(
                mol, Gobbi_Pharm2D.factory),
            "topological": lambda mol: FingerprintMols.FingerprintMol(mol),
        }
        generate = bit_generators.get(fingerprint)
        if generate is not None:
            return [generate(mol) for mol in mols]

    # RDKit non-bit (integer or float) fingerprints.
    if fingerprint in rdkitnonbitfps:
        nonbit_generators = {
            "sheridan": lambda mol: Sheridan.GetBPFingerprint(mol),
            "topotorsion": lambda mol: Torsions.GetTopologicalTorsionFingerprint(mol),
        }
        generate = nonbit_generators.get(fingerprint)
        if generate is not None:
            return [generate(mol) for mol in mols]

    # E-state fingerprints: element 0 or 1 of the fingerprinter's result.
    if fingerprint in rdkitestatefps:
        estate_component = {"estate1": 0, "estate2": 1}.get(fingerprint)
        if estate_component is not None:
            return [
                Fingerprinter.FingerprintMol(mol)[estate_component]
                for mol in mols
            ]

    # Unknown fingerprint.
    return None
def pipe_calc_fp_b64(stream, summary=None, comp_id="pipe_calc_fp"):
    """Attach a base64-encoded, pickled fingerprint to every record.

    Useful as a separate pipeline step ahead of a similarity search that
    consumes the `FP_b64` field.  The fingerprint is calculated from the
    Murcko scaffold of the record's `mol`, with the type selected by the
    module-level `USE_FP`.

    Records without a `mol` field are dropped.  When a summary is given,
    `summary[comp_id]` is set to the running number of processed records.
    """
    processed = 0
    for rec in stream:
        if "mol" not in rec:
            continue

        scaffold = MurckoScaffold.GetScaffoldForMol(rec["mol"])
        if USE_FP == "morgan":
            fingerprint = Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(
                scaffold, 2)
        elif USE_FP == "avalon":
            fingerprint = pyAv.GetAvalonFP(scaffold, 1024)
        else:
            fingerprint = FingerprintMols.FingerprintMol(scaffold)

        processed += 1
        pickled = pickle.dumps(fingerprint)
        rec["FP_b64"] = b64.b64encode(pickled).decode()

        if summary is not None:
            summary[comp_id] = processed
        yield rec
def aval(dir="../data/sdf/DB/", bit_length=256):
    """Collect Avalon fingerprints together with their HOMO / HOMO-1 values.

    ``merge_dir_and_data(dir=dir)`` supplies the file names and a parallel
    list of ``"name:homo:homo1"`` strings (at least three colon-separated
    fields are read).  A file is kept when its name without the last four
    characters (presumably the extension -- TODO confirm) equals the name
    field of the entry at the same position or, failing that, of the entry
    one position later.

    Fixes relative to the previous revision:
      * the ``homo`` list was spelled ``h**o``, which is not a valid
        assignment target;
      * the returned array was built from an undefined name ``layer``
        instead of the collected fingerprints;
      * values are converted before anything is appended, so the five
        result sequences can no longer get out of step;
      * bare ``except:`` clauses no longer swallow ``KeyboardInterrupt``.

    Args:
        dir: Directory prefix of the SDF files; it is concatenated with the
            file name as-is.  (Shadows the builtin, kept for compatibility.)
        bit_length: Fingerprint length passed to ``GetAvalonFP``.

    Returns:
        tuple: ``(names, fingerprints_array, homo, homo1, diff)`` where
        ``diff`` holds ``homo - homo1`` for each kept file.
    """
    fingerprints = []
    names = []
    homo = []
    homo1 = []
    diff = []
    dir_fl_names, list_to_sort = merge_dir_and_data(dir=dir)
    total = str(len(dir_fl_names))

    def record(file_name, fingerprint, entry):
        # Convert first: a malformed entry must not leave partial rows behind.
        fields = entry.split(":")
        homo_value = float(fields[1])
        homo1_value = float(fields[2])
        fingerprints.append(fingerprint)
        names.append(file_name)
        homo.append(homo_value)
        homo1.append(homo1_value)
        diff.append(homo_value - homo1_value)

    for tmp, item in enumerate(dir_fl_names):
        try:
            suppl = SDMolSupplier(dir + item)
            fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], bit_length)
            stem = item[0:-4]

            if stem == list_to_sort[tmp].split(":")[0]:
                record(item, fp_aval, list_to_sort[tmp])
            else:
                try:
                    # Tolerate the data list being shifted by one entry.
                    if stem == list_to_sort[tmp + 1].split(":")[0]:
                        record(item, fp_aval, list_to_sort[tmp + 1])
                except Exception:
                    print(list_to_sort[tmp].split(":")[0], stem)

            sys.stdout.write("\r %s /" % tmp + total)
            sys.stdout.flush()
        except Exception:
            # Best effort: files that cannot be read or matched are skipped.
            continue

    return names, np.array(fingerprints), homo, homo1, diff
def test4(self):
    """Check Avalon fingerprints built directly from text input.

    The second positional argument of ``GetAvalonFP`` is ``True`` for the
    SMILES inputs and ``False`` for the mol-block inputs.

    Uses ``assertEqual`` / ``assertTrue``: the ``failUnless*`` aliases used
    before are deprecated and no longer exist in Python 3.12.
    """
    # SMILES input, default length.
    smiles_on_bits = (
        ('c1ccccn1', 20),
        ('c1ccccc1', 8),
        ('c1nnccc1', 30),
        ('c1ncncc1', 27),
    )
    for smiles, on_bits in smiles_on_bits:
        bv = pyAvalonTools.GetAvalonFP(smiles, True)
        self.assertEqual(bv.GetNumOnBits(), on_bits)

    # SMILES input, 1024 bits: more bits are set than at the default length.
    bv = pyAvalonTools.GetAvalonFP('c1ncncc1', True, nBits=1024)
    self.assertEqual(len(bv), 1024)
    self.assertTrue(bv.GetNumOnBits() > 27)

    # Mol-block input.
    bv = pyAvalonTools.GetAvalonFP(
        Chem.MolToMolBlock(Chem.MolFromSmiles('c1ccccn1')), False)
    self.assertEqual(len(bv), 512)
    self.assertEqual(bv.GetNumOnBits(), 20)
    bv = pyAvalonTools.GetAvalonFP(
        Chem.MolToMolBlock(Chem.MolFromSmiles('c1ccccc1')), False)
    self.assertEqual(bv.GetNumOnBits(), 8)
# Fingerprint generators keyed by short name.  Each value is a one-argument
# callable taking a molecule `m`.  `nbits` / `nbits_long` are module-level
# names that are looked up when a generator is called, not when it is
# registered here.
FPDICT['maccs'] = lambda m: MACCSkeys.GenMACCSKeys(m)
FPDICT['ap'] = lambda m: Pairs.GetAtomPairFingerprint(m)
FPDICT['tt'] = lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
# Hashed variants with `nbits` bits.
FPDICT['hashap'] = lambda m: Desc.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=nbits)
FPDICT[
    'hashtt'] = lambda m: Desc.GetHashedTopologicalTorsionFingerprintAsBitVect(
        m, nBits=nbits)
# RDKit fingerprints; the number in the key is the maxPath value.
FPDICT['rdk5'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=5, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk6'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk7'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=7, fpSize=nbits, nBitsPerHash=2)

# Avalon generators are registered only when USE_AVALON is truthy.
if USE_AVALON:
    FPDICT['avalon'] = lambda m: pyAv.GetAvalonFP(m, nbits)
    FPDICT['avalon_l'] = lambda m: pyAv.GetAvalonFP(m, nbits_long)


def mpl_hist(data,
             bins=10,
             xlabel="values",
             ylabel="Occurrence",
             show=False,
             save=True,
             **kwargs):
    """Useful kwargs: size (tuple<int>), dpi (int), fn (filename, str), title (str)"""
    # Defaults when the keyword is not supplied: dpi 96, size (300, 350), no title.
    my_dpi = kwargs.get("dpi", 96)
    size = kwargs.get("size", (300, 350))
    title = kwargs.get("title", None)
    # size divided by dpi -- presumably pixels converted to inches for the
    # figure size; confirm against the rest of the function.
    figsize = (size[0] / my_dpi, size[1] / my_dpi)
def avalon_fp(self):
    """Build a feature/label matrix of 512-bit Avalon fingerprints.

    Reads the CSV at ``self.csv_path``, which must provide the columns
    ``'Smiles'`` and ``'Label'``.  Rows whose SMILES cannot be turned into a
    fingerprint are dropped, labels are encoded with ``LabelEncoder``, and
    rows with duplicate fingerprints are reduced to one row each.  Progress
    figures are printed along the way.

    Nothing is written to disk; the array is returned.

    Returns
    -------
    np.ndarray
        ``float32`` array with one row per unique fingerprint: the
        fingerprint bits followed by the encoded label in the last column.
        Rows are ordered as ``np.unique`` orders the fingerprints, not as in
        the CSV.

    Raises
    ------
    ValueError
        From ``np.vstack`` when no fingerprint could be generated at all.
    """
    df = pd.read_csv(self.csv_path)
    smiles_list = df['Smiles'].tolist()

    fingerprints = []
    not_found = []
    for i, smiles in enumerate(tqdm(smiles_list)):
        try:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise ValueError('unparsable SMILES')
            fp = pyAvalonTools.GetAvalonFP(mol, nBits=512)
        except Exception:
            # Best effort: remember the row so it can be dropped below.
            not_found.append(i)
            continue
        # '0'/'1' characters -> 0/1 integers.  np.frombuffer replaces the
        # deprecated binary mode of np.fromstring.
        bit_text = fp.ToBitString().encode('ascii')
        fingerprints.append(np.frombuffer(bit_text, dtype='u1') - ord('0'))

    df.drop(not_found, axis=0, inplace=True)
    print('Number of FPs not found: {}'.format(len(not_found)))
    df.reset_index(drop=True, inplace=True)

    labelencoder = LabelEncoder()
    Y = labelencoder.fit_transform(df['Label'].values)
    Y = Y.reshape(Y.shape[0], 1)
    print('Output shape: {}'.format(Y.shape))

    # Only successful fingerprints were collected, so they already line up
    # with the remaining rows of df.
    X = np.vstack(fingerprints).astype(np.float32)
    print('Input shape: {}'.format(X.shape))

    final_array = np.concatenate((X, Y), axis=1)

    # Removing rows, from final_array, where duplicate FPs are present
    # (the label column is excluded from the comparison).
    feature_columns = final_array[:, :-1]
    _, unq_row_indices = np.unique(feature_columns, return_index=True, axis=0)
    final_array_unique = final_array[unq_row_indices]

    print(
        'Number of Duplicate FPs: {}'.format(final_array.shape[0] -
                                             final_array_unique.shape[0]))
    print('Final Numpy array shape: {}'.format(final_array_unique.shape))
    print('Type of final array: {}'.format(type(final_array_unique)))

    return np.asarray(final_array_unique, dtype=np.float32)
# Fingerprint generators keyed by short name; each value is a one-argument
# callable taking a molecule `m`.  The size names (`nbits`, `n_cas_bits`,
# `longbits`) are defined outside this excerpt and are looked up when a
# generator is called.  Keys ending in "_cas_length" use `n_cas_bits` as the
# fingerprint length instead of `nbits`.
fpdict[
    "hashap_cas_length"
] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=n_cas_bits
)
fpdict[
    "hashtt"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=nbits
)
fpdict[
    "hashtt_cas_length"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=n_cas_bits
)
# Avalon fingerprints in three lengths: nbits, n_cas_bits and longbits.
fpdict["avalon"] = lambda m: fpAvalon.GetAvalonFP(m, nbits)
fpdict["avalon_cas_length"] = lambda m: fpAvalon.GetAvalonFP(m, n_cas_bits)
fpdict["laval"] = lambda m: fpAvalon.GetAvalonFP(m, longbits)
# RDKit fingerprints; the number in the key is the maxPath value.
fpdict["rdk5"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=5, fpSize=nbits, nBitsPerHash=2
)
fpdict["rdk6"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=nbits, nBitsPerHash=2
)
fpdict["rdk6_cas_length"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=n_cas_bits, nBitsPerHash=2
)
fpdict["rdk7"] = lambda m: Chem.RDKFingerprint(
    m, maxPath=7, fpSize=nbits, nBitsPerHash=2
)
# Delegates to create_cas_fp, which is defined outside this excerpt.
fpdict["cas"] = lambda m: create_cas_fp(m)
# Fingerprint / descriptor generators keyed by short name.  Every value is a
# one-argument callable taking a molecule ``m``; ``nbits`` and ``longbits``
# are looked up when a generator is called.
fpFunc_dict = {
    # Morgan bit vectors (the key encodes the diameter, i.e. 2 * radius)
    'ecfp0': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits),
    'ecfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits),
    'ecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits),
    'ecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits),
    # Feature-based Morgan bit vectors
    'fcfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 1, useFeatures=True, nBits=nbits),
    'fcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, useFeatures=True, nBits=nbits),
    'fcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, useFeatures=True, nBits=nbits),
    # Long (``longbits``) Morgan variants
    'lecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=longbits),
    'lecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=longbits),
    'lfcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 2, useFeatures=True, nBits=longbits),
    'lfcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(
        m, 3, useFeatures=True, nBits=longbits),
    # Structural keys and hashed fingerprints
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    'hashap': lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        m, nBits=nbits),
    'hashtt': lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
        m, nBits=nbits),
    # Avalon, regular and long
    'avalon': lambda m: fpAvalon.GetAvalonFP(m, nbits),
    'laval': lambda m: fpAvalon.GetAvalonFP(m, longbits),
    # RDKit fingerprints (the number in the key is maxPath)
    'rdk5': lambda m: Chem.RDKFingerprint(m, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': lambda m: Chem.RDKFingerprint(m, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': lambda m: Chem.RDKFingerprint(m, maxPath=7, fpSize=nbits, nBitsPerHash=2),
    # Generators defined elsewhere in the module
    'tpatf': lambda m: get_tpatf(m),
    'rdkDes': lambda m: calc.CalcDescriptors(m),
}

# Names of the generators that use ``longbits``.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}

# Fingerprints to generate, in this order.
fps_to_generate = [
    'fcfp4',
    'rdkDes',
    'tpatf',
    'rdk5',
    'hashap',
    'avalon',
    'laval',
    'rdk7',
]

# Starts out empty.
ModFileName_LoadedModel_dict = {}
def _fingerprinter(x, y):
    """Return the Avalon fingerprint of ``x`` built with ``avalonSSSBits``.

    ``y`` is forwarded unchanged as the ``isQuery`` argument.
    """
    sss_bit_flags = pyAvalonTools.avalonSSSBits
    return pyAvalonTools.GetAvalonFP(x, isQuery=y, bitFlags=sss_bit_flags)
print( "* holoviews could not be imported. scatter() and struct_hover() are not available." ) IPYTHON = nbt.is_interactive_ipython() if IPYTHON: from IPython.core.display import HTML DEBUG = False nbits = 1024 FPDICT = {} try: import rdkit.Avalon.pyAvalonTools as pyAv FPDICT["avalon"] = lambda m: pyAv.GetAvalonFP(m) except ImportError: pass FPDICT["ecfp0"] = lambda m: Chem.GetMorganFingerprintAsBitVect( m, 0, nBits=nbits) FPDICT["ecfp2"] = lambda m: Chem.GetMorganFingerprintAsBitVect( m, 1, nBits=nbits) FPDICT["ecfp4"] = lambda m: Chem.GetMorganFingerprintAsBitVect( m, 2, nBits=nbits) FPDICT["ecfp6"] = lambda m: Chem.GetMorganFingerprintAsBitVect( m, 3, nBits=nbits) FPDICT["ecfc0"] = lambda m: Chem.GetMorganFingerprint(m, 0) FPDICT["ecfc2"] = lambda m: Chem.GetMorganFingerprint(m, 1) FPDICT["ecfc4"] = lambda m: Chem.GetMorganFingerprint(m, 2) FPDICT["ecfc6"] = lambda m: Chem.GetMorganFingerprint(m, 3)
import pandas as pd sio = StringIO() x.save(sio, format='PNG') s = b64encode(sio.getvalue()) pd.set_option('display.max_columns', len(s) + 1000) pd.set_option('display.max_rows', len(s) + 1000) if len(s) + 100 > pd.get_option("display.max_colwidth"): pd.set_option("display.max_colwidth", len(s) + 1000) return s from rdkit import DataStructs try: from rdkit.Avalon import pyAvalonTools as pyAvalonTools _fingerprinter = lambda x, y: pyAvalonTools.GetAvalonFP( x, isQuery=y, bitFlags=pyAvalonTools.avalonSSSBits) except ImportError: _fingerprinter = lambda x, y: Chem.PatternFingerprint(x, fpSize=2048) def _molge(x, y): """Allows for substructure check using the >= operator (X has substructure Y -> X >= Y) by monkey-patching the __ge__ function This has the effect that the pandas/numpy rowfilter can be used for substructure filtering (filtered = dframe[dframe['RDKitColumn'] >= SubstructureMolecule]) """ if x is None or y is None: return False if hasattr(x, '_substructfp'): if not hasattr(y, '_substructfp'): y._substructfp = _fingerprinter(y, True) if not DataStructs.AllProbeBitsMatch(y._substructfp, x._substructfp): return False
<<<<<<< HEAD:source/test_files/descriptor_test.py temp = os.popen("ls ../data/sdf/").read() temp = str(temp).split() bit_length = 256 sim_matrix_morgan = [] sim_matrix_rdk = [] sim_matrix_aval = [] sim_matrix_layer = [] baseline = SDMolSupplier("../data/sdf/" + temp[0]) baseline_morgan = AllChem.GetMorganFingerprintAsBitVect(baseline[0], 2, nBits=bit_length) baseline_rdk = AllChem.RDKFingerprint(baseline[0], maxPath=2) baseline_aval = pyAvalonTools.GetAvalonFP(baseline[0], 128) baseline_layer = AllChem.LayeredFingerprint(baseline[0]) for item in temp: suppl = SDMolSupplier("../data/sdf/" + item) fp = AllChem.GetMorganFingerprint(suppl[0], 2) fp_bit = AllChem.GetMorganFingerprintAsBitVect(suppl[0], 2, nBits=bit_length) fp_rdk = AllChem.RDKFingerprint(suppl[0], maxPath=2) fp_aval = pyAvalonTools.GetAvalonFP(suppl[0], 128) fp_layer = AllChem.LayeredFingerprint(suppl[0]) sim_matrix_morgan.append( DataStructs.FingerprintSimilarity(baseline_morgan, fp_bit, metric=DataStructs.TanimotoSimilarity)) sim_matrix_rdk.append(