# Insert a molecule into Bingo together with an external fingerprint built from
# its normalized descriptor values, print the pairwise similarity matrix of the
# loaded molecules, and prepare the query molecule for a Bingo similarity search.
#
# NOTE(review): `id` is not bound anywhere in this span; it is presumably the
# variable of a per-molecule loop whose header lies outside this view. If so,
# the statements down to bingo.insertWithExtFP(...) belong under that loop and
# must be indented accordingly -- confirm against the full file.
#
# `DataFrame.ix` was removed in pandas 1.0, so label-based `.loc` is used
# instead. This assumes the frame is indexed by the integer ids used here
# (e.g. the default RangeIndex) -- TODO confirm; use `.iloc` if the ids are
# meant to be positions.
smiles = df_descriptors.loc[id, 'Structure']
# The first two values of the row are skipped; the rest are the descriptors.
descriptors = list(df_descriptors.loc[id])[2:]
# Min-max scale every descriptor with its per-position bounds.
normalized = [
    (f - min_bounds[i]) / (max_bounds[i] - min_bounds[i])
    for i, f in enumerate(descriptors)
]
fp = pack_normalized_descriptors_to_fingerprint(
    normalized, density=0.3, byte_size=fp_size_bytes)
mol = indigo.loadMolecule(smiles)
molecules.append(mol)
ext_fp = mol.fingerprintExt(fp, fp_size_bytes)
bingo.insertWithExtFP(mol, ext_fp)

print("Similarity matrix:")
for m1 in molecules:
    for m2 in molecules:
        similarity = indigo.similarity(m1, m2)
        print("%.4f " % similarity, end="")
    print()  # end of the matrix row

print("Bingo similarity search:")
query_id = 0  # use the first molecule as a query for an example
smiles = df_descriptors.loc[query_id, 'Structure']
print("Query: %s" % smiles)
# Same descriptor -> fingerprint pipeline as for the inserted molecules.
descriptors = list(df_descriptors.loc[query_id])[2:]
normalized = [
    (f - min_bounds[i]) / (max_bounds[i] - min_bounds[i])
    for i, f in enumerate(descriptors)
]
fp = pack_normalized_descriptors_to_fingerprint(
    normalized, density=0.3, byte_size=fp_size_bytes)
mol = indigo.loadMolecule(smiles)
# Demo: build an Indigo fingerprint for every entry of `smiles`, print each
# fingerprint with its density, then print the pairwise similarity matrix.
#
# NOTE(review): `smiles`, `Indigo` and `fp_density` are not defined in this
# span; they are expected to be provided earlier in the file.
df_descriptors = []

indigo = Indigo()
# Set Indigo's "similarity-type" option. Other values that were tried here:
# "sim", "chem", "ECFP2", "ECFP6", "ECFP8".
indigo.setOption("similarity-type", "ECFP4")

fingerprints = []
for sml in smiles:
    mol = indigo.loadMolecule(sml)
    fp = mol.fingerprint("sim")
    fingerprints.append(fp)

print("*** Fingerprints ***")
for i, fp in enumerate(fingerprints):
    # Named `fp_str` (not `str`) so the builtin `str` is not rebound at
    # module level for the rest of the script.
    fp_str = fp.toString()
    density = fp_density(fp_str)
    print("#%2d: Density: %f ; FP: %s" % (i, density, fp_str))

print("*** Similarity matrix ***")
for f0 in fingerprints:
    for f1 in fingerprints:
        similarity = indigo.similarity(f0, f1)
        print(" %.3f" % similarity, end="")
    # Ends the row; "\n" plus print's own newline leaves a blank line.
    print("\n")