def pka_similarities(smile, mol_set, n):
    """Rank reference molecules by Dice similarity to a query SMILES.

    A hashed atom-pair bit-vector fingerprint is built for ``smile`` and
    compared against the precomputed fingerprint of every entry in
    ``mol_set``.

    Parameters
    ----------
    smile
        SMILES string of the query molecule.
    mol_set
        Iterable of indexable records. ``record[2]`` is passed to
        ``DataStructs.DiceSimilarity`` as the reference fingerprint and
        ``record[1]`` is the value reported next to the similarity
        (presumably a pKa value, judging by the function name -- confirm
        against the caller).
    n
        Number of ``[similarity, value]`` pairs to keep.

    Returns
    -------
    numpy.ndarray
        The first ``n`` pairs of the ascending-sorted list, flattened to one
        dimension (``sim_0, value_0, sim_1, value_1, ...``).
    """
    query_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        Chem.MolFromSmiles(smile)
    )
    scored = [
        [DataStructs.DiceSimilarity(query_fp, record[2]), record[1]]
        for record in mol_set
    ]
    # NOTE(review): the sort is ascending, so the slice below keeps the
    # entries with the *smallest* similarity -- confirm this is intended.
    scored.sort()
    return np.asarray(scored[:n]).flatten()
def _filter_for_similar_ligands_2d(
    ligand: oechem.OEGraphMol, structures: pd.DataFrame
) -> pd.DataFrame:
    """
    Filter KLIFS structures for similar ligands according to a fingerprint comparison.

    Every SMILES in ``structures.smiles`` is converted to an RDKit molecule,
    entries that cannot be parsed are dropped, and the remaining entries are
    compared to the ligand via the Dice similarity of Feature Morgan
    fingerprints (radius 2). Entries scoring within 0.1 of the best score are
    kept.

    Parameters
    ----------
    ligand: oechem.OEGraphMol
        An OpenEye molecule holding the ligand to dock.
    structures: pd.DataFrame
        A DataFrame containing KLIFS entries. A ``smiles`` column is required.
        The passed DataFrame itself is left unmodified.

    Returns
    -------
    : pd.DataFrame
        The input DataFrame filtered for KLIFS entries with most similar ligands.
        The result additionally carries the columns ``rdkit_molecules`` and
        ``rdkit_fingerprint``. It is empty if no entry has a valid RDKit molecule.
    """
    from openeye import oechem
    from rdkit import Chem, RDLogger
    from rdkit.Chem import AllChem, DataStructs

    RDLogger.DisableLog("rdApp.*")  # disable RDKit logging

    logging.debug("Converting OpenEye molecule to RDKit molecule ...")
    ligand = Chem.MolFromSmiles(oechem.OEMolToSmiles(ligand))

    logging.debug("Converting SMILES to RDKit molecules ...")
    rdkit_molecules = [Chem.MolFromSmiles(smiles) for smiles in structures.smiles]

    logging.debug("Adding RDKit molecules to dataframe...")
    # assign() returns a new frame, so the caller's DataFrame is not mutated
    structures = structures.assign(rdkit_molecules=rdkit_molecules)

    logging.debug("Removing KLIFS entries without valid RDKit molecule ...")
    # explicit copy: the column assignment below then targets an independent
    # frame, so no chained-assignment warning is raised and the global pandas
    # option does not need to be switched off
    structures = structures[structures.rdkit_molecules.notnull()].copy()

    logging.debug("Adding Feature Morgan fingerprint to dataframe...")
    structures["rdkit_fingerprint"] = [
        AllChem.GetMorganFingerprint(rdkit_molecule, 2, useFeatures=True)
        for rdkit_molecule in structures.rdkit_molecules
    ]

    if structures.empty:
        # nothing to compare against; max() below would fail on an empty list
        return structures

    logging.debug("Generating Feature Morgan fingerprint of ligand ...")
    ligand_fingerprint = AllChem.GetMorganFingerprint(ligand, 2, useFeatures=True)

    logging.debug("Calculating dice similarity between fingerprints ...")
    fingerprint_similarities = [
        DataStructs.DiceSimilarity(ligand_fingerprint, fingerprint)
        for fingerprint in structures.rdkit_fingerprint
    ]

    # if maximal score is 0.87, threshold is set to 0.77
    fingerprint_similarity_threshold = max(fingerprint_similarities) - 0.1

    logging.debug("Picking structures with most similar ligands ...")
    selected_positions = [
        position
        for position, similarity in enumerate(fingerprint_similarities)
        if similarity >= fingerprint_similarity_threshold
    ]
    return structures.iloc[selected_positions]
def __call__(self, a, b):
    """Return the Dice distance between two fingerprints.

    The distance is ``1 - DataStructs.DiceSimilarity(a, b)``.

    Parameters
    ----------
    a, b
        Fingerprint objects accepted by ``DataStructs.DiceSimilarity``.
    """
    similarity = DataStructs.DiceSimilarity(a, b)
    return 1 - similarity