예제 #1
0
def pka_similarities(smile, mol_set, n):
    """Score a SMILES string against a reference set and return the first ``n`` hits.

    The query is converted to a hashed atom-pair bit-vector fingerprint and
    compared to every entry of ``mol_set`` with the Dice similarity.

    Parameters
    ----------
    smile
        SMILES string of the query molecule.
    mol_set
        Iterable of indexable records. Index 2 of each record is passed to
        ``DataStructs.DiceSimilarity`` as the reference fingerprint, index 1
        is the value reported next to the score (presumably a pKa, judging
        by the function name -- confirm against the caller).
    n
        Number of ``[similarity, value]`` pairs to keep.

    Returns
    -------
    numpy.ndarray
        One-dimensional array ``[sim_0, value_0, sim_1, value_1, ...]`` built
        from the first ``n`` pairs after sorting in ascending order.
    """
    query_fp = rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
        Chem.MolFromSmiles(smile))

    scored = [
        [DataStructs.DiceSimilarity(query_fp, record[2]), record[1]]
        for record in mol_set
    ]

    # NOTE(review): the sort is ascending, so the slice keeps the *lowest*
    # similarity scores; verify this is what callers expect.
    scored.sort()
    return np.asarray(scored[:n]).flatten()
예제 #2
0
    def _filter_for_similar_ligands_2d(
            ligand: oechem.OEGraphMol,
            structures: pd.DataFrame) -> pd.DataFrame:
        """
        Filter KLIFS structures for similar ligands according to a fingerprint comparison.

        Every entry's SMILES is converted to an RDKit molecule and compared to the
        ligand via the Dice similarity of feature Morgan fingerprints (radius 2).
        Entries scoring within 0.1 of the best similarity are kept.

        Parameters
        ----------
        ligand: oechem.OEGraphMol
            An OpenEye molecule holding the ligand to dock.
        structures: pd.DataFrame
            A DataFrame containing KLIFS entries. A ``smiles`` column is required.
            The passed DataFrame is not modified.

        Returns
        -------
        : pd.DataFrame
            The input DataFrame filtered for KLIFS entries with most similar ligands,
            extended by the columns ``rdkit_molecules`` and ``rdkit_fingerprint``.
            Empty if none of the entries has a SMILES that RDKit can parse.
        """
        from openeye import oechem
        from rdkit import Chem, RDLogger
        from rdkit.Chem import AllChem, DataStructs

        RDLogger.DisableLog("rdApp.*")  # disable RDKit logging

        logging.debug("Converting OpenEye molecule to RDKit molecule ...")
        ligand = Chem.MolFromSmiles(oechem.OEMolToSmiles(ligand))

        logging.debug("Converting SMILES to RDKit molecules ...")
        rdkit_molecules = [
            Chem.MolFromSmiles(smiles) for smiles in structures.smiles
        ]

        logging.debug("Adding RDKit molecules to dataframe...")
        # assign() returns a new frame, so the caller's DataFrame stays untouched
        structures = structures.assign(rdkit_molecules=rdkit_molecules)

        logging.debug(
            "Removing KLIFS entries without valid RDKit molecule ...")
        # copy() makes the filtered frame independent, so adding a column below
        # needs no process-wide silencing of pandas' chained-assignment warning
        structures = structures[structures.rdkit_molecules.notnull()].copy()

        logging.debug("Adding Feature Morgan fingerprint to dataframe...")
        structures["rdkit_fingerprint"] = [
            AllChem.GetMorganFingerprint(rdkit_molecule, 2, useFeatures=True)
            for rdkit_molecule in structures.rdkit_molecules
        ]

        logging.debug("Generating Feature Morgan fingerprint of ligand ...")
        ligand_fingerprint = AllChem.GetMorganFingerprint(ligand,
                                                          2,
                                                          useFeatures=True)

        logging.debug("Calculating dice similarity between fingerprints ...")
        fingerprint_similarities = [
            DataStructs.DiceSimilarity(ligand_fingerprint, fingerprint)
            for fingerprint in structures.rdkit_fingerprint
        ]

        if not fingerprint_similarities:
            # nothing to compare against; max() below would raise on an empty list
            logging.debug("No KLIFS entries with valid RDKit molecule left ...")
            return structures

        # if maximal score is 0.87, threshold is set to 0.77
        fingerprint_similarity_threshold = max(fingerprint_similarities) - 0.1

        logging.debug("Picking structures with most similar ligands ...")
        structures = structures.iloc[[
            position
            for position, similarity in enumerate(fingerprint_similarities)
            if similarity >= fingerprint_similarity_threshold
        ]]

        return structures
예제 #3
0
 def __call__(self, a, b):
     """Return the Dice distance of ``a`` and ``b``, i.e. one minus their Dice similarity."""
     dice_similarity = DataStructs.DiceSimilarity(a, b)
     return 1 - dice_similarity