Esempio n. 1
0
def klifs_kinase_from_uniprot_id(uniprot_id: str) -> pd.Series:
    """
    Retrieve KLIFS kinase details about the kinase matching the given Uniprot ID.

    All kinase KLIFS IDs are fetched from a remote KLIFS session, the full details for
    these IDs are requested, and the resulting table is filtered on the
    "kinase.uniprot" column.

    Parameters
    ----------
    uniprot_id: str
        Uniprot identifier.

    Returns
    -------
    kinase: pd.Series
        KLIFS kinase details (the single matching row of the kinase table).

    Raises
    ------
    ValueError:
        No KLIFS kinase found for UniProt ID.
    ValueError:
        Multiple KLIFS kinases found for UniProt ID.
    """
    # Imported locally so that opencadd is only required when this function is called
    from opencadd.databases.klifs import setup_remote

    remote = setup_remote()

    # Fetch the details of every kinase, then filter locally by UniProt ID
    kinase_ids = remote.kinases.all_kinases()["kinase.klifs_id"]
    kinases = remote.kinases.by_kinase_klifs_id(list(kinase_ids))
    matches = kinases[kinases["kinase.uniprot"] == uniprot_id]

    # Exactly one match is required
    if len(matches) == 0:
        raise ValueError("No KLIFS kinase found for UniProt ID.")
    if len(matches) > 1:
        raise ValueError("Multiple KLIFS kinases found for UniProt ID.")

    # Single row of a DataFrame -> pd.Series
    return matches.iloc[0]
Esempio n. 2
0
    def from_structure_klifs_id(cls,
                                structure_klifs_id,
                                subpockets=None,
                                extension="pdb",
                                klifs_session=None):
        """
        Build a KLIFS pocket for a structure KLIFS ID, including KLIFS regions and subpockets.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        subpockets : dict
            Subpocket definition that is handed on to `add_klifs_subpockets`; documented
            keys and values:
            "anchor_residue.klifs_id" : list of int
                List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
                center.
            "subpocket.name" : str
                Subpocket name.
            "subpocket.color" : str
                Subpocket color.
        extension : str
            Structure protein data file format. Defaults to PDB format.
        klifs_session : opencadd.databases.klifs.session.Session or None
            Remote or local KLIFS session. If None (or otherwise falsy), a remote session is
            initialized.

        Returns
        -------
        opencadd.structure.pocket.PocketKlifs
            KLIFS pocket object.
        """

        # Reuse the given session; otherwise open a remote one
        session = klifs_session if klifs_session else setup_remote()

        # Sessions with a client (presumably remote sessions) are queried without a file
        # extension; all other sessions need the extension for the pocket lookup
        lookup_kwargs = {} if session._client else {"extension": extension}
        residues = session.pockets.by_structure_klifs_id(structure_klifs_id,
                                                         **lookup_kwargs)

        # Coordinates of the full complex as text
        complex_text = session.coordinates.to_text(structure_klifs_id,
                                                   entity="complex",
                                                   extension=extension)

        residue_ids = residues["residue.id"].to_list()
        residue_klifs_ids = residues["residue.klifs_id"].to_list()
        klifs_pocket = cls.from_text(
            complex_text,
            extension,
            residue_ids,
            residue_klifs_ids,
            structure_klifs_id,
        )

        # Annotate the pocket with KLIFS regions first, then with the subpockets
        klifs_pocket = klifs_pocket.add_klifs_regions(klifs_pocket, residues)
        klifs_pocket = klifs_pocket.add_klifs_subpockets(klifs_pocket, residues,
                                                         subpockets)

        return klifs_pocket
Esempio n. 3
0
def _to_kinase_annotation(distance_matrix, outputfile):
    """
    Write a kinase annotation table (name, family, group) for use with FigTree.

    Parameters
    ----------
    distance_matrix : pandas.DataFrame
        Distance matrix on which clustering is based; its column labels are used as the
        kinase names to look up in KLIFS.
    outputfile : str or pathlib.Path
        Path to the kinase annotation file. The table is written tab-separated and
        without the index.
    """

    outputfile = Path(outputfile)

    logger.info(f"Writing resulting kinase annotation to {outputfile}")

    annotation_columns = ["kinase.klifs_name", "kinase.family", "kinase.group"]

    # Column labels of the matrix are the kinase names
    names = distance_matrix.columns.to_list()

    # Look the kinases up remotely and keep the human entries only
    session = klifs.setup_remote()
    details = session.kinases.by_kinase_name(names)
    is_human = details["species.klifs"] == "Human"
    annotation = details.loc[is_human, annotation_columns]

    annotation.to_csv(outputfile, sep="\t", index=False)
Esempio n. 4
0
def plot_number_of_kinases_per_kinase_group(structures, remote=None):
    """
    Draw a horizontal bar plot with the number of kinases per kinase group.

    Parameters
    ----------
    structures : pandas.DataFrame
        Structures DataFrame from opencadd.databases.klifs module; its "kinase.klifs_id"
        column is used to look up the kinases.
    remote : None or opencadd.databases.klifs.session.Session
        Remote KLIFS session. If None, generate new remote session.

    Returns
    -------
    matplotlib.pyplot.axis
        Plot axis as returned by the pandas plotting call.
    """
    kinase_ids = structures["kinase.klifs_id"].to_list()

    # Only open a new session when the caller did not provide one
    if remote is None:
        remote = setup_remote()

    # Kinase details (including the kinase group) for the collected IDs
    kinases = remote.kinases.by_kinase_klifs_id(kinase_ids)

    # Rows per kinase group, smallest group first
    group_sizes = kinases.groupby("kinase.group").size()
    group_sizes = group_sizes.sort_values()

    return group_sizes.plot(
        kind="barh",
        figsize=(4, 3),
        title="Number of kinases per kinase group",
        xlabel="Kinase group",
    )
    def from_structure_klifs_ids(cls, structure_klifs_ids, klifs_session=None, n_cores=1):
        """
        Generate fingerprints for one or more KLIFS structures given by structure KLIFS IDs.

        Parameters
        ----------
        structure_klifs_ids : list of int
            Input structure KLIFS IDs (the output may contain fewer fingerprints because
            entries that come back as None are dropped).
        klifs_session : opencadd.databases.klifs.session.Session or None
            Local or remote KLIFS session. If None, a remote session is set up.
        n_cores : int or None
            Number of cores to be used for fingerprint generation as defined by the user
            (passed through `set_n_cores` before use).

        Returns
        -------
        kissim.encoding.fingerprint_generator
            Fingerprint generator object containing fingerprints (`data`) and their
            normalized form (`data_normalized`).
        """

        logger.info("GENERATE FINGERPRINTS")
        logger.info(f"Number of input structures: {len(structure_klifs_ids)}")

        start_time = datetime.datetime.now()
        logger.info(f"Fingerprint generation started at: {start_time}")

        # Fall back to a remote session when none is supplied
        if klifs_session is None:
            klifs_session = setup_remote()

        # Resolve the number of cores that will actually be used
        n_cores = set_n_cores(n_cores)

        # Prepare the generator with its inputs before computing anything
        generator = cls()
        generator.structure_klifs_ids = structure_klifs_ids
        generator.klifs_session = klifs_session

        # Compute fingerprints and index them by structure KLIFS ID, skipping empty ones
        fingerprints_by_id = {}
        for fingerprint in generator._get_fingerprint_list(n_cores):
            if fingerprint is None:
                continue
            fingerprints_by_id[fingerprint.structure_klifs_id] = fingerprint
        generator.data = fingerprints_by_id

        # Normalization relies on `data` being set
        generator.data_normalized = generator._normalize_fingerprints()

        logger.info(f"Number of output fingerprints: {len(generator.data)}")

        end_time = datetime.datetime.now()
        logger.info(f"Runtime: {end_time - start_time}")

        return generator
Esempio n. 6
0
def _get_klifs_residue_colors(remote=None):
    """
    Return the KLIFS residue colors of the pocket of example structure KLIFS ID 12347.

    Parameters
    ----------
    remote : None or opencadd.databases.klifs.session.Session
        Remote KLIFS session. If None, generate new remote session.

    Returns
    -------
    list of str
        KLIFS residue colors (matplotlib color names), one entry per pocket row.
    """

    session = setup_remote() if remote is None else remote

    # Any structure would do; 12347 serves as the fixed example
    example_pocket = session.pockets.by_structure_klifs_id(12347)
    return example_pocket["residue.klifs_color"].to_list()
Esempio n. 7
0
def _setup_klifs_session(local_klifs_download_path=None):
    """
    Create a local or remote KLIFS session depending on the given path.

    Parameters
    ----------
    local_klifs_download_path : str or None
        If path to local KLIFS download is given, set up local KLIFS session.
        If None (or an otherwise falsy value such as an empty string) is given, set up
        remote KLIFS session.

    Returns
    -------
    klifs_session : opencadd.databases.klifs.session.Session
        Local or remote KLIFS session.
    """

    # No path -> nothing local to read from, go remote
    if not local_klifs_download_path:
        return setup_remote()

    return setup_local(local_klifs_download_path)
Esempio n. 8
0
    def from_structure_klifs_id(cls, structure_klifs_id, klifs_session=None):
        """
        Collect the KLIFS data for a structure KLIFS ID.

        Parameters
        ----------
        structure_klifs_id : int
            KLIFS structure ID.
        klifs_session : opencadd.databases.klifs.session.Session or None
            Local or remote KLIFS session. If None, a remote session is set up.

        Returns
        -------
        kissim.io.KlifsToKissimData or None
            KLIFS data. None is returned if the structure KLIFS ID does not exist or, for a
            session with a database (local session), if the required files are missing.
        """

        instance = cls()
        instance.structure_klifs_id = structure_klifs_id

        # Default to a remote session
        instance.klifs_session = setup_remote() if klifs_session is None else klifs_session

        # Give up early on unknown structure KLIFS IDs
        if not instance._structure_klifs_id_exists():
            return None

        # Local sessions additionally need the complex and pocket structural files on disk
        is_local_session = instance.klifs_session._database is not None
        if is_local_session and not instance._local_session_files_exist():
            return None

        instance.text, instance.extension = instance._get_text_and_extension()
        instance.residue_ids, instance.residue_ixs = instance._get_pocket_residue_ids_and_ixs()
        instance.kinase_name = instance._get_kinase_name()

        return instance
Esempio n. 9
0
    def from_structure_klifs_id(cls, structure_klifs_id, subpockets=None):
        """
        Get a KLIFS pocket (remotely by a structure KLIFS ID) that defines the KLIFS regions and
        subpockets.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        subpockets : pandas.DataFrame or None
            Subpockets (row) with the following details (columns):
            "anchor_residue.klifs_ids" : list of int
                List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
                center.
            "subpocket.name" : str
                Subpocket name.
            "subpocket.color" : str
                Subpocket color.
            The input DataFrame is not modified. If None, no subpockets are added.

        Returns
        -------
        opencadd.structure.pocket.KlifsPocket
            KLIFS pocket object.
        """

        # Set up remote KLIFS session
        remote = setup_remote()

        # Get pocket and coordinates for a structure (by a structure KLIFS ID);
        # "." is passed as second argument to `to_pdb` (presumably the output directory)
        pocket = remote.pockets.by_structure_klifs_id(structure_klifs_id)
        filepath = remote.coordinates.to_pdb(structure_klifs_id,
                                             ".",
                                             entity="complex")

        pocket_3d = cls.from_file(
            filepath,
            pocket["residue.id"].to_list(),
            "example kinase",
            pocket["residue.klifs_id"].to_list(),
        )

        # Add regions: one region per (region ID, color) combination
        # NOTE(review): the last argument repeats the region ID once per residue; confirm
        # that this is what `add_region` expects.
        for (region, color), group in pocket.groupby(
            ["residue.klifs_region_id", "residue.klifs_color"]):
            pocket_3d.add_region(
                region,
                group["residue.id"].to_list(),
                color,
                group["residue.klifs_region_id"].to_list(),
            )

        if subpockets is not None:
            # Work on a copy so that the helper column added below does not leak into the
            # caller's DataFrame
            subpockets = subpockets.copy()

            # Map residue KLIFS IDs > residue ID
            subpockets["anchor_residue.ids"] = subpockets[
                "anchor_residue.klifs_ids"].apply(lambda x: pocket[pocket[
                    "residue.klifs_id"].isin(x)]["residue.id"].to_list())

            # Add subpockets
            for _, subpocket in subpockets.iterrows():
                pocket_3d.add_subpocket(
                    subpocket["subpocket.name"],
                    subpocket["anchor_residue.ids"],
                    subpocket["subpocket.color"],
                    subpocket["anchor_residue.klifs_ids"],
                )

        return pocket_3d
import pytest

import numpy as np
import pandas as pd
from opencadd.databases.klifs import setup_local, setup_remote

from kissim.utils import enter_temp_directory
from kissim.encoding import Fingerprint, FingerprintNormalized, FingerprintGenerator
from kissim.schema import (
    FEATURE_NAMES_PHYSICOCHEMICAL,
    FEATURE_NAMES_PHYSICOCHEMICAL_DICT,
    FEATURE_NAMES_DISTANCES_AND_MOMENTS,
)

# `__name__` is the module name, not a file path, so this resolves relative to the
# current working directory.
# NOTE(review): presumably the tests are meant to be run from the repository root - confirm.
PATH_TEST_DATA = Path(__name__).parent.joinpath("kissim", "tests", "data")
# Sessions shared by all tests in this module; both are created at import time
REMOTE = setup_remote()
LOCAL = setup_local(PATH_TEST_DATA.joinpath("KLIFS_download"))


class TestFingerprintGenerator:
    """
    Test common functionalities in the PocketBioPython and PocketDataFrame classes.
    """
    @pytest.mark.parametrize(
        "structure_klifs_ids, klifs_session, n_cores, fingerprints_values_array_sum",
        [
            ([110, 118], REMOTE, 1, 10152.4256),
            ([110, 118], REMOTE, 2, 10152.4256),
            ([110, 118], LOCAL, 1, 10152.4256),
            ([110, 118], LOCAL, 2, 10152.4256),
            ([110, 118], None, None, 10152.4256),
Esempio n. 11
0
    Draw,
    Descriptors,
    Lipinski,
    PandasTools,
    rdFingerprintGenerator,
    QED,
)
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.PropertyMol import PropertyMol
from rdkit.ML.Cluster import Butina
import seaborn as sns

from opencadd.databases.klifs import setup_remote

# Module-wide remote KLIFS session; created once when this module is imported
KLIFS_SESSION = setup_remote()

# Turn off RDKit log output for all loggers matching "rdApp.*"
RDLogger.DisableLog("rdApp.*")

# Color per subpocket label
# (values look like matplotlib color names - TODO confirm where they are consumed)
SUBPOCKET_COLORS = {
    "AP": "purple",
    "FP": "forestgreen",
    "SE": "c",
    "GA": "tab:orange",
    "B1": "tab:blue",
    "B2": "darkslateblue",
    "X": "grey",
}


def read_fragment_library(path_to_lib):
Esempio n. 12
0
    def from_structure_klifs_id(
        cls, structure_klifs_id, subpockets=None, extension="pdb", klifs_session=None
    ):
        """
        Build a KLIFS pocket for a structure KLIFS ID, including KLIFS regions and subpockets.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        subpockets : dict
            Dictionary (converted to a pandas.DataFrame internally) with the following keys
            and values:
            "anchor_residue.klifs_ids" : list of int
                List of anchor residues (KLIFS residue IDs) whose centroid defines the subpocket
                center.
            "subpocket.name" : str
                Subpocket name.
            "subpocket.color" : str
                Subpocket color.
        extension : str
            Structure protein data file format. Defaults to PDB format.
        klifs_session : opencadd.databases.klifs.session.Session or None
            Remote or local KLIFS session. If None (or otherwise falsy), a remote session is
            initialized.

        Returns
        -------
        opencadd.structure.pocket.KlifsPocket
            KLIFS pocket object.
        """

        # Reuse the given session; otherwise open a remote one
        session = klifs_session if klifs_session else setup_remote()

        # Sessions with a client (presumably remote sessions) are queried without a file
        # extension; all other sessions need the extension for the pocket lookup
        lookup_kwargs = {} if session._client else {"extension": extension}
        residues = session.pockets.by_structure_klifs_id(structure_klifs_id, **lookup_kwargs)

        # Coordinates of the full complex as text
        complex_text = session.coordinates.to_text(
            structure_klifs_id, entity="complex", extension=extension
        )

        pocket_3d = cls.from_text(
            complex_text,
            extension,
            residues["residue.id"].to_list(),
            residues["residue.klifs_id"].to_list(),
            structure_klifs_id,
        )

        # One region per (region name, color) combination, defined by residue KLIFS IDs
        region_keys = ["residue.klifs_region", "residue.klifs_color"]
        for (region, color), members in residues.groupby(region_keys):
            pocket_3d.add_region(
                name=region,
                residue_ixs=members["residue.klifs_id"].to_list(),
                color=color,
            )

        # Nothing more to do without subpocket definitions
        if subpockets is None:
            return pocket_3d

        subpockets = pd.DataFrame(subpockets)

        def _klifs_ids_to_residue_ids(klifs_ids):
            # Residue IDs of the pocket rows whose KLIFS ID is among the given ones
            selected = residues[residues["residue.klifs_id"].isin(klifs_ids)]
            return selected["residue.id"].to_list()

        # Map residue KLIFS IDs > residue ID (the resulting column is not used by the
        # `add_subpocket` calls below, which receive the KLIFS IDs)
        subpockets["anchor_residue.ids"] = subpockets["anchor_residue.klifs_ids"].apply(
            _klifs_ids_to_residue_ids
        )

        for _, row in subpockets.iterrows():
            pocket_3d.add_subpocket(
                name=row["subpocket.name"],
                anchor_residue_ixs=row["anchor_residue.klifs_ids"],
                color=row["subpocket.color"],
            )

        return pocket_3d