예제 #1
0
파일: core.py 프로젝트: ldruizsan/opencadd
    def from_file(cls, filepath, residue_ids, name="", residue_labels=None):
        """
        Build a pocket object from a structural protein file.

        Parameters
        ----------
        filepath : str or pathlib.Path
            Path to the structural protein data; it is stored on the new object and
            loaded via ``DataFrame.from_file``.
        residue_ids : list of str
            Pocket residue IDs.
        name : str
            Name of the protein (default: empty string).
        residue_labels : None or list of str
            Pocket residue labels (default: None).

        Returns
        -------
        opencadd.structure.pocket.Pocket
            New object with name, file path, structural data, residue IDs and residue
            labels set.
        """

        instance = cls()

        # Plain metadata first, then the structural data loaded from the file
        instance.name = name
        instance._filepath = filepath
        instance._data = DataFrame.from_file(filepath)

        # Residue IDs and labels are passed through the shared formatting helper
        # before being stored
        formatted = _format_residue_ids_and_labels(residue_ids, residue_labels)
        instance._residue_ids, instance._residue_labels = formatted

        return instance
예제 #2
0
    def test_from_file(self, filepath, verbose):
        """
        Test that ``DataFrame.from_file`` returns a pandas DataFrame.

        Parameters
        ----------
        filepath : str or pathlib.Path
            Path to the structure file to load (presumably supplied via test
            parametrization, which is not visible here).
        verbose : bool
            Forwarded as second positional argument to ``DataFrame.from_file``.
        """

        dataframe = DataFrame.from_file(filepath, verbose)
        # The check must be asserted: a bare isinstance() expression is evaluated and
        # discarded, so the test would pass regardless of the returned type.
        assert isinstance(dataframe, pd.DataFrame)
예제 #3
0
    def to_dataframe(self,
                     structure_klifs_id_or_filepath,
                     entity="complex",
                     extension="mol2"):  # pylint: disable=W0221
        """
        Load structural data into a DataFrame and add residue KLIFS IDs.

        Parameters
        ----------
        structure_klifs_id_or_filepath
            Structure KLIFS ID or file path; resolved to a file path by
            ``self._to_filepath``.
        entity : str
            Structural entity to load (default: "complex").
        extension : str
            File format/extension (default: "mol2").

        Returns
        -------
        The DataFrame loaded from the resolved file, after being passed through
        ``self._add_residue_klifs_ids``.
        """

        # Resolve the input (ID or path) to a concrete file path
        resolved_path = self._to_filepath(
            structure_klifs_id_or_filepath, entity, extension)

        # Load the file, then annotate it with KLIFS residue IDs
        loaded = DataFrame.from_file(resolved_path)
        return self._add_residue_klifs_ids(loaded, resolved_path)
예제 #4
0
    def by_structure_klifs_id(self, structure_klifs_id, extension="mol2"):  # pylint: disable=W0221
        """
        Map the residues of a locally stored pocket file onto KLIFS position IDs.

        The structure's pocket sequence (``structure.pocket``) marks gap positions
        with "_". All non-gap positions are assigned, in order, to the residues found
        in the pocket file; positions 1-85 that have no residue are filled with "_".

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID used to look up the structure in the local database.
        extension : str
            File extension of the pocket file, i.e. ``pocket.<extension>``
            (default: "mol2").

        Returns
        -------
        pandas.DataFrame
            Standardized pocket table (columns as defined by
            ``DATAFRAME_COLUMNS["pockets"]``), post-processed by
            ``self._add_klifs_region_details``.
        """

        # Look up the structure's metadata record in the local database
        local_structures = Structures(self._database,
                                      self._path_to_klifs_download)
        record = local_structures.by_structure_klifs_id(structure_klifs_id)
        record = record.squeeze()

        # KLIFS positions are 1-based; "_" in the pocket sequence marks a gap
        present_positions = [
            position
            for position, letter in enumerate(record["structure.pocket"], start=1)
            if letter != "_"
        ]

        # Read the pocket atoms from the downloaded structure folder
        pocket_file = self._path_to_klifs_download / record["structure.filepath"]
        pocket_file = pocket_file / f"pocket.{extension}"
        atoms = DataFrame.from_file(pocket_file)

        # Atom count per residue, kept in file order: with sorting enabled, negative
        # residue IDs would move to the top and break the positional mapping below
        atoms_per_residue = atoms.groupby(["residue.name", "residue.id"],
                                          sort=False).size()

        # Repeat each KLIFS position once per atom of the corresponding residue
        atoms["residue.klifs_id"] = [
            position
            for position, atom_count in zip(present_positions, atoms_per_residue)
            for _ in range(atom_count)
        ]

        # Collapse from one row per atom to one row per residue
        residues = atoms[["residue.id", "residue.klifs_id"]].drop_duplicates()

        # Left-join onto the full range of KLIFS positions (1-85) so that positions
        # missing in the pocket show up as rows, then mark them with "_"
        all_positions = pd.Series(range(1, 86), name="residue.klifs_id")
        pocket_table = all_positions.to_frame().merge(residues,
                                                      on="residue.klifs_id",
                                                      how="left")
        pocket_table = pocket_table.fillna("_")

        # Attach the KLIFS region for each position
        pocket_table = pocket_table.merge(POCKET_KLIFS_REGIONS,
                                          on="residue.klifs_id",
                                          how="left")
        pocket_table = pocket_table.astype({"residue.klifs_id": "Int64"})

        # Bring the table into the standard pockets column layout
        pocket_table = self._standardize_dataframe(
            pocket_table,
            DATAFRAME_COLUMNS["pockets"],
        )

        # Add KLIFS region and color  TODO not so nice to have this after standardization
        return self._add_klifs_region_details(pocket_table)