Esempio n. 1
0
    def test_from_text_raises(self, filepath, format):
        """
        Check that `DataFrame.from_text` raises a ValueError for invalid inputs.

        Note: The test takes a file path as input; the file's content is read as a string
        and that string is what gets passed to the class method under test.

        Parameters
        ----------
        filepath : pathlib.Path or str
            Path to the file whose content serves as text input.
        format : str
            Second argument forwarded to `DataFrame.from_text` together with the text.
        """

        # Read the complete file content to emulate text input
        with open(filepath) as handle:
            content = handle.read()

        # Only the call under test is expected to raise
        with pytest.raises(ValueError):
            DataFrame.from_text(content, format)
Esempio n. 2
0
    def _set_data(self):
        """
        Load atoms as DataFrame from text and keep only the pocket residues' atoms.

        Only atoms whose residue ID matches one of the IDs in `self._residue_ids` are kept.
        Missing residue IDs (None or empty string) are ignored.

        Returns
        -------
        pd.DataFrame
            Structural protein data with the following mandatory columns:
            "residue.id", "atom.name", "atom.x", "atom.y", "atom.z".
            Column "residue.id" is cast to int32.
        """

        # Load atoms as DataFrame from text
        # Note: Column "residue.id" is of string type
        dataframe = DataFrame.from_text(self._text, self._extension)

        # Cast the IDs to str, so that they can match the DataFrame's ID
        # Note: Test explicitly for None/"" instead of relying on truthiness;
        # a plain truthiness check would silently drop the valid residue ID 0
        residue_ids = [
            str(residue_id)
            for residue_id in self._residue_ids
            if residue_id is not None and residue_id != ""
        ]
        # Fetch all atoms matching residues IDs and cast them back to integers
        dataframe = dataframe[dataframe["residue.id"].isin(residue_ids)]
        dataframe = dataframe.astype({"residue.id": "int32"})

        return dataframe
Esempio n. 3
0
    def from_file(cls, filepath, residue_ids, name="", residue_labels=None):
        """
        Build a Pocket object from a structural protein file.

        Parameters
        ----------
        filepath : str or pathlib.Path
            File path to structural protein data.
        residue_ids : list of str
            Pocket residue IDs.
        name : str
            Name of protein (default: empty string).
        residue_labels : None or list of str
            Pocket residue labels. Set to None by default.

        Returns
        -------
        opencadd.structure.pocket.Pocket
            Pocket object.
        """

        pocket = cls()
        pocket.name = name
        pocket._filepath = filepath

        # Parse the structure file into a DataFrame
        pocket._data = DataFrame.from_file(filepath)

        # Store residue IDs and labels as returned by the formatting helper
        pocket._residue_ids, pocket._residue_labels = _format_residue_ids_and_labels(
            residue_ids, residue_labels
        )

        return pocket
Esempio n. 4
0
    def test_from_file(self, filepath, verbose):
        """
        Test if input produces a DataFrame.

        Parameters
        ----------
        filepath : pathlib.Path or str
            Path to the structure file to be loaded.
        verbose : bool
            Second argument forwarded to `DataFrame.from_file`
            (presumably a verbosity flag - confirm against the class method).
        """

        dataframe = DataFrame.from_file(filepath, verbose)
        # Note: The check must be asserted; a bare isinstance() call discards its result
        # and the test could never fail
        assert isinstance(dataframe, pd.DataFrame)
Esempio n. 5
0
    def to_dataframe(self,
                     structure_klifs_id_or_filepath,
                     entity="complex",
                     extension="mol2"):  # pylint: disable=W0221
        """
        Load a structure file as DataFrame and add KLIFS residue IDs to it.

        Parameters
        ----------
        structure_klifs_id_or_filepath : int or str or pathlib.Path
            Structure KLIFS ID or file path; resolved to a file path via `self._to_filepath`
            (accepted types presumed from the parameter name - confirm).
        entity : str
            Structural entity, passed on to `self._to_filepath` (default: "complex").
        extension : str
            File extension, passed on to `self._to_filepath` (default: "mol2").

        Returns
        -------
        pandas.DataFrame
            Output of `self._add_residue_klifs_ids` applied to the loaded structural data.
        """

        # Resolve the input to the file to be read
        path = self._to_filepath(structure_klifs_id_or_filepath, entity, extension)
        # Load atoms, then annotate them with KLIFS residue IDs
        atoms = DataFrame.from_file(path)
        return self._add_residue_klifs_ids(atoms, path)
Esempio n. 6
0
    def by_structure_klifs_id(self, structure_klifs_id, extension="mol2"):  # pylint: disable=W0221
        """
        Get the pocket residues (residue IDs, KLIFS position IDs, KLIFS regions)
        of a structure from the local KLIFS download.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        extension : str
            Extension of the pocket file to be read, i.e. "pocket.<extension>"
            (default: "mol2").

        Returns
        -------
        pandas.DataFrame
            Pocket residues, standardized via `self._standardize_dataframe` and extended
            via `self._add_klifs_region_details`.
        """

        # Look up the structure's metadata by structure ID
        structure = (
            Structures(self._database, self._path_to_klifs_download)
            .by_structure_klifs_id(structure_klifs_id)
            .squeeze()
        )
        # KLIFS positions are counted from 1; gap positions ("_") are left out
        pocket_sequence = structure["structure.pocket"]
        klifs_ids = [
            position
            for position, residue in enumerate(pocket_sequence, 1)
            if residue != "_"
        ]

        # Read the pocket coordinates from file
        pocket_path = (
            self._path_to_klifs_download
            / structure["structure.filepath"]
            / f"pocket.{extension}"
        )
        dataframe = DataFrame.from_file(pocket_path)
        # Count the atoms of each residue
        # Note: sort=False is important, otherwise negative residue IDs would be sorted
        # to the top
        atom_counts = dataframe.groupby(["residue.name", "residue.id"], sort=False).size()

        # Repeat each KLIFS position ID once per atom of its residue and
        # add the result as new column to the molecule
        dataframe["residue.klifs_id"] = [
            klifs_id
            for klifs_id, n_atoms in zip(klifs_ids, atom_counts)
            for _ in range(n_atoms)
        ]
        # Reduce atom rows to one row per residue
        residue_columns = ["residue.id", "residue.klifs_id"]
        dataframe = dataframe[residue_columns].drop_duplicates()

        # Merge onto all KLIFS position IDs (1-85); positions missing in the pocket
        # are filled with "_"
        all_klifs_ids = pd.Series(range(1, 86), name="residue.klifs_id")
        dataframe = all_klifs_ids.to_frame().merge(
            dataframe, on="residue.klifs_id", how="left"
        )
        dataframe.fillna("_", inplace=True)

        # Add column for KLIFS regions
        dataframe = dataframe.merge(
            POCKET_KLIFS_REGIONS, on="residue.klifs_id", how="left"
        )
        dataframe = dataframe.astype({"residue.klifs_id": "Int64"})

        # Standardize DataFrame
        dataframe = self._standardize_dataframe(dataframe, DATAFRAME_COLUMNS["pockets"])
        # Add KLIFS region and color  TODO not so nice to have this after standardization
        return self._add_klifs_region_details(dataframe)
Esempio n. 7
0
    def test_from_pdb_file(self, pdb_file):
        """
        Check that a pdb file is loaded as a DataFrame passing the shared format tests.

        Parameters
        ----------
        pdb_file : pathlib.Path or str
            Path to pdb file.
        """

        # Load the file, then run the format checks shared by the loader tests
        atoms = DataFrame._from_pdb_file(pdb_file)
        self._dataframe_format_tests(atoms)
Esempio n. 8
0
    def test_from_mol2_file(self, mol2_file):
        """
        Check that a mol2 file is loaded as a DataFrame passing the shared format tests.

        Parameters
        ----------
        mol2_file : pathlib.Path or str
            Path to mol2 file.
        """

        # Load the file, then run the format checks shared by the loader tests
        atoms = DataFrame._from_mol2_file(mol2_file)
        self._dataframe_format_tests(atoms)
Esempio n. 9
0
    def test_from_text(self, filepath, format, verbose):
        """
        Test if input produces a DataFrame.
        Note: Use file as test function input; file content will be read as string,
        which is the input for the class method to be tested here!

        Parameters
        ----------
        filepath : pathlib.Path or str
            Path to the file whose content serves as text input.
        format : str
            Second argument forwarded to `DataFrame.from_text` together with the text.
        verbose : bool
            Third argument forwarded to `DataFrame.from_text`
            (presumably a verbosity flag - confirm against the class method).
        """

        # Let's load a file's content as string (text) to simulate example input data
        with open(filepath, "r") as f:
            text = f.read()

        dataframe = DataFrame.from_text(text, format, verbose)
        # Note: The check must be asserted; a bare isinstance() call discards its result
        # and the test could never fail
        assert isinstance(dataframe, pd.DataFrame)
Esempio n. 10
0
    def _map_residue_ids_names_nglixs(self, pocket):
        """
        Map residue IDs and names to nglview indices depending on file format.
        In case of mol2 files, nglview will use indices starting from 1.
        In case of pdb files, nglview will use the residue IDs as indices.

        The mapping is stored in `self._residue_ids_to_ngl_ixs` under the pocket's name
        as a DataFrame with the columns "residue.name", "residue.id" and "residue.ngl_ix";
        nothing is returned.

        Parameters
        ----------
        pocket : opencadd.structure.pocket.Pocket
            Pocket object.
        """

        def _is_int_castable(value):
            # True if int() accepts the value
            try:
                int(value)
            except (TypeError, ValueError):
                return False
            return True

        # Get atom data
        dataframe = DataFrame.from_text(pocket._text, pocket._extension)

        # Drop atoms whose residue ID cannot be cast to integer, then cast the rest
        not_castable = [
            index
            for index, residue_id in dataframe["residue.id"].items()
            if not _is_int_castable(residue_id)
        ]
        dataframe.drop(not_castable, inplace=True)
        dataframe = dataframe.astype({"residue.id": "int32"})

        # One row per residue name/ID pair (full structure, not only the pocket!)
        residue_id2ix = dataframe[["residue.name", "residue.id"]].drop_duplicates()

        if pocket._extension == "mol2":
            # nglview index is the running residue number (starting from 1)
            ngl_ixs = range(1, len(residue_id2ix) + 1)
        else:
            # In this case, residue ID and nglview index are the same
            ngl_ixs = residue_id2ix["residue.id"]
        # nglview indices are stored as strings
        residue_id2ix["residue.ngl_ix"] = [str(ngl_ix) for ngl_ix in ngl_ixs]

        self._residue_ids_to_ngl_ixs[pocket.name] = residue_id2ix
Esempio n. 11
0
    def test_from_mol2_text(self, mol2_file):
        """
        Check that mol2 file contents (text) are loaded as a DataFrame passing the
        shared format tests.

        Parameters
        ----------
        mol2_file : pathlib.Path or str
            Path to mol2 file.
        """

        # Read the complete file content to emulate text input
        with open(mol2_file) as handle:
            content = handle.read()

        # Parse the text, then run the format checks shared by the loader tests
        atoms = DataFrame._from_mol2_text(content)
        self._dataframe_format_tests(atoms)
Esempio n. 12
0
    def to_dataframe(self, structure_klifs_id, entity="complex", extension="mol2"):
        """
        Get structural data as text, load it as DataFrame and add KLIFS residue IDs to it.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        entity : str
            Structural entity, passed on to `self.to_text` (default: "complex").
        extension : str
            File format, passed on to `self.to_text` and `DataFrame.from_text`
            (default: "mol2").

        Returns
        -------
        pandas.DataFrame
            Output of `self._add_residue_klifs_ids` applied to the loaded structural data.
        """

        # Get the structure as text and parse it into atoms
        structure_text = self.to_text(structure_klifs_id, entity, extension)
        atoms = DataFrame.from_text(structure_text, extension)
        # Annotate atoms with KLIFS residue IDs
        return self._add_residue_klifs_ids(atoms, structure_klifs_id)