Beispiel #1
0
    def test_from_text_raises(self, filepath, format):
        """
        Check that invalid input makes ``DataFrame.from_text`` raise a ValueError.

        The file at ``filepath`` only serves as a convenient source of text: its
        content is read into a string, and that string (together with ``format``)
        is what gets passed to the class method under test.
        """

        # Read the whole file into memory to mimic text-based input
        with open(filepath, "r") as handle:
            content = handle.read()

        # The call itself must raise; no return value is inspected
        with pytest.raises(ValueError):
            DataFrame.from_text(content, format)
Beispiel #2
0
    def _set_data(self):
        """
        Load atoms as DataFrame from text and keep only atoms whose residue ID
        is listed in ``self._residue_ids``.

        Returns
        -------
        pd.DataFrame
            Structural protein data with the following mandatory columns:
            "residue.id", "atom.name", "atom.x", "atom.y", "atom.z".
            Column "residue.id" is cast to int32 before returning.
        """

        # Load atoms as DataFrame from text
        # Note: Column "residue.id" is of string type
        dataframe = DataFrame.from_text(self._text, self._extension)

        # Cast the IDs to str, so that they can match the DataFrame's ID.
        # Falsy placeholders (e.g. None) are skipped, but residue ID 0 is a
        # legitimate ID and must not be dropped by the truthiness check.
        residue_ids = [
            str(residue_id)
            for residue_id in self._residue_ids
            if residue_id or residue_id == 0
        ]

        # Fetch all atoms matching residues IDs and cast them back to integers
        dataframe = dataframe[dataframe["residue.id"].isin(residue_ids)]
        dataframe = dataframe.astype({"residue.id": "int32"})

        return dataframe
Beispiel #3
0
    def test_from_text(self, filepath, format, verbose):
        """
        Test if input produces a DataFrame.
        Note: Use file as test function input; file content will be read as string,
        which is the input for the class method to be tested here!
        """

        # Let's load a file's content as string (text) to simulate example input data
        with open(filepath, "r") as f:
            text = f.read()

        dataframe = DataFrame.from_text(text, format, verbose)

        # The check must be asserted; a bare isinstance() expression is evaluated
        # and discarded, so the test could never fail on a wrong return type.
        assert isinstance(dataframe, pd.DataFrame)
Beispiel #4
0
    def _map_residue_ids_names_nglixs(self, pocket):
        """
        Map residue IDs and names to nglview indices depending on file format.
        In case of mol2 files, nglview will use indices starting from 1.
        In case of pdb files, nglview will use the residue IDs as indices.

        The mapping is not returned; it is stored in
        ``self._residue_ids_to_ngl_ixs`` under the key ``pocket.name`` as a
        DataFrame with the columns "residue.name", "residue.id" (int32) and
        "residue.ngl_ix" (nglview index as string).

        Parameters
        ----------
        pocket : opencadd.structure.pocket.Pocket
            Pocket object.
        """

        # Get atom data
        dataframe = DataFrame.from_text(pocket._text, pocket._extension)

        # Collect the row labels of all atoms whose residue ID cannot be cast to
        # an integer; these atoms are dropped before the column is cast.
        drop_ixs = []
        for index, residue_id in dataframe["residue.id"].items():
            try:
                int(residue_id)
            except (TypeError, ValueError):
                drop_ixs.append(index)
        dataframe = dataframe.drop(drop_ixs)
        dataframe = dataframe.astype({"residue.id": "int32"})

        # Get all residue names and IDs (full structure!!)
        residue_id2ix = dataframe[["residue.name", "residue.id"]].drop_duplicates()

        if pocket._extension == "mol2":
            # Map residue names to nglview index (starting from 1)
            ngl_ixs = [str(i) for i in range(1, len(residue_id2ix) + 1)]
        else:
            # In this case, residue ID and nglview index are the same
            ngl_ixs = [str(i) for i in residue_id2ix["residue.id"]]
        residue_id2ix["residue.ngl_ix"] = ngl_ixs

        self._residue_ids_to_ngl_ixs[pocket.name] = residue_id2ix
Beispiel #5
0
    def to_dataframe(self, structure_klifs_id, entity="complex", extension="mol2"):
        """
        Build a DataFrame for a structure from its text representation.

        The text is obtained via ``self.to_text``, parsed with
        ``DataFrame.from_text`` and then passed through
        ``self._add_residue_klifs_ids`` before being returned.

        Parameters
        ----------
        structure_klifs_id
            Structure identifier, forwarded to ``to_text`` and
            ``_add_residue_klifs_ids``.
        entity
            Forwarded to ``to_text`` (default "complex").
        extension
            Forwarded to ``to_text`` and used as the format for parsing
            (default "mol2").

        Returns
        -------
        The result of ``self._add_residue_klifs_ids`` applied to the parsed data.
        """

        raw_text = self.to_text(structure_klifs_id, entity, extension)
        atoms = DataFrame.from_text(raw_text, extension)
        return self._add_residue_klifs_ids(atoms, structure_klifs_id)