def from_file(cls, filepath, residue_ids, name="", residue_labels=None):
    """
    Build a pocket from a structural protein file.

    Parameters
    ----------
    filepath : str or pathlib.Path
        Path to the file holding the structural protein data.
    residue_ids : list of str
        IDs of the residues that make up the pocket.
    name : str
        Name of the protein (default: empty string).
    residue_labels : None or list of str
        Labels of the pocket residues (default: None).

    Returns
    -------
    opencadd.structure.pocket.Pocket
        New instance of ``cls`` with name, file path, structural data,
        residue IDs and residue labels set.
    """
    instance = cls()
    instance.name = name
    instance._filepath = filepath
    instance._data = DataFrame.from_file(filepath)

    # IDs and labels are stored in whatever form the module-level formatter
    # returns for them, not necessarily as they were passed in.
    formatted = _format_residue_ids_and_labels(residue_ids, residue_labels)
    instance._residue_ids, instance._residue_labels = formatted

    return instance
def test_from_file(self, filepath, verbose):
    """
    Test if input produces a DataFrame.

    Parameters
    ----------
    filepath : str or pathlib.Path
        Path to the structure file to load.
    verbose
        Forwarded unchanged as the second positional argument of
        ``DataFrame.from_file``.
    """
    dataframe = DataFrame.from_file(filepath, verbose)

    # Assert explicitly: a bare ``isinstance(...)`` expression is evaluated and
    # then discarded, so the test could never fail on a wrong return type.
    assert isinstance(dataframe, pd.DataFrame)
def to_dataframe(self, structure_klifs_id_or_filepath, entity="complex", extension="mol2"):  # pylint: disable=W0221
    """
    Load structural data into a DataFrame and add residue KLIFS IDs.

    Parameters
    ----------
    structure_klifs_id_or_filepath
        Structure KLIFS ID or file path; resolved to a file path by
        ``self._to_filepath``.
    entity : str
        Forwarded to ``self._to_filepath`` (default: "complex").
    extension : str
        Forwarded to ``self._to_filepath`` (default: "mol2").

    Returns
    -------
    Result of ``self._add_residue_klifs_ids`` applied to the loaded data
    (presumably a pandas.DataFrame; confirm against that helper).
    """
    structure_path = self._to_filepath(structure_klifs_id_or_filepath, entity, extension)
    loaded = DataFrame.from_file(structure_path)
    # The helper receives the path as well as the loaded data.
    return self._add_residue_klifs_ids(loaded, structure_path)
def by_structure_klifs_id(self, structure_klifs_id, extension="mol2"):  # pylint: disable=W0221
    """
    Build the pocket residue table for one structure from the local KLIFS download.

    The table has one row per KLIFS pocket position 1..85. Positions that have
    no residue in the pocket file are filled with "_".

    Parameters
    ----------
    structure_klifs_id
        Structure KLIFS ID used to look up the structure locally.
    extension : str
        File extension of the pocket file, i.e. ``pocket.<extension>``
        (default: "mol2").

    Returns
    -------
    Result of ``self._add_klifs_region_details`` applied to the standardized
    pocket table (presumably a pandas.DataFrame; confirm against that helper).
    """
    # Look up the structure entry in the local KLIFS data
    local_structures = Structures(self._database, self._path_to_klifs_download)
    structure_entry = local_structures.by_structure_klifs_id(structure_klifs_id).squeeze()

    # 1-based positions of the pocket sequence, leaving out gap characters ("_")
    pocket_sequence = structure_entry["structure.pocket"]
    non_gap_positions = [
        position
        for position, residue in enumerate(pocket_sequence, 1)
        if residue != "_"
    ]

    # Read the pocket atoms from file
    pocket_file = (
        self._path_to_klifs_download
        / structure_entry["structure.filepath"]
        / f"pocket.{extension}"
    )
    atoms = DataFrame.from_file(pocket_file)

    # Atom count per residue.
    # Note: sort=False is important, otherwise negative residue IDs would be
    # sorted to the top and no longer line up with the pocket positions.
    atom_counts = atoms.groupby(["residue.name", "residue.id"], sort=False).size()

    # Repeat each KLIFS position once for every atom of its residue
    position_per_atom = [
        position
        for position, atom_count in zip(non_gap_positions, atom_counts)
        for _ in range(atom_count)
    ]

    # Attach the positions to the atoms, then collapse to one row per residue
    atoms["residue.klifs_id"] = position_per_atom
    residues = atoms[["residue.id", "residue.klifs_id"]].drop_duplicates()

    # Left-merge onto all positions 1..85 so that missing ones show up, filled with "_"
    all_positions = pd.Series(range(1, 86), name="residue.klifs_id").to_frame()
    pocket = all_positions.merge(residues, on="residue.klifs_id", how="left")
    pocket.fillna("_", inplace=True)

    # Add the KLIFS region of each position
    pocket = pocket.merge(POCKET_KLIFS_REGIONS, on="residue.klifs_id", how="left")
    pocket = pocket.astype({"residue.klifs_id": "Int64"})

    # Standardize DataFrame
    pocket = self._standardize_dataframe(pocket, DATAFRAME_COLUMNS["pockets"])

    # Add KLIFS region and color TODO not so nice to have this after standardization
    return self._add_klifs_region_details(pocket)