class PandasBackend(DataBackend):
    """Data backend that stores its rows in a pandas ``DataFrame``.

    The backend wraps a ``DataFrame`` together with a ``PandasIndex`` that
    describes the frame's index. Methods that return a new backend build it
    through ``__init__``; unless an explicit index is passed there, the new
    frame's index is renamed to ``"index"`` and wrapped with no index columns.
    """

    # Underlying pandas frame holding the data.
    _data: DataFrame
    # Wrapper around the frame's index plus the columns it was built from.
    _index: PandasIndex
    # Label-based and position-based indexers bound to this backend.
    _loc: _LocIndexer
    _iloc: _ILocIndexer

    def __init__(
        self,
        data: Optional[Union[Series, DataFrame, dict[str, list]]] = None,
        index: Optional[PandasIndex] = None,
    ) -> None:
        """Build a backend from ``data`` and optionally attach ``index``.

        Args:
            data: ``None`` (empty object-dtype frame), a ``Series`` (stored as
                a single row), a ``DataFrame``, or a ``dict``. A dict whose
                first value is a non-string iterable is treated as columns of
                values; otherwise the dict is treated as a single row. An
                empty dict yields the same empty frame as ``None``.
            index: Index to install on the frame. When ``None`` the frame's
                own index is kept and named ``"index"``.

        Raises:
            ValueError: If ``data`` is not exactly one of the types above.
        """
        if data is None:
            self._data = DataFrame(dtype="object")
        elif type(data) is Series:
            self._data = cast(Series, data).to_frame().transpose()
        elif type(data) is DataFrame:
            # NOTE(review): DataFrame(data) may share its index object with
            # the caller's frame, in which case the renaming below would be
            # visible to the caller too -- confirm whether a copy is wanted.
            self._data = DataFrame(data)
        elif type(data) is dict:
            if not data:
                # No value to sample; next() on an empty iterator would
                # raise StopIteration, so treat it like "no data".
                self._data = DataFrame(dtype="object")
            else:
                sample_value = next(iter(data.values()))
                is_scalar = not isinstance(sample_value, Iterable) or isinstance(
                    sample_value, str
                )
                if is_scalar:
                    # Scalar values describe a single row.
                    self._data = Series(data).to_frame().transpose()
                else:
                    self._data = DataFrame(data)
        else:
            raise ValueError(f"Received unexpected value type {type(data)}: {data}")

        if index is None:
            self._data.index.name = "index"
            self._index = PandasIndex(self._data.index, [])
        else:
            if not isinstance(index, PandasIndex):
                index = PandasIndex(index)
            self._data.index = index._data
            self._index = index

        self._loc = _LocIndexer(self)
        self._iloc = _ILocIndexer(self)

    @staticmethod
    def _unwrap_other(other: Any) -> Any:
        """Return the wrapped frame of a ``PandasBackend``, else ``other``."""
        if isinstance(other, PandasBackend):
            return other._data
        return other

    def is_link(self) -> bool:
        """Return ``False``; this backend always reports that it is not a link."""
        return False

    def link_token(self) -> Optional[DataToken]:
        """Return ``None``; this backend has no link token."""
        return None

    def to_pandas(self) -> DataFrame:
        """Return the wrapped ``DataFrame`` itself (not a copy)."""
        return self._data

    @property
    def columns(self) -> list[str]:
        """Column labels of the wrapped frame as a list."""
        return self._data.columns.tolist()

    @property
    def values(self) -> np.ndarray:
        """Values of the frame, squeezed to 1-D for a single column or row.

        A single-column frame is squeezed along axis 1; otherwise a
        single-row frame is squeezed along axis 0; any other shape is
        returned unchanged.
        """
        data_values = self._data.values
        n_rows, n_cols = data_values.shape
        if n_cols == 1:
            return np.squeeze(data_values, axis=1)
        if n_rows == 1:
            return np.squeeze(data_values, axis=0)
        return data_values

    @property
    def dtypes(self) -> dict[str, DataType]:
        """Map each column label to a ``DataType`` built from its pandas dtype."""
        return {col: DataType(dtype) for col, dtype in self._data.dtypes.items()}

    def cast_columns(self, column_dtypes: dict[str, type]) -> PandasBackend:
        """Return a new backend with columns cast via ``DataFrame.astype``.

        ``errors="ignore"`` is passed through to pandas, so a failed cast does
        not raise here.
        """
        return PandasBackend(self._data.astype(column_dtypes, errors="ignore"))

    def to_dict(self) -> dict[str, Any]:
        """Return the frame as a dict mapping column label to list of values."""
        return self._data.to_dict("list")

    @property
    def index(self) -> Index:
        """The ``PandasIndex`` attached to this backend."""
        return self._index

    @property
    def index_name(self) -> Union[str, list[str]]:
        """Name of the wrapped frame's index."""
        return self._data.index.name

    @property
    def loc(self: PandasBackend) -> LocIndexer[PandasBackend]:
        """Label-based indexer bound to this backend."""
        return self._loc

    @property
    def iloc(self: PandasBackend) -> ILocIndexer[PandasBackend]:
        """Position-based indexer bound to this backend."""
        return self._iloc

    def equals(self, other: PandasBackend) -> bool:
        """Return whether ``other`` holds equal values and an equal index.

        Only an object whose type is exactly ``PandasBackend`` can be equal.
        """
        if type(other) is not PandasBackend:
            return False
        same_values = np.array_equal(self._data.values, other._data.values)
        return same_values and self._index.equals(other._index)

    # The rich comparisons below are element-wise: they return whatever the
    # wrapped DataFrame's operator returns, not a single bool.

    def __eq__(self, other: Any) -> DataFrame:
        """Element-wise ``==`` against ``other`` (unwrapped if a backend)."""
        return self._data == self._unwrap_other(other)

    def __ne__(self, other: Any) -> DataFrame:
        """Element-wise ``!=`` against ``other`` (unwrapped if a backend)."""
        return self._data != self._unwrap_other(other)

    def __gt__(self, other: Any) -> DataFrame:
        """Element-wise ``>`` against ``other`` (unwrapped if a backend)."""
        return self._data > self._unwrap_other(other)

    def __ge__(self, other: Any) -> DataFrame:
        """Element-wise ``>=`` against ``other`` (unwrapped if a backend)."""
        return self._data >= self._unwrap_other(other)

    def __lt__(self, other: Any) -> DataFrame:
        """Element-wise ``<`` against ``other`` (unwrapped if a backend)."""
        return self._data < self._unwrap_other(other)

    def __le__(self, other: Any) -> DataFrame:
        """Element-wise ``<=`` against ``other`` (unwrapped if a backend)."""
        return self._data <= self._unwrap_other(other)

    def __len__(self) -> int:
        """Number of rows in the wrapped frame."""
        return len(self._data)

    def __iter__(self) -> Generator[str, None, None]:
        """Iterate the wrapped frame, which yields its column labels."""
        return iter(self._data)

    def iterrows(self) -> Generator[tuple[int, PandasBackend], None, None]:
        """Yield ``(index_label, backend)`` pairs, one single-row backend per row."""
        for i, row in self._data.iterrows():
            yield (i, PandasBackend(row.to_frame().transpose()))

    def itertuples(self, ignore_index: bool = False):
        """Yield the frame's rows as tuples via ``DataFrame.itertuples``.

        Args:
            ignore_index: When true the index is left out of each tuple.
        """
        yield from self._data.itertuples(index=not ignore_index)

    def __getitem__(self, item: str) -> Any:
        """Return a new single-column backend for column ``item``."""
        return PandasBackend(self._data[item].to_frame())

    def getitems(self, items: list[str]) -> PandasBackend:
        """Return a new backend restricted to the columns in ``items``."""
        return PandasBackend(self._data[items])

    def getmask(self, mask: list[bool]) -> PandasBackend:
        """Return a new backend with the rows selected by boolean ``mask``."""
        return PandasBackend(self._data[mask])

    def query(self, query: "Query") -> PandasBackend:
        """Compile ``query`` against the frame and return the selected rows."""
        # Imported here rather than at module level, as in the original code.
        from tanuki.database.adapter.query.pandas_query_compiler import (
            PandasQueryCompiler,
        )

        query_compiler = PandasQueryCompiler(self._data)
        compiled = query_compiler.compile(query)
        return PandasBackend(self._data[compiled])

    def __setitem__(self, items: str, value: Any) -> None:
        """Assign ``value`` to column(s) ``items`` of the wrapped frame in place."""
        if isinstance(value, PandasBackend):
            value = value._data
        self._data[items] = value

    def get_index(self, index_alias: IndexAlias) -> Index:
        """Build a ``PandasIndex`` from the alias's columns without changing self."""
        cols = [str(col) for col in index_alias.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index_alias.name
        return PandasIndex(new_data.index, cols)

    def set_index(self, index: Union[Index, IndexAlias]) -> PandasBackend:
        """Return a new backend indexed by the columns named in ``index``."""
        cols = [str(col) for col in index.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index.name
        new_index = PandasIndex(new_data.index, cols)
        return PandasBackend(new_data, new_index)

    def reset_index(self: PandasBackend) -> PandasBackend:
        """Return a new backend whose index is dropped and renamed ``"index"``."""
        new_data = self._data.reset_index(drop=True)
        new_data.index.name = "index"
        new_index = PandasIndex(new_data.index, [])
        return PandasBackend(new_data, new_index)

    def append(
        self: PandasBackend,
        new_backend: PandasBackend,
        ignore_index: bool = False,
    ) -> PandasBackend:
        """Return a new backend with ``new_backend``'s rows after this one's.

        Args:
            new_backend: Backend whose rows are appended.
            ignore_index: Passed to pandas; when true the result is re-indexed.
        """
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # documented replacement for appending one frame to another.
        combined = pd.concat(
            [self._data, new_backend._data], ignore_index=ignore_index
        )
        return PandasBackend(combined)

    def drop_indices(self: PandasBackend, indices: list[int]) -> PandasBackend:
        """Return a new backend without the rows labelled by ``indices``."""
        return PandasBackend(self._data.drop(indices))

    @classmethod
    def concat(
        cls: type[PandasBackend],
        all_backends: list[PandasBackend],
        ignore_index: bool = False,
    ) -> PandasBackend:
        """Concatenate the frames of ``all_backends`` into one new backend."""
        all_data = [backend._data for backend in all_backends]
        return PandasBackend(pd.concat(all_data, ignore_index=ignore_index))

    def nunique(self) -> Series:
        """Return ``DataFrame.nunique()`` of the wrapped frame (per-column counts)."""
        return self._data.nunique()

    def __str__(self) -> str:
        """String form of the wrapped frame."""
        return str(self._data)

    def __repr__(self) -> str:
        """Same text as ``str(self)``."""
        return str(self)
class TestPandasIndex:
    """Tests for ``PandasIndex`` built over the integer index ``[0, 1, 2]``."""

    def setup_method(self) -> None:
        """Create a fresh index named ``"index"`` with columns ``a`` and ``b``."""
        self.index = PandasIndex(PIndex(np.arange(0, 3), name="index"), ["a", "b"])

    def test_name(self) -> None:
        """The wrapper exposes the name of the underlying index."""
        assert_that(self.index.name, equal_to("index"))

    def test_columns(self) -> None:
        """The wrapper exposes the columns it was constructed with."""
        assert_that(self.index.columns, equal_to(["a", "b"]))

    def test_to_pandas(self) -> None:
        """``to_pandas()`` returns an object that ``equals`` the wrapper."""
        assert_that(self.index.to_pandas().equals(self.index), equal_to(True))

    def test_getitem(self) -> None:
        """Scalar lookup returns the value; list lookup returns a ``PandasIndex``."""
        expected = PandasIndex(PIndex([1], name="index"), ["a", "b"])
        assert_that(self.index[1], equal_to(1))
        assert_that(self.index[[1]].equals(expected), equal_to(True))

    def test_values(self) -> None:
        """``values`` holds the raw index entries."""
        assert_that(
            np.array_equal(self.index.values, np.array([0, 1, 2])), equal_to(True)
        )

    def test_tolist(self) -> None:
        """``tolist()`` returns the entries as a plain list."""
        assert_that(self.index.tolist(), equal_to([0, 1, 2]))

    def test_equals(self) -> None:
        """``equals`` is sensitive to values, name and columns."""
        # Identical values, name and columns.
        test = PandasIndex(PIndex([0, 1, 2], name="index"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(True))

        # Different values.
        test = PandasIndex(PIndex([0, 1], name="index"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(False))

        # Different name.
        test = PandasIndex(PIndex([0, 1, 2], name="index2"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(False))

        # Different columns.
        test = PandasIndex(PIndex([0, 1, 2], name="index"), ["a"])
        assert_that(self.index.equals(test), equal_to(False))

    def test_eq(self) -> None:
        """``==`` compares element-wise."""
        expected = np.array([False, True, False])
        actual = self.index == 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_ne(self) -> None:
        """``!=`` compares element-wise."""
        expected = np.array([True, False, True])
        actual = self.index != 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_gt(self) -> None:
        """``>`` compares element-wise."""
        expected = np.array([False, False, True])
        actual = self.index > 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_ge(self) -> None:
        """``>=`` compares element-wise."""
        expected = np.array([False, True, True])
        actual = self.index >= 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_lt(self) -> None:
        """``<`` compares element-wise."""
        expected = np.array([True, False, False])
        actual = self.index < 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_le(self) -> None:
        """``<=`` compares element-wise."""
        expected = np.array([True, True, False])
        actual = self.index <= 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_len(self) -> None:
        """``len()`` reports the number of entries."""
        assert_that(len(self.index), equal_to(3))

    # NOTE(review): the expected text below is the repr of pandas' Int64Index,
    # which was removed in pandas 2.0 -- confirm the pinned pandas version
    # before relying on these two tests.

    def test_str(self) -> None:
        """``str()`` matches the expected pandas index text."""
        assert_that(
            str(self.index),
            equal_to("Int64Index([0, 1, 2], dtype='int64', name='index')"),
        )

    def test_repr(self) -> None:
        """``repr()`` matches the expected pandas index text."""
        assert_that(
            repr(self.index),
            equal_to("Int64Index([0, 1, 2], dtype='int64', name='index')"),
        )