예제 #1
0
    def __init__(
        self: "DatabaseBackend",
        store_class: Type[T],
        database: Database,
        data_token: DataToken,
        index: Optional[DatabaseIndex] = None,
        selected_columns: Optional[list[str]] = None,
        read_only: bool = True,
    ) -> None:
        """Set up a backend that reads a store's data through a database.

        Args:
            store_class: Store type this backend is associated with.
            database: Database handle; queried for the table's columns when
                ``selected_columns`` is omitted.
            data_token: Token passed to ``database.table_columns``.
            index: Index to use. When omitted, a positional index named
                ``"index"`` covering ``0 .. len(self) - 1`` is created. Values
                that are neither ``DatabaseIndex`` nor ``PandasIndex`` are
                wrapped in a ``PandasIndex``.
            selected_columns: Columns exposed by this backend; defaults to
                ``database.table_columns(data_token)``.
            read_only: Must be truthy.

        Raises:
            NotImplementedError: If ``read_only`` is falsy.
        """
        if not read_only:
            raise NotImplementedError("The current version of Tanuki does not support Store to DB writing")
        self._store_class = store_class
        self._database = database
        self._data_token = data_token
        self._read_only = read_only

        # Without an explicit projection, expose every column of the table.
        self._selected_columns = (
            self._database.table_columns(self._data_token)
            if selected_columns is None
            else selected_columns
        )

        if index is not None:
            if not isinstance(index, (DatabaseIndex, PandasIndex)):
                index = PandasIndex(index)
            self._index = index
        else:
            # len(self) relies on this class's __len__, which is defined
            # outside this snippet; the attributes above are already set.
            positions = PIndex(np.arange(0, len(self)), name="index")
            self._index = PandasIndex(positions, [])
        self._loc = _LocIndexer(self)
        self._iloc = _ILocIndexer(self)
예제 #2
0
    def __init__(
        self,
        data: Optional[Union[Series, DataFrame, dict[str, list]]] = None,
        index: Optional[PandasIndex] = None,
    ) -> None:
        """Build the backend from a Series, DataFrame or dict.

        Args:
            data: Source data.
                * ``None`` or an empty dict -> empty object-dtype frame.
                * ``Series`` -> a single-row frame (the Series transposed).
                * ``DataFrame`` -> wrapped in a new ``DataFrame``.
                * ``dict`` -> a single-row frame when its first value is a
                  scalar or a string, otherwise a column-wise frame.
            index: Index to install on the frame. When omitted, the frame's
                own index is named ``"index"`` and wrapped in a
                ``PandasIndex`` with no columns. Non-``PandasIndex`` values
                are wrapped in a ``PandasIndex`` first.

        Raises:
            ValueError: If ``data`` is of an unsupported type.
        """
        if data is None:
            self._data = DataFrame(dtype="object")
        elif isinstance(data, Series):
            self._data = cast(Series, data).to_frame().transpose()
        elif isinstance(data, DataFrame):
            self._data = DataFrame(data)
        elif isinstance(data, dict):
            if not data:
                # An empty dict has no first value to inspect; treat it like
                # "no data" instead of letting next() raise StopIteration.
                self._data = DataFrame(dtype="object")
            else:
                sample_value = next(iter(data.values()))
                # Strings are iterable but represent one cell, not a column.
                is_scalar = isinstance(sample_value, str) or not isinstance(
                    sample_value, Iterable)
                if is_scalar:
                    self._data = Series(data).to_frame().transpose()
                else:
                    self._data = DataFrame(data)
        else:
            raise ValueError(
                f"Received unexpected value type {type(data)}: {data}")

        if index is None:
            self._data.index.name = "index"
            self._index = PandasIndex(self._data.index, [])
        else:
            if not isinstance(index, PandasIndex):
                index = PandasIndex(index)
            self._data.index = index._data
            self._index = index
        self._loc = _LocIndexer(self)
        self._iloc = _ILocIndexer(self)
예제 #3
0
    def test_index(self) -> None:
        """Check the default index and the index produced by ``set_index``."""
        default_index = PandasIndex(PIndex([0, 1, 2], name="index"), [])
        assert_that(self.data_backend.index.equals(default_index), equal_to(True))

        reindexed = self.data_backend.set_index(ExampleStore.ab_index)
        # The name is assigned after construction, exactly as the fixture
        # data for the two-column index is described here.
        ab_tuples = PIndex([("a", 1), ("b", 2), ("c", 3)])
        ab_tuples.name = "ab_index"
        ab_index = PandasIndex(ab_tuples, ["a", "b"])
        assert_that(reindexed.index.equals(ab_index), equal_to(True))
예제 #4
0
    def test_to_pandas(self) -> None:
        """Check ``to_pandas`` for the default, one-column and two-column indexes."""
        # Default index: the converted index must equal the original.
        converted_default = self.index.to_pandas()
        assert_that(converted_default.equals(self.index), equal_to(True))

        # Single-column index.
        a_index = PandasIndex(PIndex(["a", "b", "c"], name="a_index"), ["a"])
        converted_a = self.a_index.to_pandas()
        assert_that(converted_a.equals(a_index), equal_to(True))

        # Two-column index built from tuples, named after construction.
        ab_tuples = PIndex([("a", 1), ("b", 2), ("c", 3)])
        ab_tuples.name = "ab_index"
        ab_index = PandasIndex(ab_tuples, ["a", "b"])
        converted_ab = self.ab_index.to_pandas()
        assert_that(converted_ab.equals(ab_index), equal_to(True))
예제 #5
0
    def test_equals(self):
        """``equals`` must compare values, name and columns of the index."""
        # (values, name, columns, expected result of self.index.equals(...))
        cases = [
            ([0, 1, 2], "index", ["a", "b"], True),    # identical
            ([0, 1], "index", ["a", "b"], False),      # different values
            ([0, 1, 2], "index2", ["a", "b"], False),  # different name
            ([0, 1, 2], "index", ["a"], False),        # different columns
        ]
        for values, name, columns, should_match in cases:
            candidate = PandasIndex(PIndex(values, name=name), columns)
            assert_that(self.index.equals(candidate), equal_to(should_match))
예제 #6
0
    def test_getitem(self):
        """Scalar lookups return the raw value; list lookups return an index."""
        # Default positional index.
        assert_that(self.index[1], equal_to(1))
        default_slice = PandasIndex(PIndex([1], name="index"), [])
        assert_that(self.index[[1]].equals(default_slice), equal_to(True))

        # Single-column index.
        assert_that(self.a_index[1], equal_to("b"))
        a_slice = PandasIndex(PIndex(["b"], name="a_index"), ["a"])
        assert_that(self.a_index[[1]].equals(a_slice), equal_to(True))

        # Two-column index: a scalar lookup yields the value tuple.
        assert_that(self.ab_index[1], equal_to(("b", 2)))
        ab_tuples = PIndex([("b", 2)])
        ab_tuples.name = "ab_index"
        ab_slice = PandasIndex(ab_tuples, ["a", "b"])
        assert_that(self.ab_index[[1]].equals(ab_slice), equal_to(True))
예제 #7
0
 def test_drop_indices(self) -> None:
     """Dropping label 1 must leave the rows labelled 0 and 2."""
     trimmed = self.data_backend.drop_indices([1])

     remaining_columns = {"a": ["a", "c"], "b": [1, 3], "c": [True, True]}
     remaining_index = PandasIndex(PIndex([0, 2], name="index"), [])
     expected = PandasBackend(remaining_columns, index=remaining_index)

     assert_that(trimmed.equals(expected), equal_to(True))
예제 #8
0
 def test_query(self) -> None:
     """A query OR-ing two column conditions must select rows 0 and 2."""
     a_is_a = ExampleStore.a == "a"
     b_is_three = ExampleStore.b == 3
     selected = self.data_backend.query(a_is_a | b_is_three)

     matching_columns = {"a": ["a", "c"], "b": [1, 3], "c": [True, True]}
     matching_index = PandasIndex(PIndex([0, 2], name="index"), [])
     expected = PandasBackend(matching_columns, index=matching_index)

     assert_that(selected.equals(expected), equal_to(True))
예제 #9
0
 def test_getmask(self) -> None:
     """A boolean mask must keep only the rows flagged ``True``."""
     masked = self.data_backend.getmask([True, False, True])

     kept_columns = {"a": ["a", "c"], "b": [1, 3], "c": [True, True]}
     kept_index = PandasIndex(PIndex([0, 2], name="index"), [])
     expected = PandasBackend(kept_columns, index=kept_index)

     # Result is discarded; the call only has to complete without raising.
     repr(expected)
     assert_that(masked.equals(expected), equal_to(True))
예제 #10
0
    def test_equals(self):
        """Each fixture index must equal an equivalent, freshly built index."""
        default_index = PandasIndex(PIndex([0, 1, 2], name="index"), [])
        assert_that(self.index.equals(default_index), equal_to(True))

        a_column = self.backend["a"]
        a_index = DatabaseIndex("a_index", a_column)
        assert_that(self.a_index.equals(a_index), equal_to(True))

        ab_columns = self.backend.getitems(["a", "b"])
        ab_index = DatabaseIndex("ab_index", ab_columns)
        assert_that(self.ab_index.equals(ab_index), equal_to(True))
예제 #11
0
 def reset_index(self) -> "DatabaseBackend":
     """Return a copy of this backend that uses a fresh positional index.

     The new backend shares this one's store class, database, data token,
     selected columns and read-only flag; only the index is replaced by a
     ``PandasIndex`` named ``"index"`` covering ``0 .. len(self) - 1`` with
     no index columns.

     Returns:
         A new ``DatabaseBackend`` (the annotation previously named
         ``PandasBackend``, which is not what is constructed here).
     """
     pindex = PIndex(np.arange(0, len(self)), name="index")
     return DatabaseBackend(
         self._store_class,
         self._database,
         self._data_token,
         index=PandasIndex(pindex, []),
         selected_columns=self._selected_columns,
         read_only=self._read_only,
     )
예제 #12
0
 def setup_method(self):
     """Create the ``PandasIndex`` fixture used by the tests."""
     positions = PIndex(np.arange(0, 3), name="index")
     self.index = PandasIndex(positions, ["a", "b"])
예제 #13
0
class PandasBackend(DataBackend):
    """Data backend that keeps its rows in an in-memory pandas ``DataFrame``."""

    # Frame holding the backend's rows and columns.
    _data: DataFrame
    # Index wrapper assigned in __init__ (built from, or applied to, _data.index).
    _index: PandasIndex
    # Indexer objects returned by the ``loc`` and ``iloc`` properties.
    _loc: _LocIndexer
    _iloc: _ILocIndexer

    def __init__(
        self,
        data: Optional[Union[Series, DataFrame, dict[str, list]]] = None,
        index: Optional[PandasIndex] = None,
    ) -> None:
        """Build the backend from a Series, DataFrame or dict.

        Args:
            data: Source data.
                * ``None`` or an empty dict -> empty object-dtype frame.
                * ``Series`` -> a single-row frame (the Series transposed).
                * ``DataFrame`` -> wrapped in a new ``DataFrame``.
                * ``dict`` -> a single-row frame when its first value is a
                  scalar or a string, otherwise a column-wise frame.
            index: Index to install on the frame. When omitted, the frame's
                own index is named ``"index"`` and wrapped in a
                ``PandasIndex`` with no columns. Non-``PandasIndex`` values
                are wrapped in a ``PandasIndex`` first.

        Raises:
            ValueError: If ``data`` is of an unsupported type.
        """
        if data is None:
            self._data = DataFrame(dtype="object")
        elif isinstance(data, Series):
            self._data = cast(Series, data).to_frame().transpose()
        elif isinstance(data, DataFrame):
            self._data = DataFrame(data)
        elif isinstance(data, dict):
            if not data:
                # An empty dict has no first value to inspect; treat it like
                # "no data" instead of letting next() raise StopIteration.
                self._data = DataFrame(dtype="object")
            else:
                sample_value = next(iter(data.values()))
                # Strings are iterable but represent one cell, not a column.
                is_scalar = isinstance(sample_value, str) or not isinstance(
                    sample_value, Iterable)
                if is_scalar:
                    self._data = Series(data).to_frame().transpose()
                else:
                    self._data = DataFrame(data)
        else:
            raise ValueError(
                f"Received unexpected value type {type(data)}: {data}")

        if index is None:
            self._data.index.name = "index"
            self._index = PandasIndex(self._data.index, [])
        else:
            if not isinstance(index, PandasIndex):
                index = PandasIndex(index)
            self._data.index = index._data
            self._index = index
        self._loc = _LocIndexer(self)
        self._iloc = _ILocIndexer(self)

    def is_link(self) -> bool:
        """Return ``False``: this backend never reports itself as a link."""
        return False

    def link_token(self) -> Optional[DataToken]:
        """Return ``None``: this backend has no link token."""
        return None

    def to_pandas(self) -> DataFrame:
        """Return the underlying frame itself (not a copy)."""
        return self._data

    @property
    def columns(self) -> list[str]:
        """Column labels of the underlying frame, as a list."""
        return self._data.columns.tolist()

    @property
    def values(self) -> np.ndarray:
        """Values of the frame with a length-1 axis squeezed away.

        A single column is squeezed first (axis 1); otherwise a single row is
        squeezed (axis 0); otherwise the 2-D array is returned unchanged.
        """
        data_values = self._data.values
        shape = data_values.shape
        if shape[1] == 1:
            return np.squeeze(data_values, axis=1)
        elif shape[0] == 1:
            return np.squeeze(data_values, axis=0)
        else:
            return data_values

    @property
    def dtypes(self) -> dict[str, DataType]:
        """Map each column label to a ``DataType`` wrapping its pandas dtype."""
        return {
            col: DataType(dtype)
            for col, dtype in self._data.dtypes.items()
        }

    def cast_columns(self, column_dtypes: dict[str, type]) -> PandasBackend:
        """Return a new backend with columns cast via ``DataFrame.astype``.

        ``errors="ignore"`` is passed to ``astype``. The new backend is built
        without an explicit index argument.
        """
        return PandasBackend(self._data.astype(column_dtypes, errors="ignore"))

    def to_dict(self) -> dict[str, Any]:
        """Return the frame as ``{column: [values, ...]}``."""
        return self._data.to_dict("list")

    @property
    def index(self) -> Index:
        """The index wrapper assigned at construction."""
        return self._index

    @property
    def index_name(self) -> Union[str, list[str]]:
        """Name of the underlying frame's index."""
        return self._data.index.name

    @property
    def loc(self: PandasBackend) -> LocIndexer[PandasBackend]:
        """The ``_LocIndexer`` created for this backend."""
        return self._loc

    @property
    def iloc(self: PandasBackend) -> ILocIndexer[PandasBackend]:
        """The ``_ILocIndexer`` created for this backend."""
        return self._iloc

    def equals(self, other: PandasBackend) -> bool:
        """Return whether ``other`` holds the same values and an equal index.

        Only exact ``PandasBackend`` instances can be equal; subclasses and
        other types yield ``False``. Values are compared with
        ``np.array_equal``; column labels are not compared here.
        """
        if type(other) is not PandasBackend:
            return False
        same_values = np.array_equal(self._data.values, other._data.values)
        return same_values and self._index.equals(other._index)

    # The comparison operators below are element-wise: they return whatever
    # the underlying frame's operator returns, not a single bool. Defining
    # __eq__ without __hash__ also leaves instances unhashable.

    def __eq__(self, other: Any) -> DataFrame:
        """Element-wise ``==`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data == other

    def __ne__(self, other: Any) -> DataFrame:
        """Element-wise ``!=`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data != other

    def __gt__(self, other: Any) -> DataFrame:
        """Element-wise ``>`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data > other

    def __ge__(self, other: Any) -> DataFrame:
        """Element-wise ``>=`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data >= other

    def __lt__(self, other: Any) -> DataFrame:
        """Element-wise ``<`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data < other

    def __le__(self, other: Any) -> DataFrame:
        """Element-wise ``<=`` against another backend's frame or a raw value."""
        if isinstance(other, PandasBackend):
            other = other._data
        return self._data <= other

    def __len__(self) -> int:
        """Number of rows in the underlying frame."""
        return len(self._data)

    def __iter__(self) -> Generator[str, None, None]:
        """Iterate the way the underlying frame does (over its column labels)."""
        return iter(self._data)

    def iterrows(self) -> Generator[tuple[int, PandasBackend], None, None]:
        """Yield ``(label, backend)`` pairs, one single-row backend per row."""
        for i, row in self._data.iterrows():
            yield (i, PandasBackend(row.to_frame().transpose()))

    def itertuples(self, ignore_index: bool = False):
        """Yield the frame's rows as tuples; drop the index if ``ignore_index``."""
        yield from self._data.itertuples(index=not ignore_index)

    def __getitem__(self, item: str) -> Any:
        """Return the column ``item`` as a single-column backend."""
        return PandasBackend(self._data[item].to_frame())

    def getitems(self, items: list[str]) -> PandasBackend:
        """Return a backend restricted to the columns in ``items``."""
        return PandasBackend(self._data[items])

    def getmask(self, mask: list[bool]) -> PandasBackend:
        """Return a backend with the rows selected by the boolean ``mask``."""
        return PandasBackend(self._data[mask])

    def query(self, query: "Query") -> PandasBackend:
        """Compile ``query`` against the frame and return the matching rows."""
        # Imported here rather than at module level, as in the original.
        from tanuki.database.adapter.query.pandas_query_compiler import PandasQueryCompiler

        query_compiler = PandasQueryCompiler(self._data)
        row_selector = query_compiler.compile(query)
        return PandasBackend(self._data[row_selector])

    def __setitem__(self, items: str, value: Any) -> None:
        """Assign ``value`` to column(s) ``items``, unwrapping backend values."""
        if isinstance(value, PandasBackend):
            value = value._data
        self._data[items] = value

    def get_index(self, index_alias: IndexAlias) -> Index:
        """Build a ``PandasIndex`` from the alias' columns; ``self`` is unchanged."""
        cols = [str(col) for col in index_alias.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index_alias.name
        return PandasIndex(new_data.index, cols)

    def set_index(self, index: Union[Index, IndexAlias]) -> PandasBackend:
        """Return a new backend indexed by the columns named in ``index``."""
        cols = [str(col) for col in index.columns]
        new_data = self._data.set_index(cols)
        new_data.index.name = index.name
        new_index = PandasIndex(new_data.index, cols)
        return PandasBackend(new_data, new_index)

    def reset_index(self: PandasBackend) -> PandasBackend:
        """Return a new backend whose index is dropped and renumbered.

        The replacement index is named ``"index"`` and has no index columns.
        """
        new_data = self._data.reset_index(drop=True)
        new_data.index.name = "index"
        new_index = PandasIndex(new_data.index, [])
        return PandasBackend(new_data, new_index)

    def append(
        self: PandasBackend,
        new_backend: PandasBackend,
        ignore_index: bool = False,
    ) -> PandasBackend:
        """Return a new backend with ``new_backend``'s rows added after ours.

        Args:
            new_backend: Backend whose rows are appended.
            ignore_index: Forwarded to ``pd.concat``.
        """
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat is the documented replacement and works on both.
        combined = pd.concat(
            [self._data, new_backend._data], ignore_index=ignore_index)
        return PandasBackend(combined)

    def drop_indices(self: PandasBackend, indices: list[int]) -> PandasBackend:
        """Return a new backend without the rows labelled by ``indices``."""
        return PandasBackend(self._data.drop(indices))

    @classmethod
    def concat(
        cls: type[PandasBackend],
        all_backends: list[PandasBackend],
        ignore_index: bool = False,
    ) -> PandasBackend:
        """Concatenate the frames of ``all_backends`` into one new backend."""
        all_data = [backend._data for backend in all_backends]
        return PandasBackend(pd.concat(all_data, ignore_index=ignore_index))

    def nunique(self) -> int:
        """Return the result of ``DataFrame.nunique`` on the underlying frame."""
        # NOTE(review): annotated as int, but DataFrame.nunique returns a
        # per-column Series - confirm the intended contract.
        return self._data.nunique()

    def __str__(self) -> str:
        """String form of the underlying frame."""
        return str(self._data)

    def __repr__(self) -> str:
        """Same text as ``str(self)``."""
        return str(self)
예제 #14
0
 def reset_index(self: PandasBackend) -> PandasBackend:
     """Return a new backend whose index is dropped and renumbered.

     The replacement index is named ``"index"`` and carries no index columns.
     """
     renumbered = self._data.reset_index(drop=True)
     renumbered.index.name = "index"
     return PandasBackend(renumbered, PandasIndex(renumbered.index, []))
예제 #15
0
 def set_index(self, index: Union[Index, IndexAlias]) -> PandasBackend:
     """Return a new backend indexed by the columns named in ``index``.

     The column names are stringified, the frame is re-indexed on them, and
     the resulting index takes ``index.name`` as its name.
     """
     column_names = list(map(str, index.columns))
     reindexed = self._data.set_index(column_names)
     reindexed.index.name = index.name
     return PandasBackend(reindexed, PandasIndex(reindexed.index, column_names))
예제 #16
0
 def get_index(self, index_alias: IndexAlias) -> Index:
     """Build a ``PandasIndex`` from the alias' columns.

     Works on the frame returned by ``DataFrame.set_index``; the backend's
     own frame is not reassigned.
     """
     column_names = list(map(str, index_alias.columns))
     indexed_frame = self._data.set_index(column_names)
     indexed_frame.index.name = index_alias.name
     return PandasIndex(indexed_frame.index, column_names)
예제 #17
0
 def test_getitem(self):
     """Scalar lookup returns the value; list lookup returns a ``PandasIndex``."""
     assert_that(self.index[1], equal_to(1))

     single_entry = PandasIndex(PIndex([1], name="index"), ["a", "b"])
     assert_that(self.index[[1]].equals(single_entry), equal_to(True))
예제 #18
0
class TestPandasIndex:
    """Tests for ``PandasIndex`` wrapping the integer index ``[0, 1, 2]``."""

    def setup_method(self) -> None:
        """Create the fixture: values 0..2, name "index", columns a and b."""
        self.index = PandasIndex(PIndex(np.arange(0, 3), name="index"), ["a", "b"])

    def test_name(self) -> None:
        assert_that(self.index.name, equal_to("index"))

    def test_columns(self) -> None:
        assert_that(self.index.columns, equal_to(["a", "b"]))

    def test_to_pandas(self) -> None:
        assert_that(self.index.to_pandas().equals(self.index), equal_to(True))

    def test_getitem(self) -> None:
        """Scalar lookup returns the value; list lookup returns a ``PandasIndex``."""
        expected = PandasIndex(PIndex([1], name="index"), ["a", "b"])
        assert_that(self.index[1], equal_to(1))
        assert_that(self.index[[1]].equals(expected), equal_to(True))

    def test_values(self) -> None:
        assert_that(np.array_equal(self.index.values, np.array([0, 1, 2])), equal_to(True))

    def test_tolist(self) -> None:
        assert_that(self.index.tolist(), equal_to([0, 1, 2]))

    def test_equals(self) -> None:
        """``equals`` must be sensitive to values, name and columns."""
        test = PandasIndex(PIndex([0, 1, 2], name="index"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(True))

        # Different values.
        test = PandasIndex(PIndex([0, 1], name="index"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(False))

        # Different name.
        test = PandasIndex(PIndex([0, 1, 2], name="index2"), ["a", "b"])
        assert_that(self.index.equals(test), equal_to(False))

        # Different columns.
        test = PandasIndex(PIndex([0, 1, 2], name="index"), ["a"])
        assert_that(self.index.equals(test), equal_to(False))

    def test_eq(self) -> None:
        expected = np.array([False, True, False])
        actual = self.index == 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_ne(self) -> None:
        expected = np.array([True, False, True])
        actual = self.index != 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_gt(self) -> None:
        expected = np.array([False, False, True])
        actual = self.index > 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_ge(self) -> None:
        expected = np.array([False, True, True])
        actual = self.index >= 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_lt(self) -> None:
        expected = np.array([True, False, False])
        actual = self.index < 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_le(self) -> None:
        expected = np.array([True, True, False])
        actual = self.index <= 1
        assert_that(np.array_equal(actual, expected), equal_to(True))

    def test_len(self) -> None:
        assert_that(len(self.index), equal_to(3))

    def test_str(self) -> None:
        """``str`` must match the text pandas produces for the wrapped index."""
        # The expected text is taken from pandas instead of a literal: the
        # class name in it differs across pandas versions ("Int64Index" was
        # removed in pandas 2.0). NOTE(review): assumes PandasIndex.__str__
        # forwards to the wrapped pandas index - confirm.
        expected = str(PIndex(np.arange(0, 3), name="index"))
        assert_that(str(self.index), equal_to(expected))

    def test_repr(self) -> None:
        """``repr`` must match the text pandas produces for the wrapped index."""
        # See test_str for why the expected text is not a hard-coded literal.
        expected = repr(PIndex(np.arange(0, 3), name="index"))
        assert_that(repr(self.index), equal_to(expected))