Ejemplo n.º 1
0
    def test_to_series_comparison(self):
        """Series derived from two equal indexes compare equal element-wise.

        The comparison is made once with unnamed indexes and once after both
        indexes have been given the same name.
        """
        left = ks.Index([1, 2, 3, 4, 5])
        right = ks.Index([1, 2, 3, 4, 5])

        # Unnamed indexes.
        everything_matches = (left.to_series() == right.to_series()).all()
        self.assert_eq(everything_matches, True)

        # Identically named indexes.
        left.name = "koalas"
        right.name = "koalas"

        everything_matches = (left.to_series() == right.to_series()).all()
        self.assert_eq(everything_matches, True)
Ejemplo n.º 2
0
    def test_sort_values(self):
        """sort_values on Index and MultiIndex agrees with pandas.

        Both the default (ascending) and the descending order are compared,
        for an unnamed Index, a named Index and a named MultiIndex.
        """

        def check_both_orders(expected, actual):
            # Default order first, then explicitly descending.
            self.assert_eq(expected.sort_values(), actual.sort_values())
            self.assert_eq(expected.sort_values(ascending=False),
                           actual.sort_values(ascending=False))

        numbers = [-10, -100, 200, 100]
        pidx = pd.Index(numbers)
        kidx = ks.Index(numbers)
        check_both_orders(pidx, kidx)

        # Same indexes, now carrying a name.
        pidx.name = "koalas"
        kidx.name = "koalas"
        check_both_orders(pidx, kidx)

        # MultiIndex with level names.
        rows = [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)]
        pidx = pd.MultiIndex.from_tuples(rows)
        kidx = ks.MultiIndex.from_tuples(rows)

        level_names = ["hello", "koalas", "goodbye"]
        pidx.names = level_names
        kidx.names = level_names
        check_both_orders(pidx, kidx)
Ejemplo n.º 3
0
    def test_sort_values(self):
        """Compare Koalas and pandas sort_values results.

        Each index pair is sorted with the default arguments and with
        ``ascending=False``; the pandas result is the expected value.
        """
        # Keyword sets passed to sort_values for every index pair.
        sort_variants = ({}, {'ascending': False})

        pidx = pd.Index([-10, -100, 200, 100])
        kidx = ks.Index([-10, -100, 200, 100])
        for kwargs in sort_variants:
            self.assert_eq(pidx.sort_values(**kwargs),
                           kidx.sort_values(**kwargs))

        # Repeat with a named index.
        pidx.name = 'koalas'
        kidx.name = 'koalas'
        for kwargs in sort_variants:
            self.assert_eq(pidx.sort_values(**kwargs),
                           kidx.sort_values(**kwargs))

        # Repeat with a MultiIndex that has level names.
        pidx = pd.MultiIndex.from_tuples(
            [('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)])
        kidx = ks.MultiIndex.from_tuples(
            [('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)])
        pidx.names = ['hello', 'koalas', 'goodbye']
        kidx.names = ['hello', 'koalas', 'goodbye']
        for kwargs in sort_variants:
            self.assert_eq(pidx.sort_values(**kwargs),
                           kidx.sort_values(**kwargs))
Ejemplo n.º 4
0
    def test_series_iloc_setitem(self):
        """Assignments through ``Series.iloc`` must give the same result in
        Koalas as in pandas.

        Covered here: scalar values with list, integer and slice keys;
        assignment through an indexer object obtained before any mutation;
        series derived by arithmetic; a series built from an Index; a series
        taken from a DataFrame column; and two invalid value types.
        """
        pser = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
        kser = ks.from_pandas(pser)

        # Keep the indexer objects themselves so that assignment through a
        # previously obtained ``.iloc`` is exercised in the loop below.
        piloc = pser.iloc
        kiloc = kser.iloc

        # Series derived from the originals; they are assigned to as well.
        pser1 = pser + 1
        kser1 = kser + 1

        for key, value in [
            ([1, 2], 10),
            (1, 50),
            (slice(None), 10),
            (slice(None, 1), 20),
            (slice(1, None), 30),
        ]:
            with self.subTest(key=key, value=value):
                # Assign through a freshly accessed ``.iloc``.
                pser.iloc[key] = value
                kser.iloc[key] = value
                self.assert_eq(kser, pser)

                # Assign through the indexers captured before the loop; the
                # change must be visible on the series themselves.
                piloc[key] = -value
                kiloc[key] = -value
                self.assert_eq(kser, pser)

                # Assign on the derived series.
                pser1.iloc[key] = value
                kser1.iloc[key] = value
                self.assert_eq(kser1, pser1)

        # A Series value assigned to a single integer position must raise.
        with self.assertRaises(ValueError):
            kser.iloc[1] = -kser

        # Series created from an Index.
        pser = pd.Index([1, 2, 3]).to_series()
        kser = ks.Index([1, 2, 3]).to_series()

        pser1 = pser + 1
        kser1 = kser + 1

        pser.iloc[0] = 10
        kser.iloc[0] = 10
        self.assert_eq(kser, pser)

        pser1.iloc[0] = 20
        kser1.iloc[0] = 20
        self.assert_eq(kser1, pser1)

        # Series taken from a DataFrame column; the assigned value is derived
        # from another column of the same frame.
        pdf = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        kdf = ks.from_pandas(pdf)

        pser = pdf.a
        kser = kdf.a

        pser.iloc[[1]] = -pdf.b
        kser.iloc[[1]] = -kdf.b
        self.assert_eq(kser, pser)

        # A DataFrame is not an acceptable value.
        with self.assertRaisesRegex(ValueError,
                                    "Incompatible indexer with DataFrame"):
            kser.iloc[1] = kdf[["b"]]
Ejemplo n.º 5
0
    def test_index_drop_duplicates(self):
        """drop_duplicates on Index and MultiIndex agrees with pandas."""
        # Flat index containing a repeated value.
        repeated = [1, 1, 2]
        expected_index = pd.Index(repeated)
        actual_index = ks.Index(repeated)
        self.assert_eq(expected_index.drop_duplicates(),
                       actual_index.drop_duplicates())

        # MultiIndex containing a repeated tuple, with level names.
        repeated_rows = [(1, 1), (1, 1), (2, 2)]
        expected_index = pd.MultiIndex.from_tuples(
            repeated_rows, names=['level1', 'level2'])
        actual_index = ks.MultiIndex.from_tuples(
            repeated_rows, names=['level1', 'level2'])
        self.assert_eq(expected_index.drop_duplicates(),
                       actual_index.drop_duplicates())
Ejemplo n.º 6
0
    def test_len(self):
        """len() of a Koalas Index / MultiIndex equals the pandas length."""
        size = 10000
        expected_index = pd.Index(range(size))
        actual_index = ks.Index(range(size))
        self.assert_eq(len(expected_index), len(actual_index))

        # MultiIndex built from three 3-level tuples.
        rows = [('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)]
        expected_index = pd.MultiIndex.from_tuples(rows)
        actual_index = ks.MultiIndex.from_tuples(rows)
        self.assert_eq(len(expected_index), len(actual_index))
Ejemplo n.º 7
0
    def test_len(self):
        """Check that len() gives the same answer for Koalas and pandas.

        A flat Index of 10000 elements and a three-row MultiIndex are used.
        """
        rows = [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)]

        # (pandas index, Koalas index) pairs to compare.
        index_pairs = [
            (pd.Index(range(10000)), ks.Index(range(10000))),
            (pd.MultiIndex.from_tuples(rows), ks.MultiIndex.from_tuples(rows)),
        ]

        for pidx, kidx in index_pairs:
            self.assert_eq(len(pidx), len(kidx))
Ejemplo n.º 8
0
    def test_multi_index_symmetric_difference(self):
        """MultiIndex.symmetric_difference matches pandas and rejects a flat Index.

        The result for two MultiIndexes is compared with what pandas returns
        for the converted indexes. Passing a non-Multi Index is expected to
        raise NotImplementedError.
        """
        idx = ks.Index(['a', 'b', 'c'])
        midx = ks.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])
        midx_ = ks.MultiIndex.from_tuples([('a', 'x'), ('b', 'y'), ('c', 'z')])

        self.assert_eq(
            midx.symmetric_difference(midx_),
            midx.to_pandas().symmetric_difference(midx_.to_pandas()))

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias
        # (removed in Python 3.12). The pattern is searched for, so no
        # trailing wildcard is needed.
        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
            midx.symmetric_difference(idx)
Ejemplo n.º 9
0
    def indexer_between_time(
        self,
        start_time: Union[datetime.time, str],
        end_time: Union[datetime.time, str],
        include_start: bool = True,
        include_end: bool = True,
    ) -> Index:
        """
        Return index locations of values between particular times of day
        (e.g., 9:00-9:30AM).

        Parameters
        ----------
        start_time, end_time : datetime.time, str
            Time passed either as object (datetime.time) or as string in
            appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
            "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p").
        include_start : bool, default True
            Whether values exactly at ``start_time`` are included. The flag
            is forwarded unchanged to pandas' ``between_time``.
        include_end : bool, default True
            Whether values exactly at ``end_time`` are included. The flag
            is forwarded unchanged to pandas' ``between_time``.

        Returns
        -------
        values_between_time : Index of integers

        Examples
        --------
        >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
        >>> kidx  # doctest: +NORMALIZE_WHITESPACE
        DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                       '2000-01-01 00:02:00'],
                      dtype='datetime64[ns]', freq=None)

        >>> kidx.indexer_between_time("00:01", "00:02").sort_values()
        Int64Index([1, 2], dtype='int64')

        >>> kidx.indexer_between_time("00:01", "00:02", include_end=False)
        Int64Index([1], dtype='int64')

        >>> kidx.indexer_between_time("00:01", "00:02", include_start=False)
        Int64Index([2], dtype='int64')
        """
        # Applied to each pandas batch by apply_batch below; the filtering
        # itself is delegated to pandas.
        # NOTE(review): the return annotation is presumably read by Koalas to
        # determine the output schema -- confirm before changing it.
        def pandas_between_time(pdf) -> ks.DataFrame[int]:
            return pdf.between_time(start_time, end_time, include_start,
                                    include_end)

        # Frame that keeps this index but selects no data columns.
        kdf = self.to_frame()[[]]
        # Attach a "distributed-sequence" id column under a temporary name
        # checked by verify_temp_column_name; presumably these ids are the
        # row positions that end up in the returned Index.
        id_column_name = verify_temp_column_name(kdf, "__id_column__")
        kdf = kdf.koalas.attach_id_column("distributed-sequence",
                                          id_column_name)
        with ks.option_context("compute.default_index_type", "distributed"):
            # The attached index in the statement below will be dropped soon,
            # so we enforce the "distributed" default index type
            kdf = kdf.koalas.apply_batch(pandas_between_time)
        # The first column of the result becomes the Index, named like self.
        return ks.Index(first_series(kdf).rename(self.name))
Ejemplo n.º 10
0
    def test_argmax(self):
        """Index.argmax matches pandas; MultiIndex.argmax raises TypeError."""
        samples = [100, 50, 10, 20, 30, 60, 0, 50, 0, 100, 100, 100, 20, 0, 0]
        expected_index = pd.Index(samples)
        actual_index = ks.Index(samples)

        self.assert_eq(expected_index.argmax(), actual_index.argmax())

        # MultiIndex
        multi = ks.MultiIndex.from_tuples(
            [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)])
        self.assertRaisesRegex(
            TypeError,
            "reduction operation 'argmax' not allowed for this dtype",
            lambda: multi.argmax(),
        )
Ejemplo n.º 11
0
    def test_index_sort(self):
        """In-place sort() is rejected with TypeError for Index and MultiIndex."""
        flat_index = ks.Index([1, 2, 3, 4, 5])
        multi_index = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2)])

        expected_message = (
            "cannot sort an Index object in-place, use sort_values instead")

        # Same expectation for both kinds of index, flat one first.
        for index in (flat_index, multi_index):
            with self.assertRaisesRegex(TypeError, expected_message):
                index.sort()
Ejemplo n.º 12
0
    def test_multi_index_symmetric_difference(self):
        """MultiIndex.symmetric_difference matches pandas and rejects a flat Index.

        The result for two MultiIndexes is compared with what pandas returns
        for the converted indexes. Passing a non-Multi Index is expected to
        raise NotImplementedError.
        """
        idx = ks.Index(["a", "b", "c"])
        midx = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])
        midx_ = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])

        self.assert_eq(
            midx.symmetric_difference(midx_),
            midx.to_pandas().symmetric_difference(midx_.to_pandas()),
        )

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias
        # (removed in Python 3.12). The pattern is searched for, so no
        # trailing wildcard is needed.
        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
            midx.symmetric_difference(idx)
Ejemplo n.º 13
0
    def test_argmin(self):
        """Index.argmin matches pandas; MultiIndex.argmin raises TypeError."""
        samples = [100, 50, 10, 20, 30, 60, 0, 50, 0, 100, 100, 100, 20, 0, 0]
        pidx = pd.Index(samples)
        kidx = ks.Index(samples)

        expected_position = pidx.argmin()
        actual_position = kidx.argmin()
        self.assert_eq(expected_position, actual_position)

        # MultiIndex
        rows = [('a', 'x', 1), ('b', 'y', 2), ('c', 'z', 3)]
        kmidx = ks.MultiIndex.from_tuples(rows)
        expected_message = (
            "reduction operation 'argmin' not allowed for this dtype")
        with self.assertRaisesRegex(TypeError, expected_message):
            kmidx.argmin()
Ejemplo n.º 14
0
    def test_categorical_index(self):
        """CategoricalIndex values, categories, codes and ordered flag match pandas.

        Four ways of obtaining a categorical index are covered: the
        CategoricalIndex constructor, Index with ``dtype="category"``,
        ``set_index`` on a categorical column, and ``get_level_values`` on a
        MultiIndex built from categorical columns.
        """

        def check_against_pandas(actual, expected):
            self.assert_eq(actual, expected)
            self.assert_eq(actual.categories, expected.categories)
            # The pandas codes are wrapped in pd.Index before comparing.
            self.assert_eq(actual.codes, pd.Index(expected.codes))
            self.assert_eq(actual.ordered, expected.ordered)

        # Explicit CategoricalIndex constructor.
        pidx = pd.CategoricalIndex([1, 2, 3])
        kidx = ks.CategoricalIndex([1, 2, 3])
        check_against_pandas(kidx, pidx)

        # Generic Index constructor with a category dtype.
        pidx = pd.Index([1, 2, 3], dtype="category")
        kidx = ks.Index([1, 2, 3], dtype="category")
        check_against_pandas(kidx, pidx)

        column_a = pd.Categorical([1, 2, 3, 1, 2, 3])
        column_b = pd.Categorical(["a", "b", "c", "a", "b", "c"],
                                  categories=["c", "b", "a"])
        row_index = pd.Categorical([10, 20, 30, 20, 30, 10],
                                   categories=[30, 10, 20],
                                   ordered=True)
        pdf = pd.DataFrame({"a": column_a, "b": column_b}, index=row_index)
        kdf = ks.from_pandas(pdf)

        # Index produced by promoting a categorical column.
        pidx = pdf.set_index("b").index
        kidx = kdf.set_index("b").index
        check_against_pandas(kidx, pidx)

        # First level of a MultiIndex made of two categorical columns.
        pidx = pdf.set_index(["a", "b"]).index.get_level_values(0)
        kidx = kdf.set_index(["a", "b"]).index.get_level_values(0)
        check_against_pandas(kidx, pidx)
Ejemplo n.º 15
0
    def test_arithmetic_op_exceptions(self):
        """Arithmetic that is undefined for a datetime series must raise.

        Addition, multiplication, true/floor division and modulo are checked
        in both operand orders against an int, a float, the series itself, a
        datetime Index and an array of Python datetimes; each must raise
        TypeError with the operator-specific message. Subtraction with a
        number must raise TypeError as well, and subtracting the series from
        an array of Python datetimes must raise NotImplementedError.
        """
        kser = self.ks_start_date
        py_datetime = self.pd_start_date.dt.to_pydatetime()
        datetime_index = ks.Index(self.pd_start_date)

        # (expected message, binary operations that must produce it), listed
        # in the order in which they are checked for every operand.
        unsupported_operations = [
            ("addition can not be applied to date times.",
             [lambda left, right: left + right]),
            ("multiplication can not be applied to date times.",
             [lambda left, right: left * right]),
            ("division can not be applied to date times.",
             [lambda left, right: left / right,
              lambda left, right: left // right]),
            ("modulo can not be applied to date times.",
             [lambda left, right: left % right]),
        ]

        for other in [1, 0.1, kser, datetime_index, py_datetime]:
            for expected_err_msg, operations in unsupported_operations:
                for operation in operations:
                    # Series on the left, then series on the right.
                    self.assertRaisesRegex(TypeError, expected_err_msg,
                                           operation, kser, other)
                    self.assertRaisesRegex(TypeError, expected_err_msg,
                                           operation, other, kser)

        expected_err_msg = "datetime subtraction can only be applied to datetime series."

        for other in [1, 0.1]:
            self.assertRaisesRegex(TypeError, expected_err_msg,
                                   lambda: kser - other)
            self.assertRaisesRegex(TypeError, expected_err_msg,
                                   lambda: other - kser)

        # A former extra ``kser - other`` check after the loop silently reused
        # the leaked loop variable (0.1) and only repeated the last iteration,
        # so it has been dropped.
        self.assertRaises(NotImplementedError, lambda: py_datetime - kser)
Ejemplo n.º 16
0
    def indexer_at_time(self, time: Union[datetime.time, str], asof: bool = False) -> Index:
        """
        Return index locations of values at particular time of day
        (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or str
            Time passed in either as object (datetime.time) or as string in
            appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
            "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p").
        asof : bool, default False
            Not supported. Any truthy value raises ``NotImplementedError``.

        Returns
        -------
        values_at_time : Index of integers

        Raises
        ------
        NotImplementedError
            If ``asof`` is truthy.

        Examples
        --------
        >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
        >>> kidx  # doctest: +NORMALIZE_WHITESPACE
        DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                       '2000-01-01 00:02:00'],
                      dtype='datetime64[ns]', freq=None)

        >>> kidx.indexer_at_time("00:00")
        Int64Index([0], dtype='int64')

        >>> kidx.indexer_at_time("00:01")
        Int64Index([1], dtype='int64')
        """
        if asof:
            raise NotImplementedError("'asof' argument is not supported")

        # Applied to each pandas batch by apply_batch below; the filtering
        # itself is delegated to pandas.
        # NOTE(review): the return annotation is presumably read by Koalas to
        # determine the output schema -- confirm before changing it.
        def pandas_at_time(pdf) -> ks.DataFrame[int]:
            return pdf.at_time(time, asof)

        # Frame that keeps this index but selects no data columns.
        kdf = self.to_frame()[[]]
        # Attach a "distributed-sequence" id column under a temporary name
        # checked by verify_temp_column_name; presumably these ids are the
        # row positions that end up in the returned Index.
        id_column_name = verify_temp_column_name(kdf, "__id_column__")
        kdf = kdf.koalas.attach_id_column("distributed-sequence", id_column_name)
        with ks.option_context("compute.default_index_type", "distributed"):
            # The attached index in the statement below will be dropped soon,
            # so we enforce the "distributed" default index type
            kdf = kdf.koalas.apply_batch(pandas_at_time)
        # The first column of the result becomes the Index, named like self.
        return ks.Index(first_series(kdf).rename(self.name))
Ejemplo n.º 17
0
    def test_series_iloc_setitem(self):
        """Scalar assignment through ``Series.iloc`` behaves like pandas.

        List, integer and slice keys are tried on a series and on a series
        derived from it by arithmetic; the same is then done with a single
        position on a series built from an Index. Assigning a Series to one
        position must raise ValueError.
        """
        expected = pd.Series([1, 2, 3], index=["cobra", "viper", "sidewinder"])
        actual = ks.from_pandas(expected)

        expected_shifted = expected + 1
        actual_shifted = actual + 1

        assignments = [
            ([1, 2], 10),
            (1, 50),
            (slice(None), 10),
            (slice(None, 1), 20),
            (slice(1, None), 30),
        ]
        for key, value in assignments:
            with self.subTest(key=key, value=value):
                # Original series.
                expected.iloc[key] = value
                actual.iloc[key] = value
                self.assert_eq(actual, expected)

                # Series derived by arithmetic.
                expected_shifted.iloc[key] = value
                actual_shifted.iloc[key] = value
                self.assert_eq(actual_shifted, expected_shifted)

        # A Series is not an acceptable value for a single position.
        with self.assertRaises(ValueError):
            actual.iloc[1] = -actual

        # Series created from an Index.
        expected = pd.Index([1, 2, 3]).to_series()
        actual = ks.Index([1, 2, 3]).to_series()

        expected_shifted = expected + 1
        actual_shifted = actual + 1

        expected.iloc[0] = 10
        actual.iloc[0] = 10
        self.assert_eq(actual, expected)

        expected_shifted.iloc[0] = 20
        actual_shifted.iloc[0] = 20
        self.assert_eq(actual_shifted, expected_shifted)
Ejemplo n.º 18
0
    def test_index_symmetric_difference(self):
        """symmetric_difference matches pandas for Index and MultiIndex.

        Results are sorted before comparison. Mixing a flat Index with a
        MultiIndex is expected to raise NotImplementedError.
        """
        pidx1 = pd.Index([1, 2, 3, 4])
        pidx2 = pd.Index([2, 3, 4, 5])
        kidx1 = ks.from_pandas(pidx1)
        kidx2 = ks.from_pandas(pidx2)

        self.assert_eq(
            kidx1.symmetric_difference(kidx2).sort_values(),
            pidx1.symmetric_difference(pidx2).sort_values(),
        )
        # Same comparison with an index produced by arithmetic.
        self.assert_eq(
            (kidx1 + 1).symmetric_difference(kidx2).sort_values(),
            (pidx1 + 1).symmetric_difference(pidx2).sort_values(),
        )

        pmidx1 = pd.MultiIndex(
            [["lama", "cow", "falcon"], ["speed", "weight", "length"]],
            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 0, 1, 2]],
        )
        pmidx2 = pd.MultiIndex(
            [["koalas", "cow", "falcon"], ["speed", "weight", "length"]],
            [[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 0, 0, 0, 1, 2, 0, 1, 2]],
        )
        kmidx1 = ks.from_pandas(pmidx1)
        kmidx2 = ks.from_pandas(pmidx2)

        self.assert_eq(
            kmidx1.symmetric_difference(kmidx2).sort_values(),
            pmidx1.symmetric_difference(pmidx2).sort_values(),
        )

        idx = ks.Index(["a", "b", "c"])
        midx = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias
        # (removed in Python 3.12). The pattern is searched for, so no
        # trailing wildcard is needed.
        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
            idx.symmetric_difference(midx)
Ejemplo n.º 19
0
    def test_index_nunique(self):
        """Index.nunique matches pandas, with and without an explicit ``dropna=True``."""
        values = [1, 1, 2, None]
        pidx = pd.Index(values)
        kidx = ks.Index(values)

        # Default arguments first, then dropna=True spelled out.
        for kwargs in ({}, {"dropna": True}):
            self.assert_eq(pidx.nunique(**kwargs), kidx.nunique(**kwargs))
Ejemplo n.º 20
0
    def test_index_symmetric_difference(self):
        """Index.symmetric_difference with a MultiIndex raises NotImplementedError."""
        idx = ks.Index(["a", "b", "c"])
        midx = ks.MultiIndex.from_tuples([("a", "x"), ("b", "y"), ("c", "z")])

        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias
        # (removed in Python 3.12). The pattern is searched for, so no
        # trailing wildcard is needed.
        with self.assertRaisesRegex(NotImplementedError, "Doesn't support"):
            idx.symmetric_difference(midx)
Ejemplo n.º 21
0
    def test_difference(self):
        """Index.difference and MultiIndex.difference agree with pandas.

        Results are sorted before comparison. Invalid ``other`` arguments and
        an invalid ``sort`` value must raise the documented errors.
        """

        def check(kidx, pidx, kother, pother):
            # Compare the sorted Koalas result with the sorted pandas result.
            self.assert_eq(kidx.difference(kother).sort_values(),
                           pidx.difference(pother).sort_values())

        # Index
        kidx1 = ks.Index([1, 2, 3, 4], name="koalas")
        kidx2 = ks.Index([3, 4, 5, 6], name="koalas")
        pidx1 = kidx1.to_pandas()
        pidx2 = kidx2.to_pandas()

        check(kidx1, pidx1, kidx2, pidx2)

        # list, tuple, set and dict are all accepted as ``other``.
        list_likes = (
            [3, 4, 5, 6],
            (3, 4, 5, 6),
            {3, 4, 5, 6},
            {3: 1, 4: 2, 5: 3, 6: 4},
        )
        for other in list_likes:
            check(kidx1, pidx1, other, other)

        # Exceptions for Index
        for invalid_other in ("1234", 1234, 12.34, None, np.nan):
            with self.assertRaisesRegex(TypeError,
                                        "Input must be Index or array-like"):
                kidx1.difference(invalid_other)

        sort_message = (
            "The 'sort' keyword only takes the values of None or True; 1 was passed."
        )
        with self.assertRaisesRegex(ValueError, sort_message):
            kidx1.difference(kidx2, sort=1)

        # MultiIndex
        level_names = ["hello", "koalas", "world"]
        kidx1 = ks.MultiIndex.from_tuples(
            [("a", "x", 1), ("b", "y", 2), ("c", "z", 3)], names=level_names)
        kidx2 = ks.MultiIndex.from_tuples(
            [("a", "x", 1), ("b", "z", 2), ("k", "z", 3)], names=level_names)
        pidx1 = kidx1.to_pandas()
        pidx2 = kidx2.to_pandas()

        check(kidx1, pidx1, kidx2, pidx2)

        # A set of tuples and a dict keyed by tuples are accepted as well.
        for other in ({("a", "x", 1)}, {("a", "x", 1): [1, 2, 3]}):
            check(kidx1, pidx1, other, other)

        # Exceptions for MultiIndex
        with self.assertRaisesRegex(
                TypeError, "other must be a MultiIndex or a list of tuples"):
            kidx1.difference(["b", "z", "2"])
Ejemplo n.º 22
0
    def test_append(self):
        """Index.append / MultiIndex.append agree with pandas.

        Covered: plain and named indexes, indexes obtained from DataFrames
        (with flat and MultiIndex columns), MultiIndexes with and without
        level names, and the unsupported Index/MultiIndex combination. Where
        the two operands differ, both append orders are checked.
        """
        # Index
        pidx = pd.Index(range(10000))
        kidx = ks.Index(range(10000))

        self.assert_eq(pidx.append(pidx), kidx.append(kidx))

        # Index with name
        pidx1 = pd.Index(range(10000), name="a")
        pidx2 = pd.Index(range(10000), name="b")
        kidx1 = ks.Index(range(10000), name="a")
        kidx2 = ks.Index(range(10000), name="b")

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # Index from DataFrame
        pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]},
                            index=["a", "b", "c"])
        pdf2 = pd.DataFrame({"a": [7, 8, 9], "d": [10, 11, 12]},
                            index=["x", "y", "z"])
        kdf1 = ks.from_pandas(pdf1)
        kdf2 = ks.from_pandas(pdf2)

        pidx1 = pdf1.set_index("a").index
        pidx2 = pdf2.set_index("d").index
        kidx1 = kdf1.set_index("a").index
        kidx2 = kdf2.set_index("d").index

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # Index from DataFrame with MultiIndex columns
        pdf1 = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        pdf2 = pd.DataFrame({"a": [7, 8, 9], "d": [10, 11, 12]})
        pdf1.columns = pd.MultiIndex.from_tuples([("a", "x"), ("b", "y")])
        pdf2.columns = pd.MultiIndex.from_tuples([("a", "x"), ("d", "y")])
        kdf1 = ks.from_pandas(pdf1)
        kdf2 = ks.from_pandas(pdf2)

        pidx1 = pdf1.set_index(("a", "x")).index
        pidx2 = pdf2.set_index(("d", "y")).index
        kidx1 = kdf1.set_index(("a", "x")).index
        kidx2 = kdf2.set_index(("d", "y")).index

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # MultiIndex
        pmidx = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                           ("c", "z", 3)])
        kmidx = ks.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                           ("c", "z", 3)])

        self.assert_eq(pmidx.append(pmidx), kmidx.append(kmidx))

        # MultiIndex with names
        pmidx1 = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                            ("c", "z", 3)],
                                           names=["x", "y", "z"])
        pmidx2 = pd.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                            ("c", "z", 3)],
                                           names=["p", "q", "r"])
        kmidx1 = ks.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                            ("c", "z", 3)],
                                           names=["x", "y", "z"])
        kmidx2 = ks.MultiIndex.from_tuples([("a", "x", 1), ("b", "y", 2),
                                            ("c", "z", 3)],
                                           names=["p", "q", "r"])

        self.assert_eq(pmidx1.append(pmidx2), kmidx1.append(kmidx2))

        self.assert_eq(pmidx2.append(pmidx1), kmidx2.append(kmidx1))

        # Level names of the result, for both append orders. The second
        # check used to repeat the first one verbatim, so the reverse order
        # was never verified.
        self.assert_eq(
            pmidx1.append(pmidx2).names,
            kmidx1.append(kmidx2).names)

        self.assert_eq(
            pmidx2.append(pmidx1).names,
            kmidx2.append(kmidx1).names)

        # Index & MultiIndex currently is not supported
        expected_error_message = r"append\(\) between Index & MultiIndex currently is not supported"
        with self.assertRaisesRegex(NotImplementedError,
                                    expected_error_message):
            kidx.append(kmidx)
        with self.assertRaisesRegex(NotImplementedError,
                                    expected_error_message):
            kmidx.append(kidx)
Ejemplo n.º 23
0
    def test_append(self):
        """Index.append / MultiIndex.append agree with pandas.

        Covered: plain and named indexes, indexes obtained from DataFrames
        (with flat and MultiIndex columns), MultiIndexes with and without
        level names, and the unsupported Index/MultiIndex combination. Where
        the two operands differ, both append orders are checked.
        """
        # Index
        pidx = pd.Index(range(10000))
        kidx = ks.Index(range(10000))

        self.assert_eq(pidx.append(pidx), kidx.append(kidx))

        # Index with name
        pidx1 = pd.Index(range(10000), name='a')
        pidx2 = pd.Index(range(10000), name='b')
        kidx1 = ks.Index(range(10000), name='a')
        kidx2 = ks.Index(range(10000), name='b')

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # Index from DataFrame
        pdf1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]},
                            index=['a', 'b', 'c'])
        pdf2 = pd.DataFrame({'a': [7, 8, 9], 'd': [10, 11, 12]},
                            index=['x', 'y', 'z'])
        kdf1 = ks.from_pandas(pdf1)
        kdf2 = ks.from_pandas(pdf2)

        pidx1 = pdf1.set_index('a').index
        pidx2 = pdf2.set_index('d').index
        kidx1 = kdf1.set_index('a').index
        kidx2 = kdf2.set_index('d').index

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # Index from DataFrame with MultiIndex columns
        pdf1 = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
        pdf2 = pd.DataFrame({'a': [7, 8, 9], 'd': [10, 11, 12]})
        pdf1.columns = pd.MultiIndex.from_tuples([('a', 'x'), ('b', 'y')])
        pdf2.columns = pd.MultiIndex.from_tuples([('a', 'x'), ('d', 'y')])
        kdf1 = ks.from_pandas(pdf1)
        kdf2 = ks.from_pandas(pdf2)

        pidx1 = pdf1.set_index(('a', 'x')).index
        pidx2 = pdf2.set_index(('d', 'y')).index
        kidx1 = kdf1.set_index(('a', 'x')).index
        kidx2 = kdf2.set_index(('d', 'y')).index

        self.assert_eq(pidx1.append(pidx2), kidx1.append(kidx2))

        self.assert_eq(pidx2.append(pidx1), kidx2.append(kidx1))

        # MultiIndex
        pmidx = pd.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                           ('c', 'z', 3)])
        kmidx = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                           ('c', 'z', 3)])

        self.assert_eq(pmidx.append(pmidx), kmidx.append(kmidx))

        # MultiIndex with names
        pmidx1 = pd.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                            ('c', 'z', 3)],
                                           names=['x', 'y', 'z'])
        pmidx2 = pd.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                            ('c', 'z', 3)],
                                           names=['p', 'q', 'r'])
        kmidx1 = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                            ('c', 'z', 3)],
                                           names=['x', 'y', 'z'])
        kmidx2 = ks.MultiIndex.from_tuples([('a', 'x', 1), ('b', 'y', 2),
                                            ('c', 'z', 3)],
                                           names=['p', 'q', 'r'])

        self.assert_eq(pmidx1.append(pmidx2), kmidx1.append(kmidx2))

        self.assert_eq(pmidx2.append(pmidx1), kmidx2.append(kmidx1))

        # Level names of the result, for both append orders. The second
        # check used to repeat the first one verbatim, so the reverse order
        # was never verified.
        self.assert_eq(
            pmidx1.append(pmidx2).names,
            kmidx1.append(kmidx2).names)

        self.assert_eq(
            pmidx2.append(pmidx1).names,
            kmidx2.append(kmidx1).names)

        # Index & MultiIndex currently is not supported
        expected_error_message = r"append\(\) between Index & MultiIndex currently is not supported"
        with self.assertRaisesRegex(NotImplementedError,
                                    expected_error_message):
            kidx.append(kmidx)
        with self.assertRaisesRegex(NotImplementedError,
                                    expected_error_message):
            kmidx.append(kidx)