Esempio n. 1
0
    def test_plot_backends_incorrect(self):
        """Check that an unknown plotting backend name is rejected on lookup.

        The ``plotting.backend`` option accepts the arbitrary string as-is;
        ``ks.plot._get_plot_backend`` is expected to raise ``ValueError``
        when asked to resolve it.
        """
        bogus_backend = "none_plotting_module"

        with ks.option_context("plotting.backend", bogus_backend):
            # The option stores whatever name it is given.
            configured = ks.options.plotting.backend
            self.assertEqual(configured, bogus_backend)

            # Resolving the backend is where the bad name must surface.
            self.assertRaises(
                ValueError, ks.plot._get_plot_backend, bogus_backend
            )
Esempio n. 2
0
    def test_plot_backends(self):
        """Check that the "plotly" backend resolves to a module of that name.

        The option is set inside a context manager, read back, and then the
        backend is resolved through ``ks.plot._get_plot_backend``; the
        returned module's ``__name__`` must equal the backend name.
        """
        backend_name = "plotly"

        with ks.option_context("plotting.backend", backend_name):
            active_backend = ks.options.plotting.backend
            self.assertEqual(active_backend, backend_name)

            resolved = ks.plot._get_plot_backend(backend_name)
            resolved_name = resolved.__name__
            self.assertEqual(resolved_name, backend_name)
Esempio n. 3
0
    def test_plot_backends(self):
        """Check which module the "plotly" backend resolves to.

        With ``plotting.backend`` set to "plotly",
        ``KoalasPlotAccessor._get_plot_backend`` must return a module whose
        ``__name__`` is "databricks.koalas.plot.plotly".
        """
        backend_name = "plotly"

        with ks.option_context("plotting.backend", backend_name):
            active_backend = ks.options.plotting.backend
            self.assertEqual(active_backend, backend_name)

            resolved = KoalasPlotAccessor._get_plot_backend(backend_name)
            resolved_name = resolved.__name__
            self.assertEqual(resolved_name, "databricks.koalas.plot.plotly")
Esempio n. 4
0
    def indexer_between_time(
        self,
        start_time: Union[datetime.time, str],
        end_time: Union[datetime.time, str],
        include_start: bool = True,
        include_end: bool = True,
    ) -> Index:
        """
        Return the index locations of values whose time of day lies between
        two given times (e.g., 9:00-9:30AM).

        Parameters
        ----------
        start_time, end_time : datetime.time, str
            Bounds of the time-of-day window, given either as a
            ``datetime.time`` object or as a string in one of the formats
            "%H:%M", "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S",
            "%I:%M:%S%p", "%I%M%S%p".
        include_start : bool, default True
            Whether values exactly at ``start_time`` are part of the result.
        include_end : bool, default True
            Whether values exactly at ``end_time`` are part of the result.

        Returns
        -------
        values_between_time : Index of integers

        Examples
        --------
        >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
        >>> kidx  # doctest: +NORMALIZE_WHITESPACE
        DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                       '2000-01-01 00:02:00'],
                      dtype='datetime64[ns]', freq=None)

        >>> kidx.indexer_between_time("00:01", "00:02").sort_values()
        Int64Index([1, 2], dtype='int64')

        >>> kidx.indexer_between_time("00:01", "00:02", include_end=False)
        Int64Index([1], dtype='int64')

        >>> kidx.indexer_between_time("00:01", "00:02", include_start=False)
        Int64Index([2], dtype='int64')
        """
        # The return annotation is deliberate: it declares the result as a
        # frame with a single integer column.
        def _select_between(batch) -> ks.DataFrame[int]:
            return batch.between_time(
                start_time, end_time, include_start, include_end
            )

        # Column-less frame: only this index is carried along.
        index_only = self.to_frame()[[]]
        position_column = verify_temp_column_name(index_only, "__id_column__")
        # The attached id column becomes the only column, so it is what
        # first_series() picks up below.
        with_positions = index_only.koalas.attach_id_column(
            "distributed-sequence", position_column
        )
        with ks.option_context("compute.default_index_type", "distributed"):
            # The index attached by apply_batch is discarded right after,
            # so the "distributed" default index type is enforced here.
            matched = with_positions.koalas.apply_batch(_select_between)
        positions = first_series(matched).rename(self.name)
        return ks.Index(positions)
Esempio n. 5
0
    def indexer_at_time(self, time: Union[datetime.time, str], asof: bool = False) -> Index:
        """
        Return the index locations of values that fall on a particular time
        of day (e.g. 9:30AM).

        Parameters
        ----------
        time : datetime.time or str
            Time of day to match, given either as a ``datetime.time`` object
            or as a string in one of the formats "%H:%M", "%H%M", "%I:%M%p",
            "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p".
        asof : bool, default False
            Not supported; any truthy value raises ``NotImplementedError``.

        Returns
        -------
        values_at_time : Index of integers

        Raises
        ------
        NotImplementedError
            If ``asof`` is truthy.

        Examples
        --------
        >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
        >>> kidx  # doctest: +NORMALIZE_WHITESPACE
        DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                       '2000-01-01 00:02:00'],
                      dtype='datetime64[ns]', freq=None)

        >>> kidx.indexer_at_time("00:00")
        Int64Index([0], dtype='int64')

        >>> kidx.indexer_at_time("00:01")
        Int64Index([1], dtype='int64')
        """
        # Reject the unsupported flag before doing any work.
        if asof:
            raise NotImplementedError("'asof' argument is not supported")

        # The return annotation is deliberate: it declares the result as a
        # frame with a single integer column.
        def _select_at(batch) -> ks.DataFrame[int]:
            return batch.at_time(time, asof)

        # Column-less frame: only this index is carried along.
        index_only = self.to_frame()[[]]
        position_column = verify_temp_column_name(index_only, "__id_column__")
        # The attached id column becomes the only column, so it is what
        # first_series() picks up below.
        with_positions = index_only.koalas.attach_id_column(
            "distributed-sequence", position_column
        )
        with ks.option_context("compute.default_index_type", "distributed"):
            # The index attached by apply_batch is discarded right after,
            # so the "distributed" default index type is enforced here.
            matched = with_positions.koalas.apply_batch(_select_at)
        positions = first_series(matched).rename(self.name)
        return ks.Index(positions)
 def test_default_index_distributed(self):
     """Check that the "distributed" default index yields unique labels.

     A 1000-row Spark range is wrapped in a Koalas DataFrame and collected
     to pandas; the number of distinct index labels must equal the number
     of rows.
     """
     with ks.option_context("compute.default_index_type", "distributed"):
         spark_frame = self.spark.range(1000)
         collected = ks.DataFrame(spark_frame).to_pandas()
         distinct_labels = set(collected.index)
         self.assertEqual(len(distinct_labels), len(collected))
 def test_default_index_distributed_sequence(self):
     """Check the frame built under the "distributed-sequence" default index.

     A Koalas DataFrame created from a 1000-row Spark range must compare
     equal (via ``assert_eq``) to a pandas DataFrame holding ids 0..999
     with its default index.
     """
     index_type = "distributed-sequence"
     with ks.option_context("compute.default_index_type", index_type):
         spark_frame = self.spark.range(1000)
         actual = ks.DataFrame(spark_frame)
         expected = pd.DataFrame({"id": list(range(1000))})
         self.assert_eq(actual, expected)
Esempio n. 8
0
 def test_groupby_apply_without_shortcut(self):
     """Re-run ``test_groupby_apply`` with ``compute.shortcut_limit`` set to 0."""
     zero_shortcut_limit = ks.option_context("compute.shortcut_limit", 0)
     with zero_shortcut_limit:
         self.test_groupby_apply()