def test_plot_backends_incorrect(self):
    """Resolving a plotting backend name that is not a real module raises ``ValueError``."""
    bogus_backend = "none_plotting_module"

    with ks.option_context("plotting.backend", bogus_backend):
        # The option holds the name exactly as given ...
        configured = ks.options.plotting.backend
        self.assertEqual(configured, bogus_backend)

        # ... and resolving that name is expected to fail.
        with self.assertRaises(ValueError):
            ks.plot._get_plot_backend(bogus_backend)
def test_plot_backends(self):
    """The "plotly" backend option resolves to a module whose name is "plotly"."""
    # NOTE(review): another method named ``test_plot_backends`` follows in the
    # visible source; if both live in the same class the later definition
    # shadows this one - confirm which variant is intended to run.
    backend_name = "plotly"

    with ks.option_context("plotting.backend", backend_name):
        configured = ks.options.plotting.backend
        self.assertEqual(configured, backend_name)

        resolved = ks.plot._get_plot_backend(backend_name)
        self.assertEqual(resolved.__name__, backend_name)
def test_plot_backends(self):
    """The "plotly" backend option resolves to the ``databricks.koalas.plot.plotly`` module."""
    # NOTE(review): an earlier method in the visible source is also named
    # ``test_plot_backends``; if both live in the same class this definition
    # replaces it - confirm that is intended.
    backend_name = "plotly"

    with ks.option_context("plotting.backend", backend_name):
        configured = ks.options.plotting.backend
        self.assertEqual(configured, backend_name)

        resolved = KoalasPlotAccessor._get_plot_backend(backend_name)
        self.assertEqual(resolved.__name__, "databricks.koalas.plot.plotly")
def indexer_between_time(
    self,
    start_time: Union[datetime.time, str],
    end_time: Union[datetime.time, str],
    include_start: bool = True,
    include_end: bool = True,
) -> Index:
    """
    Return index locations of values between particular times of day
    (e.g., 9:00-9:30AM).

    Parameters
    ----------
    start_time, end_time : datetime.time, str
        Time passed either as object (datetime.time) or as string in
        appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
        "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p").
    include_start : bool, default True
        Whether a value exactly at `start_time` is part of the result.
    include_end : bool, default True
        Whether a value exactly at `end_time` is part of the result.

    Returns
    -------
    values_between_time : Index of integers

    Examples
    --------
    >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
    >>> kidx  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                   '2000-01-01 00:02:00'],
                  dtype='datetime64[ns]', freq=None)

    >>> kidx.indexer_between_time("00:01", "00:02").sort_values()
    Int64Index([1, 2], dtype='int64')

    >>> kidx.indexer_between_time("00:01", "00:02", include_end=False)
    Int64Index([1], dtype='int64')

    >>> kidx.indexer_between_time("00:01", "00:02", include_start=False)
    Int64Index([2], dtype='int64')
    """

    def pandas_between_time(pdf) -> ks.DataFrame[int]:
        # Runs on each pandas batch; the time filtering is delegated to pandas.
        selected = pdf.between_time(start_time, end_time, include_start, include_end)
        return selected

    # Frame that keeps this index but selects no data columns.
    index_only = self.to_frame()[[]]
    id_col = verify_temp_column_name(index_only, "__id_column__")
    # The attached id column is what ends up in the returned Index.
    with_ids = index_only.koalas.attach_id_column("distributed-sequence", id_col)

    with ks.option_context("compute.default_index_type", "distributed"):
        # The index attached by the batch apply below is dropped right
        # afterwards, so the "distributed" default index type is enforced here.
        filtered = with_ids.koalas.apply_batch(pandas_between_time)

    locations = first_series(filtered).rename(self.name)
    return ks.Index(locations)
def indexer_at_time(self, time: Union[datetime.time, str], asof: bool = False) -> Index:
    """
    Return index locations of values at particular time of day
    (e.g. 9:30AM).

    Parameters
    ----------
    time : datetime.time or str
        Time passed in either as object (datetime.time) or as string in
        appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
        "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p").
    asof : bool, default False
        Not supported; any truthy value raises ``NotImplementedError``.

    Returns
    -------
    values_at_time : Index of integers

    Raises
    ------
    NotImplementedError
        If `asof` is truthy.

    Examples
    --------
    >>> kidx = ks.date_range("2000-01-01", periods=3, freq="T")
    >>> kidx  # doctest: +NORMALIZE_WHITESPACE
    DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 00:01:00',
                   '2000-01-01 00:02:00'],
                  dtype='datetime64[ns]', freq=None)

    >>> kidx.indexer_at_time("00:00")
    Int64Index([0], dtype='int64')

    >>> kidx.indexer_at_time("00:01")
    Int64Index([1], dtype='int64')
    """
    if asof:
        raise NotImplementedError("'asof' argument is not supported")

    def pandas_at_time(pdf) -> ks.DataFrame[int]:
        # Runs on each pandas batch; the time matching is delegated to pandas.
        selected = pdf.at_time(time, asof)
        return selected

    # Frame that keeps this index but selects no data columns.
    index_only = self.to_frame()[[]]
    id_col = verify_temp_column_name(index_only, "__id_column__")
    # The attached id column is what ends up in the returned Index.
    with_ids = index_only.koalas.attach_id_column("distributed-sequence", id_col)

    with ks.option_context("compute.default_index_type", "distributed"):
        # The index attached by the batch apply below is dropped right
        # afterwards, so the "distributed" default index type is enforced here.
        filtered = with_ids.koalas.apply_batch(pandas_at_time)

    locations = first_series(filtered).rename(self.name)
    return ks.Index(locations)
def test_default_index_distributed(self):
    """With the "distributed" default index type, every index label is distinct."""
    with ks.option_context("compute.default_index_type", "distributed"):
        spark_frame = self.spark.range(1000)
        local_frame = ks.DataFrame(spark_frame).to_pandas()

        # Uniqueness check: as many distinct labels as there are rows.
        distinct_labels = set(local_frame.index)
        self.assertEqual(len(distinct_labels), len(local_frame))
def test_default_index_distributed_sequence(self):
    """With "distributed-sequence", a Spark range frame equals the default-indexed pandas frame."""
    with ks.option_context("compute.default_index_type", "distributed-sequence"):
        spark_frame = self.spark.range(1000)

        actual = ks.DataFrame(spark_frame)
        # pandas assigns its default index here; the Koalas frame is compared against it.
        expected = pd.DataFrame({"id": list(range(1000))})
        self.assert_eq(actual, expected)
def test_groupby_apply_without_shortcut(self):
    """Re-run ``test_groupby_apply`` with ``compute.shortcut_limit`` set to 0."""
    shortcut_disabled = ks.option_context("compute.shortcut_limit", 0)
    with shortcut_disabled:
        self.test_groupby_apply()