def _register_annotation_converters() -> None:
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            DaskExecutionEngine,
            lambda param: _DaskExecutionEngineParam(param),
        ),
    )
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            dd.DataFrame, lambda param: _DaskDataFrameParam(param)
        ),
    )
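# Usage sketch (illustrative, not part of this module): once the converters above
# are registered, a function annotated with dd.DataFrame can run through Fugue and
# receive a native Dask DataFrame. Assumes fugue[dask] is installed; `add_one` is
# a made-up name.
import pandas as pd
import dask.dataframe as dd
from fugue import transform

def add_one(df: dd.DataFrame) -> dd.DataFrame:
    # Fugue unwraps its own DataFrame into a native dd.DataFrame before this call.
    return df.assign(b=df["a"] + 1)

result = transform(
    pd.DataFrame({"a": [1, 2]}), add_one, schema="a:long,b:long", engine="dask"
)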
def _register_annotation_converters() -> None:
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            SparkExecutionEngine,
            lambda param: _SparkExecutionEngineParam(param),
        ),
    )
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            SparkSession,
            lambda param: _SparkSessionParam(param),
        ),
    )
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            SparkContext,
            lambda param: _SparkContextParam(param),
        ),
    )
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            ps.DataFrame,
            lambda param: _SparkDataFrameParam(param),
        ),
    )
    register_annotation_converter(
        0.8,
        SimpleAnnotationConverter(
            pr.RDD,
            lambda param: _RddParam(param),
        ),
    )
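# Usage sketch (illustrative): with these converters registered, a creator or
# processor can request Spark-native objects purely by annotation. Assumes a
# working pyspark install; `make_df` is a made-up name. Such a function would be
# used inside a workflow running on SparkExecutionEngine, e.g.
# dag.create(make_df, schema="a:long").
from pyspark.sql import SparkSession
import pyspark.sql as ps

def make_df(spark: SparkSession) -> ps.DataFrame:
    # _SparkSessionParam injects the engine's live SparkSession here.
    return spark.createDataFrame([(0,), (1,)], "a long")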
        if not partition_spec.empty:
            self.log.warning(  # pragma: no cover
                "partition_spec is not respected in %s.save_df", self
            )
        self.fs.makedirs(os.path.dirname(path), recreate=True)
        df = self.to_df(df)
        save_df(df, path, format_hint=format_hint, mode=mode, fs=self.fs, **kwargs)


class _NativeExecutionEngineParam(ExecutionEngineParam):
    def __init__(
        self,
        param: Optional[inspect.Parameter],
    ):
        super().__init__(
            param,
            annotation="NativeExecutionEngine",
            engine_type=NativeExecutionEngine,
        )


register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        NativeExecutionEngine,
        lambda param: _NativeExecutionEngineParam(param),
    ),
)
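# Usage sketch (illustrative): the registration above lets a function receive the
# running NativeExecutionEngine itself by annotation, e.g. to read its config.
# `tag_rows` is a made-up name; List[Dict[str, Any]] is one of Fugue's supported
# dataframe annotations.
from typing import Any, Dict, List

def tag_rows(
    engine: NativeExecutionEngine, df: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    # _NativeExecutionEngineParam injects the engine; conf is a triad ParamDict.
    flag = engine.conf.get("test", False)
    return [dict(r, test=flag) for r in df]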
    def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
        assert isinstance(ctx, ExecutionEngine)
        return ArrowDataFrame(ctx.to_df(df).as_pandas()).native

    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        assert isinstance(output, pa.Table)
        assert isinstance(ctx, ExecutionEngine)
        return ctx.to_df(output.to_pandas(), schema=schema)

    def count(self, df: DataFrame) -> int:
        raise NotImplementedError("not allowed")


register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(pa.Table, lambda param: ArrowDataFrameParam(param)),
)


class NativeExecutionEngineSqliteTests(ExecutionEngineTests.Tests):
    def make_engine(self):
        e = NativeExecutionEngine(dict(test=True))
        e.set_sql_engine(SqliteEngine(e))
        return e

    def test_map_with_dict_col(self):
        # TODO: add back
        return


class NativeExecutionEngineBuiltInSqliteTests(BuiltInTests.Tests):
    def make_engine(self):
        # body assumed to mirror NativeExecutionEngineSqliteTests above
        e = NativeExecutionEngine(dict(test=True))
        e.set_sql_engine(SqliteEngine(e))
        return e
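# Usage sketch (illustrative): with the pa.Table converter registered, a function
# can consume and produce Arrow tables directly; `double_a` is a made-up name.
# Note count() is deliberately disallowed, so Fugue cannot be asked for a row
# count on this param type.
import pyarrow as pa
import pyarrow.compute as pc

def double_a(df: pa.Table) -> pa.Table:
    # to_input_data hands this function a pa.Table built from the engine's data.
    idx = df.schema.get_field_index("a")
    return df.set_column(idx, "a", pc.multiply(df.column("a"), 2))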
    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        assert isinstance(output, pr.RDD)
        assert isinstance(ctx, SparkExecutionEngine)
        return ctx.to_df(output, schema=schema)

    def count(self, df: Any) -> int:  # pragma: no cover
        raise NotImplementedError("not allowed")

    def need_schema(self) -> Optional[bool]:
        return True


register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkExecutionEngine,
        lambda param: _SparkExecutionEngineParam(param),
    ),
)
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkSession,
        lambda param: _SparkSessionParam(param),
    ),
)
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkContext,
        # completed from the identical registration list earlier in this section
        lambda param: _SparkContextParam(param),
    ),
)
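# Usage sketch (illustrative): because need_schema() returns True, a function
# returning a raw RDD must have its output schema declared at the call site;
# Fugue cannot infer one from an RDD. `double_values` is a made-up name.
import pyspark as pr

def double_values(df: pr.RDD) -> pr.RDD:
    # Output rows must line up with the schema declared where this is used.
    return df.map(lambda row: [row[0], row[0] * 2])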
class _DaskDataFrameParam(DataFrameParam):
    def __init__(self, param: Optional[inspect.Parameter]):
        super().__init__(param, annotation="dask.dataframe.DataFrame")

    def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
        assert isinstance(ctx, DaskExecutionEngine)
        return ctx.to_df(df).native

    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        assert isinstance(output, dd.DataFrame)
        assert isinstance(ctx, DaskExecutionEngine)
        return ctx.to_df(output, schema=schema)

    def count(self, df: DataFrame) -> int:  # pragma: no cover
        raise NotImplementedError("not allowed")


register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        DaskExecutionEngine,
        lambda param: _DaskExecutionEngineParam(param),
    ),
)
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(dd.DataFrame, lambda param: _DaskDataFrameParam(param)),
)
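# Minimal sketch of the round trip these hooks implement (illustrative, assuming
# a default-constructible DaskExecutionEngine): on the way in, to_input_data
# unwraps the engine's DataFrame into a native dd.DataFrame; on the way out,
# to_output_df rewraps the native result under an explicit schema.
import pandas as pd

engine = DaskExecutionEngine()
param = _DaskDataFrameParam(None)
fdf = engine.to_df(pd.DataFrame(dict(a=[1, 2])), schema="a:long")
ddf = param.to_input_data(fdf, engine)  # native dd.DataFrame
fdf2 = param.to_output_df(ddf, "a:long", engine)  # back to a Fugue DataFrame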