Example #1
0
File: registry.py Project: gityow/fugue
def _register_annotation_converters() -> None:
    """Register the Dask-specific annotation converters with Fugue.

    Each (annotation, param class) pair is registered at the same
    priority (0.8) via a ``SimpleAnnotationConverter``.
    """
    pairs = [
        (DaskExecutionEngine, _DaskExecutionEngineParam),
        (dd.DataFrame, _DaskDataFrameParam),
    ]
    for annotation, param_cls in pairs:
        register_annotation_converter(
            0.8,
            SimpleAnnotationConverter(
                annotation,
                # bind param_cls as a default to avoid late-binding closure
                lambda param, _cls=param_cls: _cls(param),
            ),
        )
Example #2
0
File: registry.py Project: gityow/fugue
def _register_annotation_converters() -> None:
    """Register the Spark-specific annotation converters with Fugue.

    All converters are registered at the same priority (0.8); each maps
    an annotation type to its corresponding param wrapper class.
    """
    pairs = [
        (SparkExecutionEngine, _SparkExecutionEngineParam),
        (SparkSession, _SparkSessionParam),
        (SparkContext, _SparkContextParam),
        (ps.DataFrame, _SparkDataFrameParam),
        (pr.RDD, _RddParam),
    ]
    for annotation, param_cls in pairs:
        register_annotation_converter(
            0.8,
            SimpleAnnotationConverter(
                annotation,
                # bind param_cls as a default to avoid late-binding closure
                lambda param, _cls=param_cls: _cls(param),
            ),
        )
Example #3
0
        if not partition_spec.empty:
            self.log.warning(  # pragma: no cover
                "partition_spec is not respected in %s.save_df", self)
        self.fs.makedirs(os.path.dirname(path), recreate=True)
        df = self.to_df(df)
        save_df(df,
                path,
                format_hint=format_hint,
                mode=mode,
                fs=self.fs,
                **kwargs)


class _NativeExecutionEngineParam(ExecutionEngineParam):
    """ExecutionEngineParam specialized for :class:`NativeExecutionEngine`."""

    def __init__(self, param: Optional[inspect.Parameter]):
        super().__init__(
            param,
            annotation="NativeExecutionEngine",
            engine_type=NativeExecutionEngine,
        )


# Make NativeExecutionEngine usable as a function annotation (priority 0.8).
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        NativeExecutionEngine, lambda p: _NativeExecutionEngineParam(p)
    ),
)
Example #4
0
    def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
        """Convert *df* to a native ``pyarrow.Table`` for the user function."""
        assert isinstance(ctx, ExecutionEngine)
        pdf = ctx.to_df(df).as_pandas()
        return ArrowDataFrame(pdf).native

    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        """Wrap a ``pyarrow.Table`` produced by the user back into an engine DataFrame."""
        assert isinstance(output, pa.Table)
        assert isinstance(ctx, ExecutionEngine)
        pdf = output.to_pandas()
        return ctx.to_df(pdf, schema=schema)

    def count(self, df: DataFrame) -> int:
        """Counting is intentionally unsupported for this param type."""
        msg = "not allowed"
        raise NotImplementedError(msg)


# Allow pyarrow.Table annotations to be handled by ArrowDataFrameParam.
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        pa.Table, lambda p: ArrowDataFrameParam(p)
    ),
)


class NativeExecutionEngineSqliteTests(ExecutionEngineTests.Tests):
    """Engine test suite run against NativeExecutionEngine with SqliteEngine SQL."""

    def make_engine(self):
        engine = NativeExecutionEngine(dict(test=True))
        engine.set_sql_engine(SqliteEngine(engine))
        return engine

    def test_map_with_dict_col(self):
        # TODO: add back
        return


class NativeExecutionEngineBuiltInSqliteTests(BuiltInTests.Tests):
    def make_engine(self):
Example #5
0
    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        """Convert an ``RDD`` produced by the user back into an engine DataFrame."""
        assert isinstance(output, pr.RDD)
        assert isinstance(ctx, SparkExecutionEngine)
        rdd = output
        return ctx.to_df(rdd, schema=schema)

    def count(self, df: Any) -> int:  # pragma: no cover
        """Counting rows of an RDD param is deliberately unsupported."""
        msg = "not allowed"
        raise NotImplementedError(msg)

    def need_schema(self) -> Optional[bool]:
        """An explicit schema is always required for the RDD output conversion."""
        return True


# Register Spark engine/session annotations at priority 0.8.
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkExecutionEngine, lambda p: _SparkExecutionEngineParam(p)
    ),
)

register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkSession, lambda p: _SparkSessionParam(p)
    ),
)

register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        SparkContext,
Example #6
0
class _DaskDataFrameParam(DataFrameParam):
    """DataFrameParam mapping Fugue DataFrames to ``dask.dataframe.DataFrame``."""

    def __init__(self, param: Optional[inspect.Parameter]):
        super().__init__(param, annotation="dask.dataframe.DataFrame")

    def to_input_data(self, df: DataFrame, ctx: Any) -> Any:
        # Hand the user function the engine's native dask DataFrame.
        assert isinstance(ctx, DaskExecutionEngine)
        native = ctx.to_df(df).native
        return native

    def to_output_df(self, output: Any, schema: Any, ctx: Any) -> DataFrame:
        # Wrap the dask DataFrame produced by the user function back up.
        assert isinstance(output, dd.DataFrame)
        assert isinstance(ctx, DaskExecutionEngine)
        return ctx.to_df(output, schema=schema)

    def count(self, df: DataFrame) -> int:  # pragma: no cover
        # Counting is intentionally unsupported for this param type.
        msg = "not allowed"
        raise NotImplementedError(msg)


# Register Dask engine/dataframe annotations at priority 0.8.
register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        DaskExecutionEngine, lambda p: _DaskExecutionEngineParam(p)
    ),
)

register_annotation_converter(
    0.8,
    SimpleAnnotationConverter(
        dd.DataFrame, lambda p: _DaskDataFrameParam(p)
    ),
)