def test_global_funcs():
    """Exercise the global engine registration and factory helpers."""
    # Out of the box the factory produces the native engine.
    assert isinstance(make_execution_engine(), NativeExecutionEngine)
    register_execution_engine(
        "xyz", lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs))
    assert isinstance(make_execution_engine("xyz"), _MockExecutionEngine)
    # on_dup="ignore" keeps the existing default registration in place.
    register_default_execution_engine(
        lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs),
        on_dup="ignore")
    assert not isinstance(make_execution_engine(), _MockExecutionEngine)
    # on_dup="overwrite" replaces the default registration.
    register_default_execution_engine(
        lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs),
        on_dup="overwrite")
    assert isinstance(make_execution_engine(), _MockExecutionEngine)
    sqlite_engine = SqliteEngine(make_execution_engine)
    # An engine instance passed to the factory is returned unchanged.
    assert make_sql_engine(sqlite_engine) is sqlite_engine
    assert not isinstance(
        make_sql_engine(None, make_execution_engine()), _MockSQlEngine)
    register_sql_engine("x", lambda engine: _MockSQlEngine(engine))
    assert isinstance(
        make_sql_engine("x", make_execution_engine()), _MockSQlEngine)
    # The default SQL engine applies to freshly made execution engines.
    register_default_sql_engine(
        lambda engine: _MockSQlEngine(engine, other=10))
    engine = make_execution_engine()
    assert isinstance(engine, _MockExecutionEngine)
    assert isinstance(engine.sql_engine, _MockSQlEngine)
    assert 10 == engine.sql_engine.other
def _register_engines() -> None:
    """Hook Spark into fugue's engine registry (no-op if already registered).

    Two entries are added: the string name ``"spark"`` (engine built from
    config alone) and the :class:`SparkSession` type (an existing session
    is wrapped directly).
    """
    register_execution_engine(
        "spark",
        lambda conf, **kwargs: SparkExecutionEngine(conf=conf),
        on_dup="ignore",
    )
    register_execution_engine(
        SparkSession,
        lambda session, conf, **kwargs: SparkExecutionEngine(session, conf=conf),
        on_dup="ignore",
    )
def test_sql():
    """Run a FugueSQL workflow against a freshly registered Dask engine."""
    register_execution_engine(
        "da", lambda conf, **kwargs: DaskExecutionEngine(conf=conf))
    ddf = dd.from_pandas(
        pd.DataFrame([[0], [1]], columns=["a"]), npartitions=2)
    workflow = FugueSQLWorkflow()
    workflow(
        """
        SELECT * FROM df WHERE a>0
        PRINT
        """,
        df=ddf,
    )
    workflow.run("da")
def test_sql():
    """Run a FugueSQL workflow against a Spark engine bound to one session."""
    session = SparkSession.builder.getOrCreate()
    # Bind the name "s" to an engine that reuses this exact session.
    register_execution_engine(
        "s",
        lambda conf, **kwargs: SparkExecutionEngine(
            conf=conf, spark_session=session),
    )
    sdf = session.createDataFrame(pd.DataFrame([[0], [1]], columns=["a"]))
    workflow = FugueSQLWorkflow()
    workflow(
        """
        SELECT * FROM df WHERE a>0
        PRINT
        """,
        df=sdf,
    )
    workflow.run("s")
def register_execution_engines(self):
    """Register execution engines with names.

    Registers the native engine, then tries to pull in the Spark and Dask
    engines; importing ``fugue_spark`` / ``fugue_dask`` registers them as a
    side effect when the dependency packages are installed. Missing
    dependencies are silently skipped — this is deliberately best-effort.
    """
    register_execution_engine(
        "native",
        lambda conf, **kwargs: NativeExecutionEngine(conf=conf),
        on_dup="ignore",
    )
    try:
        import pyspark  # noqa: F401
        import fugue_spark  # noqa: F401
    except ImportError:
        pass
    try:
        import dask.dataframe  # noqa: F401
        import fugue_dask  # noqa: F401
    except ImportError:
        pass
def register_execution_engines(self):
    """Register Kaggle-flavored engines on top of the base registrations.

    Each engine is registered under its name; whichever one matches
    ``self._default_engine`` is additionally installed as the default.
    An empty default selects the native engine.
    """
    super().register_execution_engines()
    register_execution_engine(
        "native",
        lambda conf, **kwargs: KaggleNativeExecutionEngine(conf=conf, **kwargs),
    )
    if self._default_engine in ("native", ""):
        register_default_execution_engine(
            lambda conf, **kwargs: KaggleNativeExecutionEngine(
                conf=conf, **kwargs)
        )
    # NOTE(review): unlike "native", the dask/spark factories drop **kwargs —
    # presumably intentional; confirm before unifying.
    register_execution_engine(
        "dask",
        lambda conf, **kwargs: KaggleDaskExecutionEngine(conf=conf),
    )
    if self._default_engine == "dask":
        register_default_execution_engine(
            lambda conf, **kwargs: KaggleDaskExecutionEngine(conf=conf),
        )
    register_execution_engine(
        "spark",
        lambda conf, **kwargs: KaggleSparkExecutionEngine(conf=conf),
    )
    if self._default_engine == "spark":
        register_default_execution_engine(
            lambda conf, **kwargs: KaggleSparkExecutionEngine(conf=conf),
        )
# NOTE(review): this chunk appears truncated — the `except ImportError:`
# below has no visible matching `try:`; presumably the fugue imports are
# wrapped in a try that begins above this excerpt. Confirm against the
# full file.
from fugue import WorkflowDataFrame, register_execution_engine
from fugue_sql import FugueSQLWorkflow
from triad.utils.convert import get_caller_global_local_vars
except ImportError:  # pragma: no cover
    raise ImportError(
        "Can not load the fugue module. If you want to use this integration, you need to install it."
    )

from typing import Any, Dict, Optional

import dask.dataframe as dd
from dask_sql.context import Context

# Replace fugue's stock "dask" engine with the dask-sql-backed one.
register_execution_engine(
    "dask", lambda conf: DaskSQLExecutionEngine(conf), on_dup="overwrite")


class DaskSQLEngine(fugue.execution.execution_engine.SQLEngine):
    """
    SQL engine for fugue which uses dask-sql instead of the native
    SQL implementation.

    Please note, that so far the native SQL engine in fugue understands a
    larger set of SQL commands, but in turns is (on average) slower in
    computation and scaling.
    """

    def __init__(self, *args, **kwargs):
        """Create a new instance."""
        super().__init__(*args, **kwargs)
def register() -> None:
    """Register engines for DuckDB under both the "duck" and "duckdb" aliases."""
    # NOTE(review): "DuckExeuctionEngine" looks misspelled but is referenced
    # consistently — presumably it matches the class's actual name; verify
    # before renaming.
    for alias in ("duck", "duckdb"):
        register_sql_engine(alias, lambda engine: DuckDBEngine(engine))
        register_execution_engine(alias, lambda conf: DuckExeuctionEngine(conf))
def _register_engines() -> None:
    """Expose the Dask engine under the name "dask".

    Uses ``on_dup="ignore"`` so any registration made elsewhere wins.
    """
    register_execution_engine(
        "dask",
        lambda conf, **kwargs: DaskExecutionEngine(conf=conf),
        on_dup="ignore",
    )