Example #1
0
def df():
    """Fixture: a DataFrame with integer columns ``a`` and ``b``.

    Backed by a single in-memory record batch.
    """
    context = ExecutionContext()

    # One RecordBatch is enough to seed the frame.
    seed_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 2, 3]), pa.array([4, 4, 6])],
        names=["a", "b"],
    )
    return context.create_dataframe([[seed_batch]])
Example #2
0
    def do_connect(
        self,
        config: Mapping[str, str | Path] | SessionContext,
    ) -> None:
        """Create a Datafusion backend for use with Ibis.

        Parameters
        ----------
        config
            Mapping of table names to files, or an existing
            ``SessionContext`` to adopt directly.

        Raises
        ------
        ValueError
            If a mapped file has an extension other than ``.csv`` or
            ``.parquet``.

        Examples
        --------
        >>> import ibis
        >>> config = {"t": "path/to/file.parquet", "s": "path/to/file.csv"}
        >>> ibis.datafusion.connect(config)
        """
        if isinstance(config, SessionContext):
            # Adopt the caller's context as-is. A SessionContext has no
            # .items(), so the registration loop below must be skipped —
            # previously this fell through and raised AttributeError.
            self._context = config
            return

        self._context = SessionContext()

        for name, path in config.items():
            strpath = str(path)
            if strpath.endswith('.csv'):
                self.register_csv(name, path)
            elif strpath.endswith('.parquet'):
                self.register_parquet(name, path)
            else:
                raise ValueError(
                    "Currently the DataFusion backend only supports CSV "
                    "files with the extension .csv and Parquet files with "
                    "the .parquet extension.")
Example #3
0
def test_join():
    """Inner join on column ``a`` keeps only the keys present in both frames."""
    context = ExecutionContext()

    left_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
        names=["a", "b"],
    )
    left = context.create_dataframe([[left_batch]])

    right_batch = pa.RecordBatch.from_arrays(
        [pa.array([1, 2]), pa.array([8, 10])],
        names=["a", "c"],
    )
    right = context.create_dataframe([[right_batch]])

    joined = left.join(right, join_keys=(["a"], ["a"]), how="inner")
    # Sort so the row order is deterministic before materializing.
    ordered = joined.sort([f.col("a").sort(ascending=True)])
    result = pa.Table.from_batches(ordered.collect())

    assert result.to_pydict() == {"a": [1, 2], "c": [8, 10], "b": [4, 5]}
Example #4
0
def df():
    """Fixture: a single-column DataFrame of floats named ``value``."""
    context = ExecutionContext()
    # A lone RecordBatch seeds the frame.
    values = pa.RecordBatch.from_arrays(
        [pa.array([0.1, -0.7, 0.55])],
        names=["value"],
    )
    return context.create_dataframe([[values]])
Example #5
0
def ctx():
    """Fixture: a fresh DataFusion ExecutionContext."""
    execution_context = ExecutionContext()
    return execution_context
Example #6
0
class Backend(BaseBackend):
    """Ibis backend that compiles and executes expressions with DataFusion."""

    name = 'datafusion'
    builder = None

    @property
    def version(self):
        """Return the installed ``datafusion`` package version string."""
        try:
            import importlib.metadata as importlib_metadata
        except ImportError:
            # TODO: remove this when Python 3.9 support is dropped
            import importlib_metadata
        return importlib_metadata.version("datafusion")

    def do_connect(
        self,
        config: Mapping[str, str | Path] | SessionContext,
    ) -> None:
        """Create a Datafusion backend for use with Ibis.

        Parameters
        ----------
        config
            Mapping of table names to files, or an existing
            ``SessionContext`` to adopt directly.

        Raises
        ------
        ValueError
            If a mapped file has an extension other than ``.csv`` or
            ``.parquet``.

        Examples
        --------
        >>> import ibis
        >>> config = {"t": "path/to/file.parquet", "s": "path/to/file.csv"}
        >>> ibis.datafusion.connect(config)
        """
        if isinstance(config, SessionContext):
            # Adopt the caller's context as-is. A SessionContext has no
            # .items(), so the registration loop below must be skipped —
            # previously this fell through and raised AttributeError.
            self._context = config
            return

        self._context = SessionContext()

        for name, path in config.items():
            strpath = str(path)
            if strpath.endswith('.csv'):
                self.register_csv(name, path)
            elif strpath.endswith('.parquet'):
                self.register_parquet(name, path)
            else:
                raise ValueError(
                    "Currently the DataFusion backend only supports CSV "
                    "files with the extension .csv and Parquet files with "
                    "the .parquet extension.")

    def current_database(self) -> str:
        """Not supported by the DataFusion backend."""
        raise NotImplementedError()

    def list_databases(self, like: str | None = None) -> list[str]:
        """Not supported by the DataFusion backend."""
        raise NotImplementedError()

    def list_tables(
        self,
        like: str | None = None,
        database: str | None = None,
    ) -> list[str]:
        """List the available tables.

        Parameters
        ----------
        like
            Optional regular expression; only names matching it are kept.
        database
            Ignored; DataFusion has a single default database here.
        """
        tables = list(self._context.tables())
        if like is not None:
            pattern = re.compile(like)
            return list(filter(lambda t: pattern.findall(t), tables))
        return tables

    def table(
        self,
        name: str,
        schema: sch.Schema | None = None,
    ) -> ir.Table:
        """Get an ibis expression representing a DataFusion table.

        Parameters
        ----------
        name
            The name of the table to retrieve
        schema
            An optional schema for the table; when omitted it is inferred
            from the registered DataFusion table

        Returns
        -------
        Table
            A table expression
        """
        catalog = self._context.catalog()
        database = catalog.database('public')
        table = database.table(name)
        if schema is None:
            # Only infer when the caller did not supply a schema; the
            # parameter was previously overwritten unconditionally.
            schema = sch.infer(table.schema)
        return self.table_class(name, schema, self).to_expr()

    def register_csv(
        self,
        name: str,
        path: str | Path,
        schema: sch.Schema | None = None,
    ) -> None:
        """Register a CSV file with `name` located at `path`.

        Parameters
        ----------
        name
            The name of the table
        path
            The path to the CSV file
        schema
            An optional schema
        """
        self._context.register_csv(name, str(path), schema=schema)

    def register_parquet(
        self,
        name: str,
        path: str | Path,
    ) -> None:
        """Register a parquet file with `name` located at `path`.

        Parameters
        ----------
        name
            The name of the table
        path
            The path to the Parquet file
        """
        self._context.register_parquet(name, str(path))

    def execute(
        self,
        expr: ir.Expr,
        params: Mapping[ir.Expr, object] = None,
        limit: str = 'default',
        **kwargs: Any,
    ):
        """Compile `expr` and execute it, returning pandas/Python values.

        Tables return a ``pandas.DataFrame``, columns a ``pandas.Series``,
        and scalars a plain Python value.

        Raises
        ------
        com.IbisError
            If `expr` is not a Table, Column, or Scalar expression.
        """
        if isinstance(expr, ir.Table):
            frame = self.compile(expr, params, **kwargs)
            table = _to_pyarrow_table(frame)
            return table.to_pandas()
        elif isinstance(expr, ir.Column):
            # expression must be named for the projection
            expr = expr.name('tmp').to_projection()
            frame = self.compile(expr, params, **kwargs)
            table = _to_pyarrow_table(frame)
            return table['tmp'].to_pandas()
        elif isinstance(expr, ir.Scalar):
            if expr.op().root_tables():
                # there are associated datafusion tables so convert the expr
                # to a selection which we can directly convert to a datafusion
                # plan
                expr = expr.name('tmp').to_projection()
                frame = self.compile(expr, params, **kwargs)
            else:
                # doesn't have any tables associated so create a plan from a
                # dummy datafusion table
                compiled = self.compile(expr, params, **kwargs)
                frame = self._context.empty_table().select(compiled)
            table = _to_pyarrow_table(frame)
            return table[0][0].as_py()
        else:
            raise com.IbisError(
                f"Cannot execute expression of type: {type(expr)}")

    def compile(
        self,
        expr: ir.Expr,
        params: Mapping[ir.Expr, object] = None,
        **kwargs: Any,
    ):
        """Translate an ibis expression to a DataFusion plan.

        ``params`` and ``kwargs`` are currently unused.
        """
        return translate(expr)

    @classmethod
    @lru_cache
    def _get_operations(cls):
        # Cached per class: the translation registry does not change at
        # runtime, so compute the supported-ops set once.
        from ibis.backends.datafusion.compiler import translate

        return frozenset(op for op in translate.registry
                         if issubclass(op, ops.Value))

    @classmethod
    def has_operation(cls, operation: type[ops.Value]) -> bool:
        """Return whether `operation` (or a superclass of it) is supported."""
        op_classes = cls._get_operations()
        return operation in op_classes or any(
            issubclass(operation, op_impl) for op_impl in op_classes)