Example No. 1
def _get_interval_col(
    t, interval_ibis_expr, scope, timecontext, allowed_units=None
):
    # if interval expression is a binary op, translate expression into
    # an interval column and return
    if isinstance(interval_ibis_expr.op(), ops.IntervalBinaryOp):
        return t.translate(interval_ibis_expr, scope, timecontext)

    # otherwise, translate expression into a literal op and construct
    # interval column from literal value and dtype
    if isinstance(interval_ibis_expr.op(), ops.Literal):
        op = interval_ibis_expr.op()
    else:
        op = t.translate(interval_ibis_expr, scope, timecontext).op()

    dtype = op.dtype
    if not isinstance(dtype, dtypes.Interval):
        raise com.UnsupportedArgumentError(
            '{} expression cannot be converted to interval column. '
            'Must be Interval dtype.'.format(dtype)
        )
    if allowed_units and dtype.unit not in allowed_units:
        raise com.UnsupportedArgumentError(
            'Interval unit "{}" is not allowed. Allowed units are: '
            '{}'.format(dtype.unit, allowed_units)
        )
    return F.expr(
        'INTERVAL {} {}'.format(op.value, _time_unit_mapping[dtype.unit])
    )
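A minimal sketch of expressions that would reach this helper, assuming the public ibis.interval constructor (the names below are illustrative, not from the source):

import ibis

# Literal path: a plain interval literal is turned into a Spark INTERVAL expression,
# provided its unit ('h' here) is in allowed_units.
delay = ibis.interval(hours=1)

# Binary-op path: combining intervals yields an ops.IntervalBinaryOp,
# which is translated into an interval column instead.
total = ibis.interval(hours=1) + ibis.interval(minutes=30)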
Example No. 2
    def get_schema(
        self,
        table_name: str,
        database: str | None = None,
    ) -> sch.Schema:
        """Return a Schema object for the indicated table and database.

        Parameters
        ----------
        table_name
            Table name. May be fully qualified
        database
            Spark does not have a database argument for its table() method,
            so this must be None

        Returns
        -------
        Schema
            An ibis schema
        """
        if database is not None:
            raise com.UnsupportedArgumentError(
                'Spark does not support the `database` argument for '
                '`get_schema`'
            )

        df = self._session.table(table_name)

        return sch.infer(df)
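A hedged usage sketch; `con` stands for a PySpark backend connection created elsewhere (for example via ibis.pyspark.connect), and the table name is illustrative:

schema = con.get_schema('my_table')         # ibis Schema inferred from the Spark DataFrame
con.get_schema('my_table', database='db')   # raises com.UnsupportedArgumentError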
Example No. 3
def compile_limit(t, expr, scope, **kwargs):
    op = expr.op()
    if op.offset != 0:
        raise com.UnsupportedArgumentError(
            'PySpark backend does not support a non-zero offset for the '
            'limit operation. Got offset {}.'.format(op.offset))
    df = compile_with_scope(t, op.table, scope)
    return df.limit(op.n)
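On the expression side the offset comes from Table.limit; a sketch with `t` as an arbitrary table expression:

t.limit(10)             # offset defaults to 0, compiles to df.limit(10)
t.limit(10, offset=5)   # non-zero offset: raises UnsupportedArgumentError at compile time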
Example No. 4
    def validate_func_and_types(self, func):
        if isinstance(self.spark_output_type, (pt.MapType, pt.StructType)):
            raise com.IbisTypeError(
                'Spark does not support MapType or StructType output for '
                'Pandas UDFs')
        if not self.input_type:
            raise com.UnsupportedArgumentError(
                'Spark does not support 0-arg pandas UDFs. Instead, create '
                'a 1-arg pandas UDF and ignore the arg in your function')
        super().validate_func_and_types(func)
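The workaround the second error message suggests is to give the pandas UDF a single column argument and ignore it. A sketch in plain PySpark terms (the ibis UDF decorator is omitted; the constant value is illustrative):

import pandas as pd
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType

@F.pandas_udf(DoubleType())
def constant_col(dummy: pd.Series) -> pd.Series:
    # Take one argument and ignore it, since Spark has no 0-arg pandas UDFs.
    return pd.Series(42.0, index=dummy.index)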
Example No. 5
def compile_string_to_timestamp(t, expr, scope, **kwargs):
    op = expr.op()

    src_column = t.translate(op.arg, scope)
    fmt = op.format_str.op().value

    if op.timezone is not None and op.timezone.op().value != "UTC":
        raise com.UnsupportedArgumentError(
            'PySpark backend only supports timezone UTC for converting string '
            'to timestamp.')

    return F.to_timestamp(src_column, fmt)
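On the ibis side this corresponds to StringValue.to_timestamp; a sketch against a hypothetical table `t` with a string column `ts_str` (the format string follows Spark's datetime pattern syntax):

t.ts_str.to_timestamp('yyyy-MM-dd HH:mm:ss')   # compiles to F.to_timestamp(col, fmt)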
Example No. 6
def compile_timestamp_from_unix(t, expr, scope, **kwargs):
    op = expr.op()
    unixtime = t.translate(op.arg, scope)
    if not op.unit:
        return F.to_timestamp(F.from_unixtime(unixtime))
    elif op.unit == 's':
        fmt = 'yyyy-MM-dd HH:mm:ss'
        return F.to_timestamp(F.from_unixtime(unixtime, fmt), fmt)
    else:
        raise com.UnsupportedArgumentError(
            'PySpark backend does not support timestamp from unix time with '
            'unit {}. Supported unit is s.'.format(op.unit))
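The expression-side entry point is IntegerValue.to_timestamp; only seconds survive compilation here. A sketch with a hypothetical integer column `epoch`:

t.epoch.to_timestamp(unit='s')    # supported
t.epoch.to_timestamp(unit='ms')   # raises UnsupportedArgumentError at compile time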
Example No. 7
def compile_cast(t, expr, scope, **kwargs):
    op = expr.op()

    if isinstance(op.to, dtypes.Interval):
        if isinstance(op.arg.op(), ops.Literal):
            return interval(op.arg.op().value, op.to.unit)
        else:
            raise com.UnsupportedArgumentError(
                'Casting to intervals is only supported for literals '
                'in the PySpark backend. {} not allowed.'.format(type(op.arg)))

    if isinstance(op.to, dtypes.Array):
        cast_type = ibis_array_dtype_to_spark_dtype(op.to)
    else:
        cast_type = ibis_dtype_to_spark_dtype(op.to)

    src_column = t.translate(op.arg, scope)
    return src_column.cast(cast_type)
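Only literal values can be cast to an interval type here; non-literal interval casts are rejected, while every other cast goes through Spark's Column.cast. A sketch (the interval dtype spelling is an assumption about this ibis version, and `t` is a hypothetical table):

import ibis
import ibis.expr.datatypes as dt

ibis.literal(5).cast(dt.Interval(unit='s'))   # literal: converted to a Spark interval
t.name_col.cast('string')                     # ordinary cast via Column.cast
# t.int_col.cast(dt.Interval(unit='s'))       # non-literal: UnsupportedArgumentError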
Example No. 8
File: client.py  Project: LeeTZ/ibis
    def get_schema(self, table_name, database=None):
        """
        Return a Schema object for the indicated table and database

        Parameters
        ----------
        table_name : string
          May be fully qualified
        database : string
          Spark does not have a database argument for its table() method,
          so this must be None

        Returns
        -------
        schema : ibis Schema
        """
        if database is not None:
            raise com.UnsupportedArgumentError(
                'Spark does not support database param for table')

        df = self._session.table(table_name)

        return sch.infer(df)