Ejemplo n.º 1
0
def raise_unsupported_expr_error(expr: ibis.Expr):
    """Signal that the HeavyDB backend cannot compile *expr*.

    Parameters
    ----------
    expr : ibis.Expr
        Expression whose operation is not supported.

    Raises
    ------
    com.UnsupportedOperationError
        Always; the message names the offending operation type.
    """
    template = "HeavyDB backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
Ejemplo n.º 2
0
def spark_compiles_arbitrary(translator, expr):
    """Compile an ``arbitrary`` reduction to Spark SQL first()/last().

    Raises ``com.UnsupportedOperationError`` for any ``how`` other than
    None, 'first', or 'last'.
    """
    arg, how, where = expr.op().args

    # Apply the optional filter by nulling out non-matching rows; the True
    # flag below makes first()/last() skip those nulls.
    if where is not None:
        arg = where.ifelse(arg, ibis.NA)

    templates = {
        None: 'first({}, True)',
        'first': 'first({}, True)',
        'last': 'last({}, True)',
    }
    template = templates.get(how)
    if template is None:
        raise com.UnsupportedOperationError(
            '{!r} value not supported for arbitrary in Spark SQL'.format(how)
        )
    return template.format(translator.translate(arg))
Ejemplo n.º 3
0
def raise_unsupported_op_error(translator, expr, *args):
    """Signal that the OmniSciDB backend cannot compile *expr*.

    Parameters
    ----------
    expr : ibis.Expr
        Expression whose operation is not supported.

    Raises
    ------
    com.UnsupportedOperationError
        Always; the message names the offending operation type.
    """
    template = "OmniSciDB backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
Ejemplo n.º 4
0
    def _formatter(translator, expr):
        """Render ``func(arg, offset)``, validating the INTERVAL unit.

        Closes over ``func`` and ``units`` from the enclosing scope.
        """
        operation = expr.op()
        arg, offset = operation.args

        unit = offset.type().unit
        if unit not in units:
            raise com.UnsupportedOperationError(
                "BigQuery does not allow binary operation "
                "{} with INTERVAL offset {}".format(func, unit)
            )
        return "{}({}, {})".format(
            func,
            translator.translate(arg),
            translator.translate(offset),
        )
Ejemplo n.º 5
0
def _timestamp_truncate(translator, expr):
    """Compile timestamp truncation to Spark SQL ``date_trunc``.

    Raises ``com.UnsupportedOperationError`` when the ibis unit has no
    Spark equivalent.
    """
    arg, unit = expr.op().args
    formatted_arg = translator.translate(arg)

    if unit not in _spark_timestamp_unit_names:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in timestamp truncate'.format(unit))
    spark_unit = _spark_timestamp_unit_names[unit]

    if spark_unit == 'DAY':
        # date_trunc yields a timestamp; day truncation should be a date.
        return "date(date_trunc({!r}, {}))".format(spark_unit, formatted_arg)
    return "date_trunc({!r}, {})".format(spark_unit, formatted_arg)
Ejemplo n.º 6
0
def _literal(t, expr):
    """Translate an ibis literal to a SQLAlchemy literal for MySQL.

    Intervals become ``INTERVAL :value <UNIT>`` text clauses; sets become
    lists of literals; everything else is a plain ``sa.literal`` (with
    pandas Timestamps converted to stdlib datetimes first).
    """
    if isinstance(expr, ir.IntervalScalar):
        unit = expr.type().unit
        # MySQL has no sub-second INTERVAL units below microseconds.
        if unit in {'ms', 'ns'}:
            raise com.UnsupportedOperationError(
                'MySQL does not allow operation '
                'with INTERVAL offset {}'.format(unit))
        text_unit = expr.type().resolution.upper()
        return sa.text(f'INTERVAL :value {text_unit}').bindparams(
            value=expr.op().value)
    if isinstance(expr, ir.SetScalar):
        return [sa.literal(element) for element in expr.op().value]
    value = expr.op().value
    if isinstance(value, pd.Timestamp):
        value = value.to_pydatetime()
    return sa.literal(value)
Ejemplo n.º 7
0
def _canonicalize_interval(t, interval, scope, timecontext, **kwargs):
    """Convert an interval to an integer number of seconds.

    When pyspark casts a timestamp to an integer type it uses the number
    of seconds since the epoch, so ibis intervals must be converted to
    match. Plain ints pass through unchanged.
    """
    if isinstance(interval, int):
        return interval
    if isinstance(interval, ir.IntervalScalar):
        nanos = t.translate(interval, scope, timecontext, **kwargs)
        # Translated value is in nanoseconds; Spark expects seconds.
        return int(nanos / 1e9)
    raise com.UnsupportedOperationError(
        f'type {type(interval)} is not supported in preceding /following '
        'in window.'
    )
Ejemplo n.º 8
0
def _interval_from_integer(t, expr):
    """Build a MySQL ``INTERVAL`` expression from an integer value.

    Raises ``com.UnsupportedOperationError`` for sub-second units MySQL
    cannot represent.
    """
    arg, unit = expr.op().args
    if unit in {'ms', 'ns'}:
        raise com.UnsupportedOperationError(
            'MySQL does not allow operation '
            'with INTERVAL offset {}'.format(unit))

    translated = t.translate(arg)
    text_unit = expr.type().resolution.upper()

    # XXX: Is there a better way to handle this? I.e. can we somehow use
    # the existing bind parameter produced by translate and reuse its name in
    # the string passed to sa.text?
    if isinstance(translated, sa.sql.elements.BindParameter):
        return sa.text(f'INTERVAL :arg {text_unit}').bindparams(
            arg=translated.value)
    return sa.text(f'INTERVAL {translated} {text_unit}')
Ejemplo n.º 9
0
def _cast(translator, expr):
    """Compile a CAST expression for OmniSciDB.

    Geospatial targets cast to their geotype name (GEOMETRY is rejected);
    everything else goes through OmniSciDBDataType.
    """
    from ibis.omniscidb.client import OmniSciDBDataType

    operation = expr.op()
    arg, target = operation.args
    translated_arg = translator.translate(arg)

    if not isinstance(arg, ir.GeoSpatialValue):
        target_type = str(OmniSciDBDataType.from_ibis(target, nullable=False))
    else:
        # NOTE: CastToGeography expects geometry with SRID=4326
        target_type = target.geotype.upper()
        if target_type == 'GEOMETRY':
            raise com.UnsupportedOperationError(
                'OmnisciDB/OmniSciDB doesn\'t support yet convert ' +
                'from GEOGRAPHY to GEOMETRY.')
    return 'CAST({0!s} AS {1!s})'.format(translated_arg, target_type)
Ejemplo n.º 10
0
def _parse_url(translator, expr):
    """Compile a ParseURL expression to ClickHouse URL functions.

    HOST/PROTOCOL/PATH map straight to domain/protocol/path; QUERY uses
    extractURLParameter when a key is given, queryString otherwise.
    """
    arg, extract, key = expr.op().args

    simple_functions = {
        'HOST': 'domain',
        'PROTOCOL': 'protocol',
        'PATH': 'path',
    }
    if extract in simple_functions:
        return _call(translator, simple_functions[extract], arg)
    if extract == 'QUERY':
        if key is None:
            return _call(translator, 'queryString', arg)
        return _call(translator, 'extractURLParameter', arg, key)
    raise com.UnsupportedOperationError(
        f'Parse url with extract {extract} is not supported')
Ejemplo n.º 11
0
    def sql(self, query):
        """Convert a SQL query to an Ibis table expression.

        Parameters
        ----------
        query : str
            SQL text; trailing semicolons and `;` before `--` comments
            are stripped before validation.

        Returns
        -------
        table : TableExpr
        """
        # presumably a module-level import guard; verify against module top
        if pymapd_dtype is None:
            raise com.UnsupportedOperationError(
                'This method is available just on Python version >= 3.6.')
        cleaned = query.strip()
        # Remove `;` + `--` (comment)
        cleaned = re.sub(r'\s*;\s*--', '\n--', cleaned)
        # Remove trailing ;
        cleaned = re.sub(r'\s*;\s*$', '', cleaned.strip())
        schema = self._get_schema_using_validator(cleaned)
        return ops.SQLQueryResult(cleaned, schema, self).to_expr()
Ejemplo n.º 12
0
def truncate(translator, expr):
    """Compile timestamp truncation to an Oracle-style ``trunc()`` call.

    Raises ``com.UnsupportedOperationError`` for units without a mapping.
    """
    unit_map = {
        'Y': 'Y',
        'Q': 'Q',
        'M': 'MONTH',
        'W': 'W',
        'D': 'J',
        'h': 'HH',
        'm': 'MI',
    }
    arg, unit = expr.op().args

    formatted_arg = translator.translate(arg)
    if unit not in unit_map:
        raise com.UnsupportedOperationError(
            f'{unit!r} unit is not supported in timestamp truncate')

    return f"trunc({formatted_arg}, '{unit_map[unit]}')"
Ejemplo n.º 13
0
def _truncate(translator, expr):
    """Compile timestamp truncation to a ClickHouse ``toStartOf*`` helper.

    Raises ``com.UnsupportedOperationError`` for units without a mapping.
    """
    arg, unit = expr.op().args

    unit_to_function = {
        'Y': 'toStartOfYear',
        'M': 'toStartOfMonth',
        'W': 'toMonday',
        'D': 'toDate',
        'h': 'toStartOfHour',
        'm': 'toStartOfMinute',
        's': 'toDateTime',
    }

    if unit not in unit_to_function:
        raise com.UnsupportedOperationError(
            f'Unsupported truncate unit {unit}')

    return _call(translator, unit_to_function[unit], arg)
Ejemplo n.º 14
0
def _hash(translator, expr):
    """Compile a Hash expression to one of ClickHouse's hash functions.

    Raises ``com.UnsupportedOperationError`` for unknown algorithm names.
    """
    arg, how = expr.op().args

    supported_algorithms = frozenset((
        'MD5',
        'halfMD5',
        'SHA1',
        'SHA224',
        'SHA256',
        'intHash32',
        'intHash64',
        'cityHash64',
        'sipHash64',
        'sipHash128',
    ))

    if how not in supported_algorithms:
        raise com.UnsupportedOperationError(
            f'Unsupported hash algorithm {how}')

    return _call(translator, how, arg)
Ejemplo n.º 15
0
def compile_extract_millisecond(t, expr, scope, **kwargs):
    """Reject ExtractMillisecond: not supported on the PySpark backend."""
    message = 'PySpark backend does not support extracting milliseconds.'
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 16
0
def compile_percent_rank(t, expr, scope, *, window, **kwargs):
    """Reject PercentRank: Spark's 0-based indexing disagrees with ibis."""
    message = ('Pyspark percent_rank() function indexes from 0 '
               'instead of 1, and does not match expected '
               'output of ibis expressions.')
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 17
0
def raise_unsupported_expr_error(expr):
    """Signal that the OmniSciDB backend cannot compile *expr*."""
    template = "OmniSciDB backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
Ejemplo n.º 18
0
def round(op, expr):
    """Compile Round for datafusion; digit arguments are unsupported.

    Note: shadows the ``round`` builtin — kept for registry compatibility.
    """
    translated = translate(op.arg)
    if op.digits is not None:
        raise com.UnsupportedOperationError(
            'Rounding to specific digits is not supported in datafusion')
    # datafusion's round returns a float; ibis expects an integer result.
    return df.functions.round(translated).cast(pa.int64())
Ejemplo n.º 19
0
def _value_to_temporal(t, arg, _):
    """Fallback: no temporal conversion exists for this argument type."""
    unsupported_type = type(arg)
    raise com.UnsupportedOperationError(unsupported_type)
Ejemplo n.º 20
0
def _window(t, expr):
    """Compile a WindowOp expression for the SQLServer SQLAlchemy backend.

    Translates the underlying reduction, attaches PARTITION BY / ORDER BY
    and (where allowed) a frame clause, and converts 1-based SQL ranking
    functions to ibis' 0-based results.

    Raises
    ------
    com.UnsupportedOperationError
        For Sum/Mean/Min/Max used as a window function.
    NotImplementedError
        When the window specifies ``max_lookback``.
    """
    op = expr.op()

    arg, window = op.args
    reduction = t.translate(arg)

    window_op = arg.op()

    if isinstance(window_op, (ops.Sum, ops.Mean, ops.Min, ops.Max)):
        msg = """SQLServer backend doesn't support {}
         operation with Window Function!"""
        raise com.UnsupportedOperationError(msg.format(type(window_op)))

    # Operations that are only well defined with an ORDER BY clause.
    _require_order_by = (
        ops.DenseRank,
        ops.MinRank,
        ops.NTile,
        ops.PercentRank,
        ops.Count,
        ops.Mean,
        ops.Min,
        ops.Max,
        ops.Sum,
        ops.FirstValue,
        ops.LastValue,
        ops.Lag,
        ops.Lead,
    )

    if isinstance(window_op, ops.CumulativeOp):
        # Cumulative ops are rewritten to an equivalent plain window op.
        arg = _cumulative_to_window(t, arg, window)
        return t.translate(arg)

    if window.max_lookback is not None:
        raise NotImplementedError('Rows with max lookback is not implemented '
                                  'for SQLAlchemy-based backends.')

    # Fall back to ordering by the op's own first argument when an ORDER BY
    # is required but the user supplied none.
    if isinstance(window_op, _require_order_by) and not window._order_by:
        order_by = t.translate(window_op.args[0])
    else:
        order_by = list(map(t.translate, window._order_by))

    partition_by = list(map(t.translate, window._group_by))

    # SQL forbids an explicit frame clause for these functions.
    frame_clause_not_allowed = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.NTile,
        ops.PercentRank,
        ops.RowNumber,
    )

    # SQLAlchemy spells the RANGE frame keyword argument as `range_`.
    how = {'range': 'range_'}.get(window.how, window.how)
    preceding = window.preceding
    # Preceding is negated: SQLAlchemy frame bounds are signed offsets.
    additional_params = ({} if isinstance(
        window_op, frame_clause_not_allowed) else {
            how: (
                -preceding if preceding is not None else preceding,
                window.following,
            )
        })
    result = reduction.over(partition_by=partition_by,
                            order_by=order_by,
                            **additional_params)

    # Ranking functions are 1-based in SQL but 0-based in ibis.
    if isinstance(window_op,
                  (ops.RowNumber, ops.DenseRank, ops.MinRank, ops.NTile)):
        return result - 1
    else:
        return result
Ejemplo n.º 21
0
def compile_timestamp_diff(t, expr, scope, **kwargs):
    """Reject TimestampDiff: PySpark has no timedelta type."""
    message = ('PySpark backend does not support TimestampDiff as there is no '
               'timedelta type.')
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 22
0
def compile_interval_from_integer(t, expr, scope, **kwargs):
    """Reject IntervalFromInteger on the PySpark backend."""
    message = (
        'Interval from integer column is unsupported for the PySpark backend.'
    )
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 23
0
def _raise_error(translator, expr, *args):
    """Signal that the Clickhouse backend cannot compile *expr*."""
    template = "Clickhouse backend doesn't support {0} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
Ejemplo n.º 24
0
def _window(translator, expr):
    """Compile a WindowOp expression for the OmniSciDB backend.

    Rewrites cumulative ops, rejects unsupported window features, and
    renders ``<arg> <window clause>``, converting 1-based SQL ranking
    functions to ibis' 0-based results.

    Raises
    ------
    com.UnsupportedOperationError
        For unsupported reductions, and for windows with a preceding or
        non-zero following bound.
    """
    op = expr.op()

    arg, window = op.args
    window_op = arg.op()

    # Analytic functions that are only well defined with an ORDER BY.
    _require_order_by = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.FirstValue,
        ops.LastValue,
        ops.PercentRank,
        ops.NTile,
    )

    # Reductions this backend cannot evaluate in a window context.
    _unsupported_win_ops = (
        ops.CMSMedian,
        ops.GroupConcat,
        ops.HLLCardinality,
        ops.All,  # TODO: change all to work as cumall
        ops.Any,  # TODO: change any to work as cumany
    )

    # Ranking functions are 1-based in SQL but 0-based in ibis.
    _subtract_one = '{} - 1'.format
    _expr_transforms = {
        ops.DenseRank: _subtract_one,
        ops.MinRank: _subtract_one,
        ops.NTile: _subtract_one,
        ops.RowNumber: _subtract_one,
    }

    if isinstance(window_op, _unsupported_win_ops):
        raise com.UnsupportedOperationError(
            '{} is not supported in window functions'.format(type(window_op)))

    if isinstance(window_op, ops.CumulativeOp):
        # Cumulative ops are rewritten to an equivalent plain window op.
        arg = impala_compiler._cumulative_to_window(translator, arg, window)
        return translator.translate(arg)

    if window.preceding is not None:
        raise com.UnsupportedOperationError(
            'Window preceding is not supported by OmniSciDB backend yet')

    if window.following is not None and window.following != 0:
        raise com.UnsupportedOperationError(
            'Window following is not supported by OmniSciDB backend yet')
    # NOTE(review): mutates the window object in place — presumably safe
    # because the compiler owns this window instance; confirm callers
    # don't reuse it.
    window.following = None

    # Some analytic functions need to have the expression of interest in
    # the ORDER BY part of the window clause
    if isinstance(window_op, _require_order_by) and len(window._order_by) == 0:
        window = window.order_by(window_op.args[0])

    # Time ranges need to be converted to microseconds.
    if window.how == 'range':
        order_by_types = [type(x.op().args[0]) for x in window._order_by]
        time_range_types = (ir.TimeColumn, ir.DateColumn, ir.TimestampColumn)
        if any(col_type in time_range_types for col_type in order_by_types):
            window = impala_compiler._time_range_to_range_window(
                translator, window)

    window_formatted = impala_compiler._format_window(translator, op, window)

    arg_formatted = translator.translate(arg)
    result = '{} {}'.format(arg_formatted, window_formatted)

    if type(window_op) in _expr_transforms:
        return _expr_transforms[type(window_op)](result)
    else:
        return result
Ejemplo n.º 25
0
 def _make_union(self):
     """Reject Union: the OmniSciDB backend does not support it."""
     message = "OmniSciDB backend doesn't support Union operation"
     raise com.UnsupportedOperationError(message)
Ejemplo n.º 26
0
def raise_unsupported_op_error(translator, expr, *args):
    """Signal that the SQLServer backend cannot compile *expr*."""
    template = "SQLServer backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
Ejemplo n.º 27
0
def compile_date_diff(t, expr, scope, timecontext, **kwargs):
    """Reject DateDiff: PySpark has no timedelta type."""
    message = ('PySpark backend does not support DateDiff as there is no '
               'timedelta type.')
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 28
0
def execute_rowid(op, *args, **kwargs):
    """Reject RowID: the pandas backends have no row-id concept."""
    message = 'rowid is not supported in pandas backends'
    raise com.UnsupportedOperationError(message)
Ejemplo n.º 29
0
 def _make_union(union_class, expr, context):
     """Reject Union: the HeavyDB backend does not support it."""
     message = "HeavyDB backend doesn't support Union operation"
     raise com.UnsupportedOperationError(message)