def raise_unsupported_expr_error(expr: ibis.Expr):
    """Raise ``com.UnsupportedOperationError`` for an expression HeavyDB cannot compile.

    Parameters
    ----------
    expr : ibis.Expr
        The offending expression; its operation type is named in the message.

    Raises
    ------
    com.UnsupportedOperationError
        Always.
    """
    operation = expr.op()
    raise com.UnsupportedOperationError(
        "HeavyDB backend doesn't support {} operation!".format(type(operation))
    )
def spark_compiles_arbitrary(translator, expr):
    """Compile an ``Arbitrary`` reduction to Spark SQL ``first``/``last``.

    ``how`` may be ``None``/'first' (first non-null value) or 'last'; any
    other value raises.  A ``where`` filter is folded in by NULL-ing out
    non-matching rows, which the ignore-nulls flag (``True``) then skips.
    """
    value, how, where = expr.op().args
    if where is not None:
        value = where.ifelse(value, ibis.NA)
    if how == 'last':
        return 'last({}, True)'.format(translator.translate(value))
    if how in (None, 'first'):
        return 'first({}, True)'.format(translator.translate(value))
    raise com.UnsupportedOperationError(
        '{!r} value not supported for arbitrary in Spark SQL'.format(how)
    )
def raise_unsupported_op_error(translator, expr, *args):
    """Raise an unsupported-operation error naming ``expr``'s op type.

    Parameters
    ----------
    translator : unused; present for compiler-callback signature compatibility
    expr : ibis.Expr
    *args : unused

    Raises
    ------
    com.UnsupportedOperationError
        Always.
    """
    template = "OmniSciDB backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
def _formatter(translator, expr):
    """Render ``func(arg, offset)`` for a timestamp op with an INTERVAL offset.

    ``func`` and ``units`` come from the enclosing scope (this is a closure
    body); offsets whose unit is not in ``units`` are rejected for BigQuery.
    """
    arg, offset = expr.op().args
    unit = offset.type().unit
    if unit not in units:
        raise com.UnsupportedOperationError(
            "BigQuery does not allow binary operation "
            "{} with INTERVAL offset {}".format(func, unit)
        )
    return "{}({}, {})".format(
        func, translator.translate(arg), translator.translate(offset)
    )
def _timestamp_truncate(translator, expr):
    """Compile timestamp truncation to Spark's ``date_trunc``.

    Day-level truncation is additionally wrapped in ``date(...)`` so the
    result is a DATE rather than a TIMESTAMP.
    """
    source, unit = expr.op().args
    source_sql = translator.translate(source)
    try:
        spark_unit = _spark_timestamp_unit_names[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in timestamp truncate'.format(unit))
    truncated = "date_trunc({!r}, {})".format(spark_unit, source_sql)
    if spark_unit == 'DAY':
        return "date({})".format(truncated)
    return truncated
def _literal(t, expr):
    """Compile a literal to a SQLAlchemy construct for MySQL.

    Intervals become ``INTERVAL :value UNIT`` text clauses (ms/ns units are
    not representable in MySQL and raise); set literals become a list of
    ``sa.literal``s; pandas Timestamps are converted to ``datetime`` first.
    """
    if isinstance(expr, ir.IntervalScalar):
        unit = expr.type().unit
        if unit in {'ms', 'ns'}:
            raise com.UnsupportedOperationError(
                'MySQL does not allow operation '
                'with INTERVAL offset {}'.format(unit))
        resolution = expr.type().resolution.upper()
        return sa.text(f'INTERVAL :value {resolution}').bindparams(
            value=expr.op().value)
    if isinstance(expr, ir.SetScalar):
        return [sa.literal(v) for v in expr.op().value]
    value = expr.op().value
    if isinstance(value, pd.Timestamp):
        value = value.to_pydatetime()
    return sa.literal(value)
def _canonicalize_interval(t, interval, scope, timecontext, **kwargs):
    """Normalize a window-frame bound to whole seconds for PySpark.

    PySpark casts timestamps to integers as seconds since epoch, so an ibis
    interval (which translates to nanoseconds) is scaled down to seconds.
    Plain ints pass through untouched; any other type is unsupported.
    """
    if isinstance(interval, int):
        return interval
    if isinstance(interval, ir.IntervalScalar):
        nanos = t.translate(interval, scope, timecontext, **kwargs)
        # translated value is in nanoseconds; Spark frames use seconds
        return int(nanos / 1e9)
    raise com.UnsupportedOperationError(
        f'type {type(interval)} is not supported in preceding /following '
        'in window.'
    )
def _interval_from_integer(t, expr):
    """Build a MySQL ``INTERVAL <n> <UNIT>`` clause from an integer expression.

    Millisecond/nanosecond units are rejected (MySQL has no such INTERVAL
    granularity).  Bound parameters are re-bound by value so the unit can be
    spliced into raw SQL text.
    """
    value, unit = expr.op().args
    if unit in {'ms', 'ns'}:
        raise com.UnsupportedOperationError(
            'MySQL does not allow operation '
            'with INTERVAL offset {}'.format(unit))
    translated = t.translate(value)
    resolution = expr.type().resolution.upper()
    # XXX: Is there a better way to handle this? I.e. can we somehow use
    # the existing bind parameter produced by translate and reuse its name in
    # the string passed to sa.text?
    if isinstance(translated, sa.sql.elements.BindParameter):
        return sa.text(
            'INTERVAL :arg {}'.format(resolution)
        ).bindparams(arg=translated.value)
    return sa.text('INTERVAL {} {}'.format(translated, resolution))
def _cast(translator, expr):
    """Compile an ibis ``Cast`` to OmniSciDB SQL.

    Geospatial casts are rendered with the target's geotype name; casting a
    geography value to GEOMETRY is unsupported and raises.  All other
    targets are rendered through ``OmniSciDBDataType``.

    Raises
    ------
    com.UnsupportedOperationError
        When casting a geospatial value to GEOMETRY.
    """
    from ibis.omniscidb.client import OmniSciDBDataType

    op = expr.op()
    arg, target = op.args
    arg_ = translator.translate(arg)
    if isinstance(arg, ir.GeoSpatialValue):
        # NOTE: CastToGeography expects geometry with SRID=4326
        type_ = target.geotype.upper()
        if type_ == 'GEOMETRY':
            # Fixed garbled message: previously read
            # "OmnisciDB/OmniSciDB doesn't support yet convert from
            # GEOGRAPHY to GEOMETRY."
            raise com.UnsupportedOperationError(
                "OmniSciDB doesn't support conversion from GEOGRAPHY "
                'to GEOMETRY yet.'
            )
    else:
        type_ = str(OmniSciDBDataType.from_ibis(target, nullable=False))
    return 'CAST({0!s} AS {1!s})'.format(arg_, type_)
def _parse_url(translator, expr):
    """Compile ``ParseURL`` to the matching ClickHouse URL function.

    HOST/PROTOCOL/PATH map one-to-one; QUERY maps to ``queryString`` or,
    when a key is given, ``extractURLParameter``.  Other extract targets
    raise ``com.UnsupportedOperationError``.
    """
    arg, extract, key = expr.op().args
    direct = {'HOST': 'domain', 'PROTOCOL': 'protocol', 'PATH': 'path'}
    if extract in direct:
        return _call(translator, direct[extract], arg)
    if extract == 'QUERY':
        if key is None:
            return _call(translator, 'queryString', arg)
        return _call(translator, 'extractURLParameter', arg, key)
    raise com.UnsupportedOperationError(
        f'Parse url with extract {extract} is not supported')
def sql(self, query):
    """Convert a SQL query to an Ibis table expression.

    Parameters
    ----------
    query : string
        SQL query to validate against the backend.  Any trailing semicolon
        (plus a comment following it) is stripped before validation.

    Returns
    -------
    table : TableExpr

    Raises
    ------
    com.UnsupportedOperationError
        If ``pymapd_dtype`` is unavailable (Python version < 3.6).
    """
    if pymapd_dtype is None:
        raise com.UnsupportedOperationError(
            'This method is available just on Python version >= 3.6.')
    # Remove `;` + `--` (comment)
    query = re.sub(r'\s*;\s*--', '\n--', query.strip())
    # Remove trailing ;
    query = re.sub(r'\s*;\s*$', '', query.strip())
    schema = self._get_schema_using_validator(query)
    return ops.SQLQueryResult(query, schema, self).to_expr()
def truncate(translator, expr):
    """Compile timestamp truncation to Oracle-style ``trunc(arg, 'UNIT')``.

    Only the units listed in the table are supported; anything else raises
    ``com.UnsupportedOperationError``.
    """
    unit_map = {
        'Y': 'Y',
        'Q': 'Q',
        'M': 'MONTH',
        'W': 'W',
        'D': 'J',
        'h': 'HH',
        'm': 'MI',
    }
    arg, unit = expr.op().args
    formatted = translator.translate(arg)
    try:
        trunc_unit = unit_map[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            f'{unit!r} unit is not supported in timestamp truncate')
    return f"trunc({formatted}, '{trunc_unit}')"
def _truncate(translator, expr):
    """Compile timestamp truncation to the matching ClickHouse function.

    Each supported unit maps to one of the ``toStartOf*`` family (or
    ``toDate``/``toDateTime``); unknown units raise.
    """
    arg, unit = expr.op().args
    unit_to_function = {
        'Y': 'toStartOfYear',
        'M': 'toStartOfMonth',
        'W': 'toMonday',
        'D': 'toDate',
        'h': 'toStartOfHour',
        'm': 'toStartOfMinute',
        's': 'toDateTime',
    }
    try:
        func_name = unit_to_function[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            f'Unsupported truncate unit {unit}')
    return _call(translator, func_name, arg)
def _hash(translator, expr):
    """Compile ``Hash`` to a ClickHouse hash-function call.

    ``how`` must name one of the hash algorithms ClickHouse ships with;
    anything else raises ``com.UnsupportedOperationError``.
    """
    arg, how = expr.op().args
    supported = {
        'MD5',
        'halfMD5',
        'SHA1',
        'SHA224',
        'SHA256',
        'intHash32',
        'intHash64',
        'cityHash64',
        'sipHash64',
        'sipHash128',
    }
    if how not in supported:
        raise com.UnsupportedOperationError(
            f'Unsupported hash algorithm {how}')
    return _call(translator, how, arg)
def compile_extract_millisecond(t, expr, scope, **kwargs):
    """Always raise: the PySpark backend cannot extract milliseconds."""
    raise com.UnsupportedOperationError(
        'PySpark backend does not support extracting milliseconds.')
def compile_percent_rank(t, expr, scope, *, window, **kwargs):
    """Always raise: Spark's percent_rank is zero-indexed, which does not
    match the output ibis expressions expect."""
    raise com.UnsupportedOperationError(
        'Pyspark percent_rank() function indexes from 0 '
        'instead of 1, and does not match expected '
        'output of ibis expressions.')
def raise_unsupported_expr_error(expr):
    """Raise ``com.UnsupportedOperationError`` naming ``expr``'s op type (OmniSciDB)."""
    raise com.UnsupportedOperationError(
        "OmniSciDB backend doesn't support {} operation!".format(type(expr.op()))
    )
def round(op, expr):
    """Compile ``Round`` for datafusion; digit-targeted rounding is unsupported.

    The result is cast to int64, matching ibis's integer-round semantics.
    (The name shadows the builtin ``round``; it is fixed by the dispatch
    registry and must not change.)
    """
    translated = translate(op.arg)
    if op.digits is not None:
        raise com.UnsupportedOperationError(
            'Rounding to specific digits is not supported in datafusion')
    return df.functions.round(translated).cast(pa.int64())
def _value_to_temporal(t, arg, _):
    """Fallback temporal coercion: unconditionally unsupported for this type."""
    raise com.UnsupportedOperationError(type(arg))
def _window(t, expr):
    """Translate an ibis window expression into a SQLAlchemy ``over`` clause
    for the SQLServer backend, enforcing several backend restrictions.
    """
    op = expr.op()
    arg, window = op.args
    reduction = t.translate(arg)
    window_op = arg.op()
    # These aggregates are explicitly rejected with a window clause here.
    if isinstance(window_op, (ops.Sum, ops.Mean, ops.Min, ops.Max)):
        msg = """SQLServer backend doesn't support {} operation with Window Function!"""
        raise com.UnsupportedOperationError(msg.format(type(window_op)))
    # Ops that need an ORDER BY to be well-defined; when the user supplied
    # none, the op's own first argument is used as the ordering key below.
    _require_order_by = (
        ops.DenseRank,
        ops.MinRank,
        ops.NTile,
        ops.PercentRank,
        ops.Count,
        ops.Mean,
        ops.Min,
        ops.Max,
        ops.Sum,
        ops.FirstValue,
        ops.LastValue,
        ops.Lag,
        ops.Lead,
    )
    if isinstance(window_op, ops.CumulativeOp):
        # Rewrite cumulative ops (e.g. cumsum) as ordinary windowed
        # aggregates and recurse.
        arg = _cumulative_to_window(t, arg, window)
        return t.translate(arg)
    if window.max_lookback is not None:
        raise NotImplementedError('Rows with max lookback is not implemented '
                                  'for SQLAlchemy-based backends.')
    if isinstance(window_op, _require_order_by) and not window._order_by:
        order_by = t.translate(window_op.args[0])
    else:
        order_by = list(map(t.translate, window._order_by))
    partition_by = list(map(t.translate, window._group_by))
    # Ops for which SQL forbids an explicit ROWS/RANGE frame clause.
    frame_clause_not_allowed = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.NTile,
        ops.PercentRank,
        ops.RowNumber,
    )
    # SQLAlchemy spells the RANGE frame keyword argument 'range_'.
    how = {'range': 'range_'}.get(window.how, window.how)
    preceding = window.preceding
    # Preceding bounds are negated per SQLAlchemy's frame convention.
    additional_params = ({} if isinstance(
        window_op, frame_clause_not_allowed) else {
        how: (
            -preceding if preceding is not None else preceding,
            window.following,
        )
    })
    result = reduction.over(partition_by=partition_by, order_by=order_by,
                            **additional_params)
    # Rank-like ops: SQL is 1-based, ibis is 0-based.
    if isinstance(window_op,
                  (ops.RowNumber, ops.DenseRank, ops.MinRank, ops.NTile)):
        return result - 1
    else:
        return result
def compile_timestamp_diff(t, expr, scope, **kwargs):
    """Always raise: PySpark has no timedelta type, so TimestampDiff cannot compile."""
    raise com.UnsupportedOperationError(
        'PySpark backend does not support TimestampDiff as there is no '
        'timedelta type.')
def compile_interval_from_integer(t, expr, scope, **kwargs):
    """Always raise: integer-column-to-interval conversion is unsupported on PySpark."""
    raise com.UnsupportedOperationError(
        'Interval from integer column is unsupported for the PySpark backend.')
def _raise_error(translator, expr, *args):
    """Raise ``com.UnsupportedOperationError`` naming ``expr``'s op type (Clickhouse)."""
    template = "Clickhouse backend doesn't support {0} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
def _window(translator, expr):
    """Compile a window expression for OmniSciDB, which supports only a
    restricted subset of SQL window-function features (no explicit
    preceding/following frames, and several aggregates are disallowed).
    """
    op = expr.op()
    arg, window = op.args
    window_op = arg.op()
    # Analytic functions that only make sense with an explicit ordering.
    _require_order_by = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.FirstValue,
        ops.LastValue,
        ops.PercentRank,
        ops.NTile,
    )
    _unsupported_win_ops = (
        ops.CMSMedian,
        ops.GroupConcat,
        ops.HLLCardinality,
        ops.All,  # TODO: change all to work as cumall
        ops.Any,  # TODO: change any to work as cumany
    )
    # Rank-like results are shifted from SQL's 1-based to ibis's 0-based.
    _subtract_one = '{} - 1'.format
    _expr_transforms = {
        ops.DenseRank: _subtract_one,
        ops.MinRank: _subtract_one,
        ops.NTile: _subtract_one,
        ops.RowNumber: _subtract_one,
    }
    if isinstance(window_op, _unsupported_win_ops):
        raise com.UnsupportedOperationError(
            '{} is not supported in window functions'.format(type(window_op)))
    if isinstance(window_op, ops.CumulativeOp):
        # Reuse Impala's cumulative-op rewrite, then recurse.
        arg = impala_compiler._cumulative_to_window(translator, arg, window)
        return translator.translate(arg)
    if window.preceding is not None:
        raise com.UnsupportedOperationError(
            'Window preceding is not supported by OmniSciDB backend yet')
    if window.following is not None and window.following != 0:
        raise com.UnsupportedOperationError(
            'Window following is not supported by OmniSciDB backend yet')
    # NOTE(review): mutates the window object in place so the formatter
    # below emits no FOLLOWING clause.
    window.following = None
    # Some analytic functions need to have the expression of interest in
    # the ORDER BY part of the window clause
    if isinstance(window_op, _require_order_by) and len(window._order_by) == 0:
        window = window.order_by(window_op.args[0])
    # Time ranges need to be converted to microseconds.
    if window.how == 'range':
        order_by_types = [type(x.op().args[0]) for x in window._order_by]
        time_range_types = (ir.TimeColumn, ir.DateColumn, ir.TimestampColumn)
        if any(col_type in time_range_types for col_type in order_by_types):
            window = impala_compiler._time_range_to_range_window(
                translator, window)
    window_formatted = impala_compiler._format_window(translator, op, window)
    arg_formatted = translator.translate(arg)
    result = '{} {}'.format(arg_formatted, window_formatted)
    if type(window_op) in _expr_transforms:
        return _expr_transforms[type(window_op)](result)
    else:
        return result
def _make_union(self):
    """Always raise: the OmniSciDB backend has no UNION support."""
    raise com.UnsupportedOperationError(
        "OmniSciDB backend doesn't support Union operation")
def raise_unsupported_op_error(translator, expr, *args):
    """Raise ``com.UnsupportedOperationError`` naming ``expr``'s op type (SQLServer)."""
    template = "SQLServer backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(type(expr.op())))
def compile_date_diff(t, expr, scope, timecontext, **kwargs):
    """Always raise: PySpark has no timedelta type, so DateDiff cannot compile."""
    raise com.UnsupportedOperationError(
        'PySpark backend does not support DateDiff as there is no '
        'timedelta type.'
    )
def execute_rowid(op, *args, **kwargs):
    """Always raise: the pandas backend has no notion of a physical rowid."""
    raise com.UnsupportedOperationError(
        'rowid is not supported in pandas backends')
def _make_union(union_class, expr, context):
    """Always raise: the HeavyDB backend has no UNION support."""
    raise com.UnsupportedOperationError(
        "HeavyDB backend doesn't support Union operation"
    )