def _cast(t, expr):
    """Translate a cast expression into a SQLAlchemy expression for SQLite.

    Casts to timestamp and date targets are built from SQLite date/time
    functions instead of ``CAST``; only integer and string sources are
    accepted for those targets.

    Parameters
    ----------
    t
        Translator exposing ``translate`` and ``get_sqla_type``.
    expr
        Expression whose operation carries ``(arg, target_type)``.

    Raises
    ------
    com.UnsupportedOperationError
        If the target is a timestamp or a date and the source is neither
        an integer nor a string value.
    """
    # It's not all fun and games with SQLite
    source, target_type = expr.op().args
    translated = t.translate(source)
    target_sa_type = t.get_sqla_type(target_type)

    if isinstance(target_type, dt.Timestamp):
        if isinstance(source, ir.IntegerValue):
            # Integers go through datetime() with the 'unixepoch' modifier.
            return sa.func.datetime(translated, 'unixepoch')
        if isinstance(source, ir.StringValue):
            return sa.func.strftime('%Y-%m-%d %H:%M:%f', translated)
        raise com.UnsupportedOperationError(type(source))

    if isinstance(target_type, dt.Date):
        if isinstance(source, ir.IntegerValue):
            as_datetime = sa.func.datetime(translated, 'unixepoch')
            return sa.func.date(as_datetime)
        if isinstance(source, ir.StringValue):
            return sa.func.date(translated)
        raise com.UnsupportedOperationError(type(source))

    if isinstance(source, ir.CategoryValue) and target_type == 'int32':
        # Category values cast to int32 are passed through untouched.
        return translated
    return sa.cast(translated, target_sa_type)
def formatter(translator, expr):
    """Render a fixed-arity function call through ``_call``.

    ``arity`` and ``func_name`` are not defined here; they are read from
    the enclosing scope.

    Raises
    ------
    com.UnsupportedOperationError
        If the operation does not carry exactly ``arity`` arguments.
    """
    operands = expr.op().args
    received = len(operands)
    if received == arity:
        return _call(translator, func_name, *operands)

    template = 'Incorrect number of args {0} instead of {1}'
    raise com.UnsupportedOperationError(template.format(received, arity))
def _interval_format(translator, expr): dtype = expr.type() if dtype.unit in {'ms', 'us', 'ns'}: raise com.UnsupportedOperationError( "Clickhouse doesn't support subsecond interval resolutions") return 'INTERVAL {} {}'.format(expr.op().value, dtype.resolution.upper())
def _interval_format(translator, expr): dtype = expr.type() if dtype.unit in {'ms', 'us', 'ns'}: raise com.UnsupportedOperationError( "MapD doesn't support subsecond interval resolutions") return '{1}, (sign){0}'.format(expr.op().value, dtype.resolution.upper())
def _string_find(translator, expr):
    """Translate a string-find operation via the ``position`` function.

    The rendered ``position`` call has ``' - 1'`` appended to it. The
    fourth operation argument is unpacked but not used.

    Raises
    ------
    com.UnsupportedOperationError
        If a ``start`` argument is supplied.
    """
    haystack, needle, start, _ = expr.op().args
    if start is None:
        position = _call(translator, 'position', haystack, needle)
        return position + ' - 1'

    raise com.UnsupportedOperationError(
        "String find doesn't support start argument")
def translator(t, expr):
    """Translate a truncate operation by applying ``func`` with a modifier.

    ``func`` is read from the enclosing scope; the modifier is looked up
    in ``_truncate_modifiers`` by the operation's unit.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_truncate_modifiers``.
    """
    operand, unit = expr.op().args
    translated = t.translate(operand)
    try:
        modifier = _truncate_modifiers[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            'Unsupported truncate unit {!r}'.format(unit))
    else:
        return func(translated, modifier)
def _truncate(t, expr):
    """Translate a truncate operation into a ``date_format`` call.

    The format string is looked up in ``_truncate_formats`` by the
    operation's unit.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_truncate_formats``.
    """
    operand, unit = expr.op().args
    translated = t.translate(operand)
    try:
        date_format = _truncate_formats[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            'Unsupported truncate unit {}'.format(unit))
    else:
        return sa.func.date_format(translated, date_format)
def truncator(translator, expr):
    """Render a ``<kind>_TRUNC(<arg>, <unit>)`` call.

    ``kind`` and ``units`` are read from the enclosing scope; ``units``
    maps the operation's unit to the name used in the rendered call.

    Raises
    ------
    com.UnsupportedOperationError
        If ``units`` has no (non-``None``) entry for the requested unit.
    """
    operand, unit = expr.op().args
    rendered_operand = translator.translate(operand)
    target_unit = units.get(unit)
    if target_unit is not None:
        return '{}_TRUNC({}, {})'.format(kind, rendered_operand, target_unit)

    raise com.UnsupportedOperationError(
        'BigQuery does not support truncating {} values to unit '
        '{!r}'.format(operand.type(), unit))
def _interval_from_integer(t, expr):
    """Build an ``INTERVAL <arg> <RESOLUTION>`` text clause.

    The translated argument is interpolated directly into the SQL text.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit is ``'ms'`` or ``'ns'``.
    """
    operand, unit = expr.op().args
    rejected_units = {'ms', 'ns'}
    if unit in rejected_units:
        raise com.UnsupportedOperationError(
            'MySQL does not allow operation '
            'with INTERVAL offset {}'.format(unit))

    translated = t.translate(operand)
    resolution = expr.resolution.upper()
    return sa.text('INTERVAL {} {}'.format(translated, resolution))
def _timestamp_truncate(t, expr):
    """Translate a timestamp truncate into a ``date_trunc`` call.

    The precision name is looked up in ``_truncate_precisions`` by the
    operation's unit and passed as the first argument of ``date_trunc``.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_truncate_precisions``.
    """
    operand, unit = expr.op().args
    translated = t.translate(operand)
    try:
        precision = _truncate_precisions[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            'Unsupported truncate unit {!r}'.format(unit))
    else:
        return sa.func.date_trunc(precision, translated)
def _interval_from_integer(translator, expr): op = expr.op() arg, unit = op.args if expr.unit in {'ms', 'us', 'ns'}: raise com.UnsupportedOperationError( "Clickhouse doesn't support subsecond interval resolutions") arg_ = translator.translate(arg) return 'INTERVAL {} {}'.format(arg_, expr.resolution.upper())
def _interval_from_integer(translator, expr): op = expr.op() arg, unit = op.args dtype = expr.type() if dtype.unit in {'ms', 'us', 'ns'}: raise com.UnsupportedOperationError( "MapD doesn't support subsecond interval resolutions") arg_ = translator.translate(arg) return '{}, (sign){}'.format(dtype.resolution.upper(), arg_)
def _arbitrary(translator, expr): arg, how, where = expr.op().args if where is not None: arg = where.ifelse(arg, ibis.NA) if how != 'first': raise com.UnsupportedOperationError( '{!r} value not supported for arbitrary in BigQuery'.format(how)) return 'ANY_VALUE({})'.format(translator.translate(arg))
def _arbitrary(translator, expr): arg, how, where = expr.op().args if how not in (None, 'last'): raise com.UnsupportedOperationError( '{!r} value not supported for arbitrary in MapD'.format(how)) if where is not None: arg = where.ifelse(arg, ibis.NA) return 'SAMPLE({})'.format(translator.translate(arg))
def _truncate(translator, expr):
    """Render a timestamp truncate as ``trunc(<arg>, '<unit>')``.

    The unit name placed in the output is looked up in
    ``_impala_unit_names`` by the operation's unit.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_impala_unit_names``.
    """
    operand, unit = expr.op().args
    rendered = translator.translate(operand)
    try:
        impala_unit = _impala_unit_names[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in timestamp truncate'.format(unit))
    else:
        return "trunc({}, '{}')".format(rendered, impala_unit)
def _date_truncate(translator, expr):
    """Render a date truncate as ``trunc(<arg>, <unit repr>)``.

    The unit name placed in the output is looked up in
    ``_spark_date_unit_names`` by the operation's unit and formatted with
    ``!r``.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_spark_date_unit_names``.
    """
    operand, unit = expr.op().args
    rendered = translator.translate(operand)
    try:
        spark_unit = _spark_date_unit_names[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in date truncate'.format(unit))
    else:
        return "trunc({}, {!r})".format(rendered, spark_unit)
def _formatter(translator, expr):
    """Render ``<func>(<arg>, <offset>)`` for an interval-offset operation.

    ``func`` and ``units`` are read from the enclosing scope; ``units``
    is the collection of offset units accepted for ``func``.

    Raises
    ------
    com.UnsupportedOperationError
        If the offset's unit is not in ``units``.
    """
    operand, offset = expr.op().args
    if offset.unit not in units:
        raise com.UnsupportedOperationError(
            'BigQuery does not allow binary operation '
            '{} with INTERVAL offset {}'.format(func, offset.unit))

    rendered_operand = translator.translate(operand)
    rendered_offset = translator.translate(offset)
    return '{}({}, {})'.format(func, rendered_operand, rendered_offset)
def _window(translator, expr):
    """Translate a windowed expression into ``<expr> <window clause>``.

    Steps, in order:

    1. Reject reductions that are listed as unsupported inside windows.
    2. Cumulative operations are rewritten by ``_cumulative_to_window``
       and the rewritten expression is translated and returned directly.
    3. Operations that need an ordering get the operation's first
       argument as ``ORDER BY`` when the window has none.
    4. ``range`` windows ordered by a time, date or timestamp column are
       passed through ``_time_range_to_range_window``.
    5. The result is post-processed by ``_expr_transforms`` when it has
       an entry for the operation's exact type.

    Raises
    ------
    com.UnsupportedOperationError
        If the windowed operation is one of the unsupported reductions.
    """
    arg, window = expr.op().args
    window_op = arg.op()

    ordering_required = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.FirstValue,
        ops.LastValue,
        ops.PercentRank,
        ops.NTile,
    )
    rejected_reductions = (
        ops.CMSMedian,
        ops.GroupConcat,
        ops.HLLCardinality,
    )

    if isinstance(window_op, rejected_reductions):
        raise com.UnsupportedOperationError(
            '{} is not supported in window functions'.format(type(window_op)))

    if isinstance(window_op, ops.CumulativeOp):
        rewritten = _cumulative_to_window(translator, arg, window)
        return translator.translate(rewritten)

    # Some analytic functions need to have the expression of interest in
    # the ORDER BY part of the window clause
    has_no_ordering = len(window._order_by) == 0
    if isinstance(window_op, ordering_required) and has_no_ordering:
        window = window.order_by(window_op.args[0])

    # Time ranges need to be converted to microseconds.
    if window.how == 'range':
        time_range_types = (ir.TimeColumn, ir.DateColumn, ir.TimestampColumn)
        # Exact type membership is tested, not isinstance.
        ordering_types = [type(key.op().args[0]) for key in window._order_by]
        if any(kind in time_range_types for kind in ordering_types):
            window = _time_range_to_range_window(translator, window)

    window_formatted = _format_window(translator, window)
    arg_formatted = translator.translate(arg)
    rendered = '{} {}'.format(arg_formatted, window_formatted)

    op_type = type(window_op)
    if op_type in _expr_transforms:
        return _expr_transforms[op_type](rendered)
    return rendered
def _literal(t, expr):
    """Translate a literal expression into a SQLAlchemy construct.

    Interval values become an ``INTERVAL <value> <RESOLUTION>`` text
    clause; every other value becomes ``sa.literal``, with pandas
    timestamps converted to ``datetime`` first. ``t`` is unused.

    Raises
    ------
    com.UnsupportedOperationError
        If an interval's unit is ``'ms'`` or ``'ns'``.
    """
    if not isinstance(expr, ir.IntervalValue):
        value = expr.op().value
        if isinstance(value, pd.Timestamp):
            value = value.to_pydatetime()
        return sa.literal(value)

    interval_unit = expr.type().unit
    if interval_unit in {'ms', 'ns'}:
        raise com.UnsupportedOperationError(
            'MySQL does not allow operation '
            'with INTERVAL offset {}'.format(interval_unit))

    amount = expr.op().value
    resolution = expr.resolution.upper()
    return sa.text('INTERVAL {} {}'.format(amount, resolution))
def truncator(translator, expr):
    """Render a ``<kind>_TRUNC(<arg>, <unit>)`` call.

    ``kind`` and ``units`` are read from the enclosing scope; ``units``
    maps the operation's unit to the name used in the rendered call.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``units``.
    """
    operand, unit = expr.op().args
    rendered = translator.translate(operand)
    try:
        target_unit = units[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in timestamp truncate'.format(unit)
        )
    else:
        return "{}_TRUNC({}, {})".format(kind, rendered, target_unit)
def spark_compiles_arbitrary(translator, expr):
    """Translate an arbitrary-value reduction into ``first``/``last``.

    ``how`` of ``None`` or ``'first'`` renders ``first(<arg>, True)``;
    ``'last'`` renders ``last(<arg>, True)``. When a ``where`` filter is
    present, the argument is wrapped as ``where.ifelse(arg, ibis.NA)``
    before translation.

    Raises
    ------
    com.UnsupportedOperationError
        If ``how`` is any other value.
    """
    operand, how, where = expr.op().args

    if where is not None:
        operand = where.ifelse(operand, ibis.NA)

    if how in (None, 'first'):
        template = 'first({}, True)'
    elif how == 'last':
        template = 'last({}, True)'
    else:
        raise com.UnsupportedOperationError(
            '{!r} value not supported for arbitrary in Spark SQL'.format(how))

    return template.format(translator.translate(operand))
def _hash(translator, expr):
    """Translate a hash operation into a call to the named algorithm.

    The algorithm name given by ``how`` is used verbatim as the function
    name passed to ``_call``.

    Raises
    ------
    com.UnsupportedOperationError
        If ``how`` is not one of the listed algorithm names.
    """
    operand, how = expr.op().args

    supported = {
        'MD5',
        'halfMD5',
        'SHA1',
        'SHA224',
        'SHA256',
        'intHash32',
        'intHash64',
        'cityHash64',
        'sipHash64',
        'sipHash128',
    }
    if how in supported:
        return _call(translator, how, operand)

    raise com.UnsupportedOperationError(
        'Unsupported hash algorithm {0}'.format(how))
def _timestamp_truncate(translator, expr):
    """Render a timestamp truncate as a ``date_trunc`` call.

    The unit name is looked up in ``_spark_timestamp_unit_names``. When
    the looked-up name is ``'DAY'`` the ``date_trunc`` call is wrapped in
    ``date(...)``.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in ``_spark_timestamp_unit_names``.
    """
    operand, unit = expr.op().args
    rendered = translator.translate(operand)
    try:
        spark_unit = _spark_timestamp_unit_names[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            '{!r} unit is not supported in timestamp truncate'.format(unit))

    if spark_unit == 'DAY':
        template = "date(date_trunc({!r}, {}))"
    else:
        template = "date_trunc({!r}, {})"
    return template.format(spark_unit, rendered)
def _literal(t, expr):
    """Translate a literal expression into a SQLAlchemy construct.

    * Interval scalars become an ``INTERVAL :value <RESOLUTION>`` text
      clause with the value supplied as a bound parameter.
    * Set scalars become a list of ``sa.literal`` items.
    * Anything else becomes ``sa.literal``, with pandas timestamps
      converted to ``datetime`` first.

    ``t`` is unused.

    Raises
    ------
    com.UnsupportedOperationError
        If an interval's unit is ``'ms'`` or ``'ns'``.
    """
    if isinstance(expr, ir.IntervalScalar):
        interval_unit = expr.type().unit
        if interval_unit in {'ms', 'ns'}:
            raise com.UnsupportedOperationError(
                'MySQL does not allow operation '
                'with INTERVAL offset {}'.format(interval_unit))
        text_unit = expr.type().resolution.upper()
        amount = expr.op().value
        clause = sa.text('INTERVAL :value {}'.format(text_unit))
        return clause.bindparams(value=amount)

    if isinstance(expr, ir.SetScalar):
        return [sa.literal(member) for member in expr.op().value]

    value = expr.op().value
    if isinstance(value, pd.Timestamp):
        value = value.to_pydatetime()
    return sa.literal(value)
def _interval_from_integer(t, expr):
    """Build an ``INTERVAL <arg> <RESOLUTION>`` text clause.

    When the translated argument is a bind parameter, its value is
    re-bound under the name ``arg``; otherwise the translated argument is
    interpolated directly into the SQL text.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit is ``'ms'`` or ``'ns'``.
    """
    operand, unit = expr.op().args
    if unit in {'ms', 'ns'}:
        raise com.UnsupportedOperationError(
            'MySQL does not allow operation '
            'with INTERVAL offset {}'.format(unit))

    translated = t.translate(operand)
    text_unit = expr.type().resolution.upper()

    if not isinstance(translated, sa.sql.elements.BindParameter):
        return sa.text('INTERVAL {} {}'.format(translated, text_unit))

    # XXX: Is there a better way to handle this? I.e. can we somehow use
    # the existing bind parameter produced by translate and reuse its name
    # in the string passed to sa.text?
    clause = sa.text('INTERVAL :arg {}'.format(text_unit))
    return clause.bindparams(arg=translated.value)
def _parse_url(translator, expr):
    """Translate a URL-parsing operation into the matching function call.

    ``HOST``, ``PROTOCOL`` and ``PATH`` map to ``domain``, ``protocol``
    and ``path``. ``QUERY`` maps to ``extractURLParameter`` when a key is
    given and to ``queryString`` otherwise.

    Raises
    ------
    com.UnsupportedOperationError
        If ``extract`` is any other value.
    """
    url, extract, key = expr.op().args

    # Parts that translate to a single-argument function call.
    single_argument_parts = (
        ('HOST', 'domain'),
        ('PROTOCOL', 'protocol'),
        ('PATH', 'path'),
    )
    for part, function_name in single_argument_parts:
        if extract == part:
            return _call(translator, function_name, url)

    if extract == 'QUERY':
        if key is None:
            return _call(translator, 'queryString', url)
        return _call(translator, 'extractURLParameter', url, key)

    raise com.UnsupportedOperationError(
        'Parse url with extract {0} is not supported'.format(extract))
def _cast(translator, expr):
    """Render ``CAST(<arg> AS <type>)``.

    For geospatial arguments the type name is the upper-cased
    ``target.geotype``; for everything else it is the string form of
    ``MapDDataType.from_ibis(target, nullable=False)``.

    Raises
    ------
    com.UnsupportedOperationError
        If a geospatial argument is cast to a ``GEOMETRY`` target.
    """
    from ibis.mapd.client import MapDDataType

    source, target = expr.op().args
    rendered = translator.translate(source)

    if not isinstance(source, ir.GeoSpatialValue):
        type_name = str(MapDDataType.from_ibis(target, nullable=False))
    else:
        # NOTE: CastToGeography expects geometry with SRID=4326
        type_name = target.geotype.upper()
        if type_name == 'GEOMETRY':
            raise com.UnsupportedOperationError(
                "OmnisciDB/MapD doesn't support yet convert "
                "from GEOGRAPHY to GEOMETRY."
            )

    return 'CAST({0!s} AS {1!s})'.format(rendered, type_name)
def _truncate(translator, expr):
    """Translate a truncate operation into a per-unit conversion call.

    The function name is chosen from the unit-to-function table below and
    passed to ``_call`` with the operation's argument.

    Raises
    ------
    com.UnsupportedOperationError
        If the unit has no entry in the table.
    """
    operand, unit = expr.op().args

    function_by_unit = {
        'Y': 'toStartOfYear',
        'M': 'toStartOfMonth',
        'W': 'toMonday',
        'D': 'toDate',
        'h': 'toStartOfHour',
        'm': 'toStartOfMinute',
        's': 'toDateTime',
    }
    try:
        function_name = function_by_unit[unit]
    except KeyError:
        raise com.UnsupportedOperationError(
            'Unsupported truncate unit {}'.format(unit))
    else:
        return _call(translator, function_name, operand)
def raise_error(translator, expr, *args):
    """Always raise, naming the operation type that is not supported.

    ``translator`` and ``*args`` are accepted but unused.

    Raises
    ------
    com.UnsupportedOperationError
        Unconditionally; the message includes ``type(expr.op())``.
    """
    op_type = type(expr.op())
    template = "Clickhouse backend doesn't support {0} operation!"
    raise com.UnsupportedOperationError(template.format(op_type))
def raise_unsupported_op_error(translator, expr, *args):
    """Always raise, naming the operation type that is not supported.

    ``translator`` and ``*args`` are accepted but unused.

    Raises
    ------
    com.UnsupportedOperationError
        Unconditionally; the message includes ``type(expr.op())``.
    """
    op_type = type(expr.op())
    template = "MapD backend doesn't support {} operation!"
    raise com.UnsupportedOperationError(template.format(op_type))