def test_instance_of_operation():
    """Check that an ``Arg(ir.IntegerValue)`` slot validates its input.

    Constructing the node with an integer literal must succeed, while a
    string literal must raise ``IbisTypeError``.
    """

    class IntegerOnlyOperation(ops.Node):
        arg = Arg(ir.IntegerValue)

    # Valid input: construction must not raise.
    integer_literal = ir.literal(5)
    IntegerOnlyOperation(integer_literal)

    # Invalid input: a string literal is rejected by the argument rule.
    with pytest.raises(IbisTypeError):
        IntegerOnlyOperation(ir.literal('string'))
def _set_literal_format(translator, expr):
    """Render a set literal as a parenthesized, comma-separated string.

    Each element of ``expr.op().value`` is wrapped in an ibis literal typed
    with the expression's ``value_type`` and passed through
    ``translator.translate``; the translated pieces are joined with ``', '``.
    """
    element_type = expr.type().value_type

    pieces = []
    for element in expr.op().value:
        typed_literal = ir.literal(element, type=element_type)
        pieces.append(translator.translate(typed_literal))

    return '({})'.format(', '.join(pieces))
def array_of(inner, arg):
    """Validate that ``arg`` is an array expression and return it as a value.

    Parameters
    ----------
    inner : callable
        Applied to the first element of the array; the type of its result
        becomes the array's element type.
    arg : python literal or an ibis expression
        Non-expression inputs are converted with ``ir.literal`` first.

    Raises
    ------
    com.IbisTypeError
        If the (possibly converted) argument is not of type ``dt.Array``.
    """
    if isinstance(arg, ir.Expr):
        array_expr = arg
    else:
        array_expr = ir.literal(arg)

    array_type = array_expr.type()
    if isinstance(array_type, dt.Array):
        # The element type is derived from the validated first element.
        element_type = inner(array_expr[0]).type()
        return value(dt.Array(element_type), array_expr)

    raise com.IbisTypeError(
        'Argument must be an array, got expression {} which is of type '
        '{}'.format(array_expr, array_expr.type()))
def _timestamp_from_str(value: str, timezone: str | None = None) -> ir.TimestampScalar:
    """Parse a string into a timestamp literal.

    The string is parsed with ``pd.Timestamp``; when that raises
    ``OutOfBoundsDatetime`` it is parsed with ``dateutil.parser.parse``
    instead. If ``timezone`` is not given, the timezone name reported by the
    parsed object (``tzname()``) is used for the literal's type.
    """
    try:
        parsed = pd.Timestamp(value, tz=timezone)
    except pd.errors.OutOfBoundsDatetime:
        parsed = dateutil.parser.parse(value)

    if timezone is None:
        tz_name = parsed.tzname()
    else:
        tz_name = timezone

    return literal(parsed, type=dt.Timestamp(timezone=tz_name))
def array_of(inner, arg):
    """Check that ``arg`` is array-typed and validate it against ``inner``.

    A non-expression ``arg`` is first turned into an ibis literal. ``inner``
    is applied to the first element to determine the element type of the
    array type that is handed to ``value``.

    Raises
    ------
    com.IbisTypeError
        If the argument's type is not ``dt.Array``.
    """
    val = ir.literal(arg) if not isinstance(arg, ir.Expr) else arg

    if not isinstance(val.type(), dt.Array):
        message = (
            'Argument must be an array, got expression {} which is of type '
            '{}'
        ).format(val, val.type())
        raise com.IbisTypeError(message)

    first_element = val[0]
    validated_element = inner(first_element)
    return value(dt.Array(validated_element.type()), val)
def _set_literal_format(translator, expr):
    """Format the elements of a set literal as ``(a, b, ...)``.

    Every element of ``expr.op().value`` is converted to an ibis literal of
    the expression's ``value_type`` and translated with
    ``translator.translate``.
    """
    value_type = expr.type().value_type

    def render(item):
        # Translate a single element as a literal of the set's value type.
        return translator.translate(ir.literal(item, type=value_type))

    body = ', '.join(map(render, expr.op().value))
    return '({})'.format(body)
def test_ops_smoke():
    """Smoke test: construct a range of operation nodes.

    There are no assertions; the test passes when none of the constructors
    below raises for the given Python or literal arguments.
    """
    expr = ir.literal(3)

    # Generic unary / cast / type inspection ops.
    ops.UnaryOp(expr)
    ops.Cast(expr, to='int64')
    ops.TypeOf(arg=2)
    ops.Negate(4)
    ops.Negate(4.0)

    # Null-related ops.
    ops.NullIfZero(0)
    ops.NullIfZero(1)
    ops.IsNull(ir.null())
    ops.NotNull(ir.null())
    ops.ZeroIfNull(ir.null())
    ops.IfNull(1, ops.NullIfZero(0).to_expr())
    ops.NullIf(ir.null(), ops.NullIfZero(0).to_expr())

    # Numeric ops, called with positional and keyword argument forms.
    ops.IsNan(np.nan)
    ops.IsInf(np.inf)
    ops.Ceil(4.5)
    ops.Floor(4.5)
    ops.Round(3.43456)
    ops.Round(3.43456, 2)
    ops.Round(3.43456, digits=1)
    ops.Clip(123, lower=30)
    ops.Clip(123, lower=30, upper=100)
    ops.BaseConvert('EEE', from_base=16, to_base=10)
    ops.Logarithm(100)
    ops.Log(100)
    ops.Log(100, base=2)
    ops.Ln(100)
    ops.Log2(100)
    ops.Log10(100)

    # String ops.
    ops.Uppercase('asd')
    ops.Lowercase('asd')
    ops.Reverse('asd')
    ops.Strip('asd')
    ops.LStrip('asd')
    ops.RStrip('asd')
    ops.Capitalize('asd')
    ops.Substring('asd', start=1)
    ops.Substring('asd', 1)
    ops.Substring('asd', 1, length=2)
    ops.StrRight('asd', nchars=2)
    ops.Repeat('asd', times=4)
    ops.StringFind('asd', 'sd', start=1)
    ops.Translate('asd', from_str='bd', to_str='ce')
    ops.LPad('asd', length=2, pad='ss')
    ops.RPad('asd', length=2, pad='ss')
    ops.StringJoin(',', ['asd', 'bsdf'])
    ops.FuzzySearch('asd', pattern='n')
    ops.StringSQLLike('asd', pattern='as', escape='asd')
    ops.RegexExtract('asd', pattern='as', index=1)
    ops.RegexReplace('asd', 'as', 'a')
    ops.StringReplace('asd', 'as', 'a')
    ops.StringSplit('asd', 's')
    ops.StringConcat(['s', 'e'])
    ops.StartsWith('asd', 'as')
    ops.EndsWith('asd', 'xyz')
def array_of(inner, arg, **kwargs):
    """Validate that ``arg`` is an array expression and return it as a value.

    Parameters
    ----------
    inner : callable
        Called with the first element of the array and ``**kwargs``; the type
        of its result is used as the array's element type.
    arg : python literal or an ibis expression
        Non-expression inputs are converted with ``ir.literal`` first.
    **kwargs
        Forwarded unchanged to both ``inner`` and ``value``.

    Raises
    ------
    com.IbisTypeError
        If the argument's type is not ``dt.Array``.
    """
    if isinstance(arg, ir.Expr):
        array_expr = arg
    else:
        array_expr = ir.literal(arg)

    if not isinstance(array_expr.type(), dt.Array):
        raise com.IbisTypeError(
            'Argument must be an array, '
            f'got expression which is of type {array_expr.type()}'
        )

    element = inner(array_expr[0], **kwargs)
    return value(dt.Array(element.type()), array_expr, **kwargs)
def test_ops_smoke():
    """Smoke test: construct a range of operation nodes.

    There are no assertions; the test passes when none of the constructors
    below raises for the given Python or literal arguments.
    """
    expr = ir.literal(3)

    # Generic unary / cast / type inspection ops.
    ops.UnaryOp(expr)
    ops.Cast(expr, to='int64')
    ops.TypeOf(arg=2)
    ops.Negate(4)
    ops.Negate(4.0)

    # Null-related ops.
    ops.NullIfZero(0)
    ops.NullIfZero(1)
    ops.IsNull(ir.null())
    ops.NotNull(ir.null())
    ops.ZeroIfNull(ir.null())
    ops.IfNull(1, ops.NullIfZero(0).to_expr())
    ops.NullIf(ir.null(), ops.NullIfZero(0).to_expr())

    # Numeric ops, called with positional and keyword argument forms.
    ops.IsNan(np.nan)
    ops.IsInf(np.inf)
    ops.Ceil(4.5)
    ops.Floor(4.5)
    ops.Round(3.43456)
    ops.Round(3.43456, 2)
    ops.Round(3.43456, digits=1)
    ops.Clip(123, lower=30)
    ops.Clip(123, lower=30, upper=100)
    ops.BaseConvert('EEE', from_base=16, to_base=10)
    ops.Logarithm(100)
    ops.Log(100)
    ops.Log(100, base=2)
    ops.Ln(100)
    ops.Log2(100)
    ops.Log10(100)

    # String ops.
    ops.Uppercase('asd')
    ops.Lowercase('asd')
    ops.Reverse('asd')
    ops.Strip('asd')
    ops.LStrip('asd')
    ops.RStrip('asd')
    ops.Capitalize('asd')
    ops.Substring('asd', start=1)
    ops.Substring('asd', 1)
    ops.Substring('asd', 1, length=2)
    ops.StrRight('asd', nchars=2)
    ops.Repeat('asd', times=4)
    ops.StringFind('asd', 'sd', start=1)
    ops.Translate('asd', from_str='bd', to_str='ce')
    ops.LPad('asd', length=2, pad='ss')
    ops.RPad('asd', length=2, pad='ss')
    ops.StringJoin(',', ['asd', 'bsdf'])
    ops.FuzzySearch('asd', pattern='n')
    ops.StringSQLLike('asd', pattern='as', escape='asd')
    ops.RegexExtract('asd', pattern='as', index=1)
    ops.RegexReplace('asd', 'as', 'a')
    ops.StringReplace('asd', 'as', 'a')
    ops.StringSplit('asd', 's')
    ops.StringConcat(['s', 'e'])
def _exists_subquery(t, expr):
    """Compile an (NOT) EXISTS subquery operation into a SQLAlchemy clause.

    The foreign table is filtered by the operation's predicates and projected
    onto a single unnamed literal ``1``; the result is compiled in a
    sub-context with ``exists=True``. For ``NotExistsSubquery`` the clause is
    wrapped in ``sa.not_``.
    """
    op = expr.op()
    parent_ctx = t.context

    filtered_table = op.foreign_table.filter(op.predicates)
    placeholder = ir.literal(1).name(ir.unnamed)
    subquery_expr = filtered_table.projection([placeholder])

    exists_clause = to_sqlalchemy(
        subquery_expr, context=parent_ctx.subcontext(), exists=True
    )

    if isinstance(op, transforms.NotExistsSubquery):
        return sa.not_(exists_clause)
    return exists_clause
def _exists_subquery(t, expr):
    """Build the SQLAlchemy clause for an EXISTS / NOT EXISTS subquery.

    The operation's foreign table is filtered with its predicates and reduced
    to a projection of one unnamed literal ``1``, then compiled via
    ``to_sqlalchemy`` in a sub-context with ``exists=True``. The clause is
    negated with ``sa.not_`` when the operation is a ``NotExistsSubquery``.
    """
    op = expr.op()
    ctx = t.context

    dummy_column = ir.literal(1).name(ir.unnamed)
    subquery_expr = (
        op.foreign_table
        .filter(op.predicates)
        .projection([dummy_column])
    )

    sub_ctx = ctx.subcontext()
    clause = to_sqlalchemy(subquery_expr, sub_ctx, exists=True)

    negated = isinstance(op, transforms.NotExistsSubquery)
    return sa.not_(clause) if negated else clause
def _exists_subquery(t, expr):
    """Compile an EXISTS / NOT EXISTS subquery with ``AlchemyCompiler``.

    The foreign table is filtered by the operation's predicates, projected
    onto one unnamed literal ``1`` and compiled in a sub-context with
    ``exists=True``. ``ops.NotExistsSubquery`` results are wrapped in
    ``sa.not_``.
    """
    # Imported locally, as in the original implementation.
    from .query_builder import AlchemyCompiler

    op = expr.op()
    parent_ctx = t.context

    filtered_table = op.foreign_table.filter(op.predicates)
    placeholder = ir.literal(1).name(ir.unnamed)
    subquery_expr = filtered_table.projection([placeholder])

    child_ctx = parent_ctx.subcontext()
    exists_clause = AlchemyCompiler.to_sql(
        subquery_expr, child_ctx, exists=True
    )

    if not isinstance(op, ops.NotExistsSubquery):
        return exists_clause
    return sa.not_(exists_clause)
def _exists_subquery(translator, expr):
    """Format an EXISTS / NOT EXISTS subquery as a SQL string.

    The foreign table is filtered by the operation's predicates and projected
    onto one unnamed literal ``1``; the formatted subquery is indented by
    ``ctx.indent`` and wrapped in ``EXISTS (...)`` or ``NOT EXISTS (...)``.

    Raises
    ------
    NotImplementedError
        If the operation is neither ``ExistsSubquery`` nor
        ``NotExistsSubquery``.
    """
    op = expr.op()
    ctx = translator.context

    placeholder = ir.literal(1).name(ir.unnamed)
    subquery_expr = op.foreign_table.filter(op.predicates).projection(
        [placeholder]
    )
    subquery = ctx.get_formatted_query(subquery_expr)

    # The keyword is chosen after formatting, matching the original ordering.
    if isinstance(op, transforms.ExistsSubquery):
        keyword = 'EXISTS'
    elif isinstance(op, transforms.NotExistsSubquery):
        keyword = 'NOT EXISTS'
    else:
        raise NotImplementedError

    indented = util.indent(subquery, ctx.indent)
    return '{0} (\n{1}\n)'.format(keyword, indented)
def _exists_subquery(translator, expr):
    """Render an EXISTS / NOT EXISTS subquery as a SQL string.

    The operation's foreign table is filtered with its predicates, projected
    onto a single unnamed literal ``1`` and compiled through
    ``ctx.get_compiled_expr``. The compiled text is indented by ``ctx.indent``
    and wrapped in the matching keyword.

    Raises
    ------
    NotImplementedError
        If the operation is neither ``ExistsSubquery`` nor
        ``NotExistsSubquery``.
    """
    op = expr.op()
    ctx = translator.context

    filtered_table = op.foreign_table.filter(op.predicates)
    subquery_expr = filtered_table.projection(
        [ir.literal(1).name(ir.unnamed)]
    )
    compiled = ctx.get_compiled_expr(subquery_expr)

    # ExistsSubquery is tested first, as in the original branch order.
    keyword_by_class = (
        (transforms.ExistsSubquery, 'EXISTS'),
        (transforms.NotExistsSubquery, 'NOT EXISTS'),
    )
    for op_class, keyword in keyword_by_class:
        if isinstance(op, op_class):
            break
    else:
        raise NotImplementedError

    return '{0} (\n{1}\n)'.format(keyword, util.indent(compiled, ctx.indent))
def exists_subquery(translator, expr):
    """Render an EXISTS / NOT EXISTS subquery as a SQL string.

    The foreign table is filtered by the operation's predicates and projected
    onto one unnamed literal ``1``. The compiled subquery is indented by
    ``ctx.indent`` and wrapped in ``EXISTS (...)`` or ``NOT EXISTS (...)``.

    Raises
    ------
    NotImplementedError
        If the operation is neither ``ops.ExistsSubquery`` nor
        ``ops.NotExistsSubquery``.
    """
    op = expr.op()
    ctx = translator.context

    placeholder = ir.literal(1).name(ir.core.unnamed)
    subquery_expr = op.foreign_table.filter(op.predicates).projection(
        [placeholder]
    )
    subquery = ctx.get_compiled_expr(subquery_expr)

    if isinstance(op, ops.ExistsSubquery):
        keyword = 'EXISTS'
    elif isinstance(op, ops.NotExistsSubquery):
        keyword = 'NOT EXISTS'
    else:
        raise NotImplementedError

    return f'{keyword} (\n{util.indent(subquery, ctx.indent)}\n)'
def value(dtype, arg):
    """Validate that ``arg`` is a value expression of the requested datatype.

    Parameters
    ----------
    dtype : DataType subclass or DataType instance
    arg : python literal or an ibis expression
        A python literal is first coerced to an ibis literal.

    Returns
    -------
    arg : AnyValue
        An ibis value expression with the specified datatype

    Raises
    ------
    com.IbisTypeError
        If ``arg`` is not a value expression, or its datatype is neither an
        instance of ``dtype`` nor implicitly castable to it.
    """
    # Plain python objects are promoted to ibis literals before validation.
    expr = arg if isinstance(arg, ir.Expr) else ir.literal(arg)

    if not isinstance(expr, ir.AnyValue):
        raise com.IbisTypeError(
            'Given argument with type {} is not a value '
            'expression'.format(type(expr))
        )

    # Literal payload (if any), used by the implicit cast check below.
    literal_value = getattr(expr.op(), 'value', None)
    actual_type = expr.type()

    # dtype given as a class, like dt.Interval or dt.Decimal.
    if isinstance(dtype, type) and isinstance(actual_type, dtype):
        return expr

    # dtype given as an instance or string: accept when implicitly castable,
    # like dt.int8 being castable to dt.int64.
    if dt.castable(actual_type, dt.dtype(dtype), value=literal_value):
        return expr

    raise com.IbisTypeError(
        'Given argument with datatype {} is not '
        'subtype of {} nor implicitly castable to '
        'it'.format(expr.type(), dtype)
    )
def _(value) -> ir.DateScalar:
    """Wrap ``value`` in a literal typed as ``dt.date``."""
    date_literal = literal(value, type=dt.date)
    return date_literal
def _(value, timezone: str | None = None) -> ir.TimestampScalar:
    """Wrap ``value`` in a timestamp literal carrying ``timezone``."""
    timestamp_type = dt.Timestamp(timezone=timezone)
    return literal(value, type=timestamp_type)
def _date_from_timestamp(value) -> ir.DateScalar:
    """Create a literal of type ``dt.date`` from ``value``."""
    target_type = dt.date
    return literal(value, type=target_type)
def _(value: str, timezone: str | None = None) -> ir.TimestampScalar:
    """Parse a string into a timestamp literal with the given ``timezone``.

    The string is parsed with ``pd.Timestamp``; when that raises
    ``OutOfBoundsDatetime`` it is parsed with ``dateutil.parser.parse``
    instead.
    """
    try:
        parsed = pd.Timestamp(value, tz=timezone)
    except pd.errors.OutOfBoundsDatetime:
        parsed = dateutil.parser.parse(value)

    timestamp_type = dt.Timestamp(timezone=timezone)
    return literal(parsed, type=timestamp_type)
def interval(
    value: int | datetime.timedelta | None = None,
    unit: str = 's',
    years: int | None = None,
    quarters: int | None = None,
    months: int | None = None,
    weeks: int | None = None,
    days: int | None = None,
    hours: int | None = None,
    minutes: int | None = None,
    seconds: int | None = None,
    milliseconds: int | None = None,
    microseconds: int | None = None,
    nanoseconds: int | None = None,
) -> ir.IntervalScalar:
    """Return an interval literal expression.

    Parameters
    ----------
    value
        Interval value. If passed, must be combined with `unit`. A
        `datetime.timedelta` ignores `unit`: it is expressed in seconds when
        it is a whole number of seconds and in microseconds otherwise, so
        that no sub-second precision is lost.
    unit
        Unit of `value`
    years
        Number of years
    quarters
        Number of quarters
    months
        Number of months
    weeks
        Number of weeks
    days
        Number of days
    hours
        Number of hours
    minutes
        Number of minutes
    seconds
        Number of seconds
    milliseconds
        Number of milliseconds
    microseconds
        Number of microseconds
    nanoseconds
        Number of nanoseconds

    Returns
    -------
    IntervalScalar
        An interval expression

    Raises
    ------
    ValueError
        If `value` is neither an integer nor a `datetime.timedelta`, or if
        `value` is omitted and not exactly one unit keyword is given.
    """
    if value is not None:
        if isinstance(value, datetime.timedelta):
            # timedelta is normalized to (days, seconds, microseconds), so
            # integer arithmetic is exact; the float returned by
            # total_seconds() is avoided and the sub-second part is kept.
            whole_seconds = value.days * 86400 + value.seconds
            if value.microseconds:
                unit = 'us'
                value = whole_seconds * 1_000_000 + value.microseconds
            else:
                unit = 's'
                value = whole_seconds
        elif not isinstance(value, int):
            raise ValueError('Interval value must be an integer')
    else:
        kwds = [
            ('Y', years),
            ('Q', quarters),
            ('M', months),
            ('W', weeks),
            ('D', days),
            ('h', hours),
            ('m', minutes),
            ('s', seconds),
            ('ms', milliseconds),
            ('us', microseconds),
            ('ns', nanoseconds),
        ]
        defined_units = [(k, v) for k, v in kwds if v is not None]

        if len(defined_units) != 1:
            raise ValueError('Exactly one argument is required')

        unit, value = defined_units[0]

    value_type = literal(value).type()
    # Named interval_type rather than `type` to avoid shadowing the builtin.
    interval_type = dt.Interval(unit, value_type=value_type)

    return literal(value, type=interval_type).op().to_expr()
def _(value: str) -> ir.TimeScalar:
    """Parse a string with ``pd.to_datetime`` and wrap its time part in a literal."""
    parsed = pd.to_datetime(value)
    return literal(parsed.time(), type=dt.time)
def time(value) -> TimeValue:
    """Wrap ``value`` in a literal typed as ``dt.time``."""
    time_literal = literal(value, type=dt.time)
    return time_literal
def _(value: str) -> ir.DateScalar:
    """Parse a string with ``pd.to_datetime`` and wrap its date part in a literal."""
    parsed = pd.to_datetime(value)
    return literal(parsed.date(), type=dt.date)