def test_simple_datetime_operations(con, func, expected):
    value = ibis.timestamp('2015-09-01 14:48:05.359')
    with pytest.raises(ValueError):
        con.execute(func(value))

    value = ibis.timestamp('2015-09-01 14:48:05')
    assert con.execute(func(value)) == expected


def test_timestamp_integer_warns():
    with pytest.warns(UserWarning):
        ibis.timestamp(1234)

    t = ibis.table([('ts', 'timestamp')])
    column = t.ts
    with pytest.warns(UserWarning):
        column < 1234


def test_udf_primitive_output_types(self):
    types = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for t, sv, av in types:
        func = self._register_udf([t], t, 'test')

        ibis_type = dt.validate_type(t)

        expr = func(sv)
        assert type(expr) == type(
            ibis_type.scalar_type()(expr.op())
        )  # noqa: E501, E721
        expr = func(av)
        assert type(expr) == type(
            ibis_type.column_type()(expr.op())
        )  # noqa: E501, E721


def test_timestamp_wrapping(self):
    func = self._udf_registration_single_input(
        'timestamp', 'timestamp', 'test'
    )
    expr = func(ibis.timestamp('1961-04-10'))
    assert type(expr) == ir.TimestampScalar

    expr = func(self.t)
    assert type(expr) == ir.TimestampArray


def test_timestamp_scalar_in_filter(alltypes, translate):
    table = alltypes
    expr = table.filter([
        table.timestamp_col < (ibis.timestamp('2010-01-01') + ibis.week(3)),
        table.timestamp_col < (ibis.now() + ibis.day(10)),
    ]).count()
    expr.execute()


def test_timestamp_literals(self):
    tv1 = '2015-01-01 12:34:56'
    ex1 = "'2015-01-01 12:34:56'"

    cases = [
        (ibis.literal(pd.Timestamp(tv1)), ex1),
        (ibis.literal(pd.Timestamp(tv1).to_pydatetime()), ex1),
        (ibis.timestamp(tv1), ex1),
    ]
    self._check_expr_cases(cases)


def test_strftime(con, func):
    value = ibis.timestamp('2015-09-01 14:48:05.359')
    raw_value = datetime(
        year=2015,
        month=9,
        day=1,
        hour=14,
        minute=48,
        second=5,
        microsecond=359000,
    )
    assert con.execute(func(value)) == func(raw_value)


def test_large_timestamp(client):
    huge_timestamp = datetime.datetime(2012, 10, 10, 10, 10, 10, 154117)
    expr = ibis.timestamp("2012-10-10 10:10:10.154117")
    result = client.execute(expr)

    huge_timestamp = pd.to_datetime(huge_timestamp).tz_localize("UTC").date()
    result = result["tmp"][0].date()
    assert result == huge_timestamp


def test_timestamp_scalar_in_filter(alltypes, translate):
    table = alltypes
    expr = table.filter([
        table.timestamp_col < (
            ibis.timestamp('2010-01-01') + ibis.interval(weeks=3)
        ),
        table.timestamp_col < (ibis.now() + ibis.interval(days=10)),
    ]).count()
    expr.execute()


def test_timestamp_scalar_in_filter(self):
    # #310
    table = self.alltypes
    expr = table.filter([
        table.timestamp_col < (ibis.timestamp('2010-01-01') + ibis.month(3)),
        table.timestamp_col < (ibis.now() + ibis.day(10)),
    ]).count()
    expr.execute()


def test_mult_args(self):
    func = self._register_udf(
        ['int32', 'double', 'string', 'boolean', 'timestamp'],
        'int64',
        'mult_types',
    )

    expr = func(self.i32, self.d, self.s, self.b, self.t)
    assert issubclass(type(expr), ir.ArrayExpr)

    expr = func(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
    assert issubclass(type(expr), ir.ScalarExpr)


def test_timestamp_literals(self):
    tv1 = '2015-01-01 12:34:56'
    ex1 = "'2015-01-01 12:34:56'"

    cases = [
        (ibis.literal(pd.Timestamp(tv1)), ex1),
        (ibis.literal(pd.Timestamp(tv1).to_pydatetime()), ex1),
        (ibis.timestamp(tv1), ex1),
    ]
    self._check_expr_cases(cases)


def test_timestamp_scalar_in_filter(alltypes):
    # #310
    table = alltypes
    expr = table.filter([
        table.timestamp_col < (
            ibis.timestamp('2010-01-01') + ibis.interval(months=3)
        ),
        table.timestamp_col < (ibis.now() + ibis.interval(days=10)),
    ]).count()
    expr.execute()


def test_timestamp_literals(self):
    from pandas import Timestamp

    tv1 = '2015-01-01 12:34:56'
    ex1 = "'2015-01-01 12:34:56'"

    cases = [
        (L(Timestamp(tv1)), ex1),
        (L(Timestamp(tv1).to_pydatetime()), ex1),
        (ibis.timestamp(tv1), ex1),
    ]
    self._check_expr_cases(cases)


def test_mult_args(self):
    func = self._register_udf(
        ['int32', 'double', 'string', 'boolean', 'timestamp'],
        'int64',
        'mult_types',
    )

    expr = func(self.i32, self.d, self.s, self.b, self.t)
    assert issubclass(type(expr), ir.ColumnExpr)

    expr = func(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
    assert issubclass(type(expr), ir.ScalarExpr)


def test_timestamp_literals(self):
    ts_str = '2015-01-01 00:00:00'
    val = pd.Timestamp(ts_str)

    expr = ibis.literal(val)
    assert isinstance(expr, ir.TimestampScalar)

    expr = ibis.timestamp(ts_str)
    assert isinstance(expr, ir.TimestampScalar)

    self.assertRaises(ValueError, ibis.timestamp, '2015-01-01 00:71')


def test_timestamp_scalar_in_filter(alltypes, translate):
    table = alltypes
    expr = table.filter(
        [
            table.timestamp_col
            < (ibis.timestamp('2010-01-01') + ibis.interval(weeks=3)),
            table.timestamp_col < (ibis.now() + ibis.interval(days=10)),
        ]
    ).count()
    expr.execute()


def test_mult_args(self):
    op = self._udf_registration(
        ["int32", "double", "string", "boolean", "timestamp"],
        "int64",
        "mult_types",
    )

    def _func(integer, double, string, boolean, timestamp):
        return op(integer, double, string, boolean, timestamp).to_expr()

    expr = _func(self.i32, self.d, self.s, self.b, self.t)
    assert issubclass(type(expr), ir.ArrayExpr)

    expr = _func(1, 1.0, "a", True, ibis.timestamp("1961-04-10"))
    assert issubclass(type(expr), ir.ScalarExpr)


def test_decimal_timestamp_builtins(self):
    table = self.con.table('tpch_lineitem')

    dc = table.l_quantity
    ts = table.l_receiptdate.cast('timestamp')

    exprs = [
        dc % 10,
        dc + 5,
        dc + dc,
        dc / 2,
        dc * 2,
        dc ** 2,
        dc.cast('double'),
        api.where(table.l_discount > 0, dc * table.l_discount, api.NA),
        dc.fillna(0),
        ts < (ibis.now() + ibis.month(3)),
        ts < (ibis.timestamp('2005-01-01') + ibis.month(3)),
        # hashing
        dc.hash(),
        ts.hash(),
        # truncate
        ts.truncate('y'),
        ts.truncate('q'),
        ts.truncate('month'),
        ts.truncate('d'),
        ts.truncate('w'),
        ts.truncate('h'),
        ts.truncate('minute'),
    ]

    timestamp_fields = [
        'year', 'month', 'day', 'hour', 'minute', 'second',
        'millisecond', 'microsecond', 'week',
    ]
    for field in timestamp_fields:
        if hasattr(ts, field):
            exprs.append(getattr(ts, field)())

        offset = getattr(ibis, field)(2)
        exprs.append(ts + offset)
        exprs.append(ts - offset)

    proj_exprs = [expr.name('e%d' % i) for i, expr in enumerate(exprs)]

    projection = table[proj_exprs].limit(10)
    projection.execute()


def test_mult_args(i32, d, s, b, t):
    func = _register_udf(
        ['int32', 'double', 'string', 'boolean', 'timestamp'],
        'int64',
        'mult_types',
    )

    expr = func(i32, d, s, b, t)
    assert issubclass(type(expr), ir.Column)

    expr = func(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
    assert issubclass(type(expr), ir.Scalar)


def test_strftime(con, pattern):
    value = ibis.timestamp('2015-09-01 14:48:05.359')
    raw_value = datetime(
        year=2015,
        month=9,
        day=1,
        hour=14,
        minute=48,
        second=5,
        microsecond=359000,
    )
    assert con.execute(value.strftime(pattern)) == raw_value.strftime(pattern)


def test_timestamp_literals(self):
    from pandas import Timestamp

    tv1 = '2015-01-01 12:34:56'
    ex1 = "'2015-01-01 12:34:56'"

    cases = [
        (L(Timestamp(tv1)), ex1),
        (L(Timestamp(tv1).to_pydatetime()), ex1),
        (ibis.timestamp(tv1), ex1),
    ]
    self._check_expr_cases(cases)


def test_mult_args(self):
    op = self._udf_registration(
        ['int32', 'double', 'string', 'boolean', 'timestamp'],
        'int64',
        'mult_types',
    )

    def _func(integer, double, string, boolean, timestamp):
        return op(integer, double, string, boolean, timestamp).to_expr()

    expr = _func(self.i32, self.d, self.s, self.b, self.t)
    assert issubclass(type(expr), ir.ArrayExpr)

    expr = _func(1, 1.0, 'a', True, ibis.timestamp('1961-04-10'))
    assert issubclass(type(expr), ir.ScalarExpr)


def test_to_timestamp(backend, con, unit):
    backend_unit = backend.returned_timestamp_unit
    factor = unit_factors[unit]

    ts = ibis.timestamp('2018-04-13 09:54:11.872832')
    pandas_ts = ibis.pandas.execute(ts).floor(unit).value

    # convert the now timestamp to the input unit being tested
    int_expr = ibis.literal(pandas_ts // factor)
    expr = int_expr.to_timestamp(unit)
    result = con.execute(expr)
    expected = pd.Timestamp(pandas_ts, unit='ns').floor(backend_unit)

    assert result == expected


def test_uda_primitive_output_types(self):
    types = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for t, sv, av in types:
        func = self._register_uda([t], t, 'test')

        ibis_type = validate_type(t)

        expr1 = func(sv)
        expr2 = func(sv)
        assert isinstance(expr1, ibis_type.scalar_type())
        assert isinstance(expr2, ibis_type.scalar_type())


def test_where_analyze_scalar_op(functional_alltypes):
    # root cause of #310
    table = functional_alltypes

    expr = table.filter([
        table.timestamp_col < (
            ibis.timestamp('2010-01-01') + ibis.interval(months=3)
        ),
        table.timestamp_col < (ibis.now() + ibis.interval(days=10)),
    ]).count()

    result = Compiler.to_sql(expr)
    expected = """\
SELECT count(*) AS `count`
FROM functional_alltypes
WHERE (`timestamp_col` < date_add(cast({} as timestamp), INTERVAL 3 MONTH)) AND
      (`timestamp_col` < date_add(cast(now() as timestamp), INTERVAL 10 DAY))"""  # noqa: E501
    assert result == expected.format("'2010-01-01 00:00:00'")


def test_timestamp_builtins(self):
    i32 = ibis.literal(50000)
    i64 = ibis.literal(5 * 10 ** 8)

    stamp = ibis.timestamp("2009-05-17 12:34:56")

    timestamp_cases = [
        (i32.to_timestamp("s"), pd.to_datetime(50000, unit="s")),
        (i32.to_timestamp("ms"), pd.to_datetime(50000, unit="ms")),
        (i64.to_timestamp(), pd.to_datetime(5 * 10 ** 8, unit="s")),
        (stamp.truncate("y"), pd.Timestamp("2009-01-01")),
        (stamp.truncate("m"), pd.Timestamp("2009-05-01")),
        (stamp.truncate("d"), pd.Timestamp("2009-05-17")),
        (stamp.truncate("h"), pd.Timestamp("2009-05-17 12:00")),
        (stamp.truncate("minute"), pd.Timestamp("2009-05-17 12:34")),
    ]
    self.assert_cases_equality(timestamp_cases)


def test_where_analyze_scalar_op(self):
    # root cause of #310
    table = self.con.table('functional_alltypes')

    expr = table.filter([
        table.timestamp_col < (ibis.timestamp('2010-01-01') + ibis.month(3)),
        table.timestamp_col < (ibis.now() + ibis.day(10)),
    ]).count()

    result = to_sql(expr)
    expected = """\
SELECT count(*) AS `tmp`
FROM functional_alltypes
WHERE `timestamp_col` < months_add('2010-01-01 00:00:00', 3) AND
  `timestamp_col` < days_add(now(), 10)"""
    assert result == expected


def test_where_analyze_scalar_op(self):
    # root cause of #310
    table = self.con.table('functional_alltypes')

    expr = table.filter([
        table.timestamp_col < (ibis.timestamp('2010-01-01') + ibis.month(3)),
        table.timestamp_col < (ibis.now() + ibis.day(10)),
    ]).count()

    result = to_sql(expr)
    expected = """\
SELECT count(*) AS `tmp`
FROM functional_alltypes
WHERE `timestamp_col` < months_add('2010-01-01 00:00:00', 3) AND
  `timestamp_col` < days_add(now(), 10)"""
    assert result == expected


def test_timestamp_builtins(self):
    i32 = ibis.literal(50000)
    i64 = ibis.literal(5 * 10**8)

    stamp = ibis.timestamp('2009-05-17 12:34:56')

    timestamp_cases = [
        (i32.to_timestamp('s'), pd.to_datetime(50000, unit='s')),
        (i32.to_timestamp('ms'), pd.to_datetime(50000, unit='ms')),
        (i64.to_timestamp(), pd.to_datetime(5 * 10**8, unit='s')),
        (stamp.truncate('y'), pd.Timestamp('2009-01-01')),
        (stamp.truncate('m'), pd.Timestamp('2009-05-01')),
        (stamp.truncate('d'), pd.Timestamp('2009-05-17')),
        (stamp.truncate('h'), pd.Timestamp('2009-05-17 12:00')),
        (stamp.truncate('minute'), pd.Timestamp('2009-05-17 12:34')),
    ]
    self.assert_cases_equality(timestamp_cases)


def test_to_timestamp(backend, con, unit):
    if unit not in backend.supported_to_timestamp_units:
        pytest.skip('Unit {!r} not supported by {} to_timestamp'.format(
            unit, backend))

    backend_unit = backend.returned_timestamp_unit
    factor = unit_factors[unit]

    ts = ibis.timestamp('2018-04-13 09:54:11.872832')
    pandas_ts = ibis.pandas.execute(ts).floor(unit).value

    # convert the now timestamp to the input unit being tested
    int_expr = ibis.literal(pandas_ts // factor)
    expr = int_expr.to_timestamp(unit)
    result = con.execute(expr)
    expected = pd.Timestamp(pandas_ts, unit='ns').floor(backend_unit)

    assert result == expected


def test_identity_primitive_types(self):
    cases = [
        ('boolean', True, self.alltypes.bool_col),
        ('int8', 5, self.alltypes.tinyint_col),
        ('int16', 2**10, self.alltypes.smallint_col),
        ('int32', 2**17, self.alltypes.int_col),
        ('int64', 2**33, self.alltypes.bigint_col),
        ('float', 3.14, self.alltypes.float_col),
        ('double', 3.14, self.alltypes.double_col),
        ('string', 'ibis', self.alltypes.string_col),
        (
            'timestamp',
            ibis.timestamp('1961-04-10'),
            self.alltypes.timestamp_col,
        ),
    ]
    for t, lit_val, array_val in cases:
        if not isinstance(lit_val, ir.Expr):
            lit_val = ibis.literal(lit_val)
        self._identity_func_testing(t, lit_val, array_val)


def test_decimal_timestamp_builtins(self):
    table = self.con.table("tpch_lineitem")

    dc = table.l_quantity
    ts = table.l_receiptdate.cast("timestamp")

    exprs = [
        dc % 10,
        dc + 5,
        dc + dc,
        dc / 2,
        dc * 2,
        dc ** 2,
        dc.cast("double"),
        api.where(table.l_discount > 0, dc * table.l_discount, api.NA),
        dc.fillna(0),
        ts < (ibis.now() + ibis.month(3)),
        ts < (ibis.timestamp("2005-01-01") + ibis.month(3)),
        # hashing
        dc.hash(),
        ts.hash(),
        # truncate
        ts.truncate("y"),
        ts.truncate("q"),
        ts.truncate("month"),
        ts.truncate("d"),
        ts.truncate("w"),
        ts.truncate("h"),
        ts.truncate("minute"),
    ]

    timestamp_fields = [
        "year", "month", "day", "hour", "minute", "second",
        "millisecond", "microsecond", "week",
    ]
    for field in timestamp_fields:
        if hasattr(ts, field):
            exprs.append(getattr(ts, field)())

        offset = getattr(ibis, field)(2)
        exprs.append(ts + offset)
        exprs.append(ts - offset)

    proj_exprs = [expr.name("e%d" % i) for i, expr in enumerate(exprs)]

    projection = table[proj_exprs].limit(10)
    projection.execute()


def test_to_timestamp(backend, con, alltypes, df, unit):
    if unit not in backend.supported_to_timestamp_units:
        pytest.skip(
            'Unit {!r} not supported by {} to_timestamp'.format(unit, backend)
        )
    backend_unit = backend.returned_timestamp_unit
    factor = unit_factors[unit]

    ts = ibis.timestamp('2018-04-13 09:54:11.872832')
    pandas_ts = ibis.pandas.execute(ts).floor(unit).value

    # convert the now timestamp to the input unit being tested
    int_expr = ibis.literal(pandas_ts // factor)
    expr = int_expr.to_timestamp(unit)
    result = con.execute(expr)
    expected = pd.Timestamp(pandas_ts, unit='ns').floor(backend_unit)

    assert result == expected


def test_timestamp_builtins(self):
    i32 = L(50000)
    i64 = L(5 * 10 ** 8)

    stamp = ibis.timestamp('2009-05-17 12:34:56')

    timestamp_cases = [
        (i32.to_timestamp('s'), pd.to_datetime(50000, unit='s')),
        (i32.to_timestamp('ms'), pd.to_datetime(50000, unit='ms')),
        (i64.to_timestamp(), pd.to_datetime(5 * 10 ** 8, unit='s')),
        (stamp.truncate('y'), pd.Timestamp('2009-01-01')),
        (stamp.truncate('m'), pd.Timestamp('2009-05-01')),
        (stamp.truncate('d'), pd.Timestamp('2009-05-17')),
        (stamp.truncate('h'), pd.Timestamp('2009-05-17 12:00')),
        (stamp.truncate('minute'), pd.Timestamp('2009-05-17 12:34')),
    ]
    self.assert_cases_equality(timestamp_cases)


def test_uda_primitive_output_types(self):
    types = [
        ('boolean', True, self.b),
        ('int8', 1, self.i8),
        ('int16', 1, self.i16),
        ('int32', 1, self.i32),
        ('int64', 1, self.i64),
        ('float', 1.0, self.f),
        ('double', 1.0, self.d),
        ('string', '1', self.s),
        ('timestamp', ibis.timestamp('1961-04-10'), self.t),
    ]
    for t, sv, av in types:
        func = self._register_uda([t], t, 'test')

        ibis_type = validate_type(t)

        expr1 = func(sv)
        expr2 = func(sv)
        assert isinstance(expr1, ibis_type.scalar_type())
        assert isinstance(expr2, ibis_type.scalar_type())


        (literal('2017-04-01'), date(2017, 4, 2)),
        (date(2017, 4, 2), literal('2017-04-01')),
        (literal('2017-04-01 01:02:33'), datetime(2017, 4, 1, 1, 3, 34)),
        (datetime(2017, 4, 1, 1, 3, 34), literal('2017-04-01 01:02:33')),
    ],
)
@pytest.mark.parametrize(
    'op',
    [
        operator.eq,
        operator.ne,
        operator.lt,
        operator.le,
        operator.gt,
        operator.ge,
        lambda left, right: ibis.timestamp('2017-04-01 00:02:34').between(
            left, right),
        lambda left, right: ibis.timestamp('2017-04-01').cast(dt.date).between(
            left, right),
    ],
)
def test_string_temporal_compare(op, left, right):
    result = op(left, right)
    assert result.type().equals(dt.boolean)


@pytest.mark.parametrize(
    ('value', 'type', 'expected_type_class'),
    [
        (2.21, 'decimal', dt.Decimal),
        (3.14, 'double', dt.Double),
        (4.2, 'int64', dt.Double),
def test_timestamp_truncate(con, translate, unit, expected):
    stamp = ibis.timestamp('2009-05-17 12:34:56')
    expr = stamp.truncate(unit)
    assert con.execute(expr) == expected


def test_timestamp(self):
    func = self._udf_registration_single_input(
        "timestamp", "timestamp", "test"
    )
    expr = func(ibis.timestamp("1961-04-10"))
    assert type(expr) == ir.TimestampScalar

    expr = func(self.t)
    assert type(expr) == ir.TimestampArray


def test_large_timestamp():
    expr = ibis.timestamp('4567-02-03')
    expected = datetime(year=4567, month=2, day=3)
    result = expr.op().value
    assert result == expected


            lambda t, be: t.timestamp_col - pd.Timedelta(days=17),
            id='timestamp-subtract-interval',
        ),
        param(
            lambda t, be: t.timestamp_col.date() + ibis.interval(days=4),
            lambda t, be: t.timestamp_col.dt.floor('d') + pd.Timedelta(days=4),
            id='date-add-interval',
        ),
        param(
            lambda t, be: t.timestamp_col.date() - ibis.interval(days=14),
            lambda t, be: t.timestamp_col.dt.floor('d') - pd.Timedelta(days=14),
            id='date-subtract-interval',
        ),
        param(
            lambda t, be: t.timestamp_col - ibis.timestamp(timestamp_value),
            lambda t, be: pd.Series(
                t.timestamp_col.sub(timestamp_value).values.astype(
                    'timedelta64[{}]'.format(be.returned_timestamp_unit)
                )
            ),
            id='timestamp-subtract-timestamp',
        ),
        param(
            lambda t, be: t.timestamp_col.date() - ibis.date(date_value),
            lambda t, be: t.timestamp_col.dt.floor('d') - date_value,
            id='date-subtract-date',
        ),
    ],
)
@tu.skipif_unsupported
def test_timestamp_with_timezone(tz):
    expr = ibis.timestamp('2017-01-01', timezone=tz)
    expected = pd.Timestamp('2017-01-01', tz=tz)
    result = expr.op().value
    assert expected == result


def test_integer_timestamp_fails(value):
    with pytest.raises(
        TypeError, match=r"Use ibis\.literal\(-?\d+\)\.to_timestamp"
    ):
        ibis.timestamp(value)


def test_timestamp_timezone_type(tz):
    expr = ibis.timestamp('2017-01-01', timezone=tz)
    expected = dt.Timestamp(timezone=tz)
    assert expected == expr.op().dtype


def test_timestamp_wrapping(self):
    col = self.alltypes.timestamp_col
    literal = ibis.timestamp("1961-04-10")
    self._identity_func_testing("timestamp", literal, col)


def uda_so(udfcon, test_data_dir):
    return pjoin(test_data_dir, 'udf/libudasample.so')


@pytest.mark.parametrize(
    ('typ', 'lit_val', 'col_name'),
    [
        ('boolean', True, 'bool_col'),
        ('int8', ibis.literal(5), 'tinyint_col'),
        ('int16', ibis.literal(2 ** 10), 'smallint_col'),
        ('int32', ibis.literal(2 ** 17), 'int_col'),
        ('int64', ibis.literal(2 ** 33), 'bigint_col'),
        ('float', ibis.literal(3.14), 'float_col'),
        ('double', ibis.literal(3.14), 'double_col'),
        ('string', ibis.literal('ibis'), 'string_col'),
        ('timestamp', ibis.timestamp('1961-04-10'), 'timestamp_col'),
    ],
)
def test_identity_primitive_types(
    udfcon, alltypes, test_data_db, udf_ll, typ, lit_val, col_name
):
    col_val = alltypes[col_name]
    identity_func_testing(udf_ll, udfcon, test_data_db, typ, lit_val, col_val)


def test_decimal(udfcon, test_data_db, udf_ll):
    col = udfcon.table('tpch_customer').c_acctbal
    literal = ibis.literal(1).cast('decimal(12,2)')
    name = '__tmp_udf_' + util.guid()
    func = udf_creation_to_op(
        (date(2017, 4, 2), literal('2017-04-01')),
        (literal('2017-04-01 01:02:33'), datetime(2017, 4, 1, 1, 3, 34)),
        (datetime(2017, 4, 1, 1, 3, 34), literal('2017-04-01 01:02:33')),
    ],
)
@pytest.mark.parametrize(
    'op',
    [
        operator.eq,
        operator.ne,
        operator.lt,
        operator.le,
        operator.gt,
        operator.ge,
        lambda left, right: ibis.timestamp('2017-04-01 00:02:34').between(
            left, right
        ),
        lambda left, right: ibis.timestamp('2017-04-01')
        .cast(dt.date)
        .between(left, right),
    ],
)
def test_string_temporal_compare(op, left, right):
    result = op(left, right)
    assert result.type().equals(dt.boolean)


@pytest.mark.parametrize(
    ('value', 'type', 'expected_type_class'),
    [
        (2.21, 'decimal', dt.Decimal),
def test_invalid_timestamp_literal():
    with pytest.raises(ValueError):
        ibis.timestamp('2015-01-01 00:71')


def test_large_timestamp(client):
    huge_timestamp = datetime.datetime(year=4567, month=1, day=1)
    expr = ibis.timestamp('4567-01-01 00:00:00')
    result = client.execute(expr)
    assert result == huge_timestamp