def test_interval(unit):
    """Check that interval type strings parse to the matching ``dt.Interval``.

    ``unit`` is supplied by the caller (presumably a pytest parametrization
    that is not visible here -- confirm against the decorator).
    """
    # Each comparison must be asserted: a bare ``==`` expression statement is
    # evaluated and discarded, so the test could never fail.
    definition = f"interval('{unit}')"
    assert dt.Interval(unit, dt.int32) == dt.dtype(definition)

    definition = f"interval<uint16>('{unit}')"
    assert dt.Interval(unit, dt.uint16) == dt.dtype(definition)

    definition = f"interval<int64>('{unit}')"
    assert dt.Interval(unit, dt.int64) == dt.dtype(definition)
def test_interval(unit):
    """Check that interval type strings parse to the matching ``dt.Interval``.

    ``unit`` is supplied by the caller (presumably a pytest parametrization
    that is not visible here -- confirm against the decorator).
    """
    # Each comparison must be asserted: a bare ``==`` expression statement is
    # evaluated and discarded, so the test could never fail.
    definition = "interval('{}')".format(unit)
    assert dt.Interval(unit, dt.int32) == dt.dtype(definition)

    definition = "interval<uint16>('{}')".format(unit)
    assert dt.Interval(unit, dt.uint16) == dt.dtype(definition)

    definition = "interval<int64>('{}')".format(unit)
    assert dt.Interval(unit, dt.int64) == dt.dtype(definition)
def output_dtype(self):
    """Return an interval dtype expressed in this node's unit.

    The value type and nullability are carried over unchanged from the
    dtype of ``self.arg``; only the unit comes from ``self.unit``.
    """
    source = self.arg.type()
    fields = {
        'unit': self.unit,
        'value_type': source.value_type,
        'nullable': source.nullable,
    }
    return dt.Interval(**fields)
def test_interval_invalid_unit(unit):
    """Check that an invalid interval unit is rejected with ``ValueError``.

    Both construction paths are exercised: parsing a type string through
    ``dt.dtype`` and building ``dt.Interval`` directly.
    """
    definition = f"interval('{unit}')"

    with pytest.raises(ValueError):
        dt.dtype(definition)

    # Pass by keyword so the unit under test really lands in the ``unit``
    # slot. The previous positional call had the two arguments swapped,
    # which meant the parametrized unit was never what got validated.
    with pytest.raises(ValueError):
        dt.Interval(unit=unit, value_type=dt.int32)
def test_interval_unvalid_unit(unit):
    """Check that an invalid interval unit is rejected with ``ValueError``.

    Both construction paths are exercised: parsing a type string through
    ``dt.dtype`` and building ``dt.Interval`` directly.
    """
    definition = "interval('{}')".format(unit)

    with pytest.raises(ValueError):
        dt.dtype(definition)

    # Pass by keyword so the unit under test really lands in the ``unit``
    # slot. The previous positional call had the two arguments swapped,
    # which meant the parametrized unit was never what got validated.
    with pytest.raises(ValueError):
        dt.Interval(unit=unit, value_type=dt.int32)
def _timestamp_truncate(translator, expr):
    """Render a timestamp truncation as a ``DATE_TRUNC(unit, arg)`` string.

    The unit text is the upper-cased ``resolution`` of an interval built
    from the operation's unit; the argument is translated by ``translator``.
    """
    operand, unit = expr.op().args
    resolution = dt.Interval(unit=unit).resolution.upper()
    translated = translator.translate(operand)
    template = 'DATE_TRUNC({0!s}, {1!s})'
    return template.format(resolution, translated)
def sa_postgres_interval(_, satype, nullable=True):
    """Map an interval type object with a ``fields`` attribute to ``dt.Interval``.

    Parameters
    ----------
    _
        Unused.
    satype
        Type object whose ``fields`` string names the interval field.
    nullable
        Nullability forwarded to the resulting dtype.

    Raises
    ------
    ValueError
        If the upper-cased field has no entry in
        ``POSTGRES_FIELD_TO_IBIS_UNIT``, or if it maps to ``"Y"`` or ``"M"``.
    """
    field = satype.fields.upper()
    ibis_unit = POSTGRES_FIELD_TO_IBIS_UNIT.get(field)

    if ibis_unit is None:
        message = "Unknown PostgreSQL interval field {!r}".format(field)
        raise ValueError(message)

    if ibis_unit in {"Y", "M"}:
        raise ValueError(
            "Variable length timedeltas are not yet supported with PostgreSQL"
        )

    return dt.Interval(unit=ibis_unit, nullable=nullable)
def output_dtype(self):
    """Return the interval dtype produced by this binary operation.

    Interval-typed arguments are first cast to their underlying value type
    so that numeric promotion can be applied across all arguments. The unit
    and nullability of the result come from the left operand's dtype.
    """
    numeric_operands = []
    for operand in self.args:
        operand_dtype = operand.type()
        if isinstance(operand_dtype, dt.Interval):
            operand = operand.cast(operand_dtype.value_type)
        numeric_operands.append(operand)

    promoted = rlz._promote_numeric_binop(numeric_operands, self.op)
    lhs = self.left.type()
    return dt.Interval(
        unit=lhs.unit,
        value_type=promoted,
        nullable=lhs.nullable,
    )
def test_time_arithmetics():
    """Check the expression and operation types of time arithmetic.

    time - time yields an interval in seconds; time -/+ interval yields a
    time scalar.
    """
    first = api.time('18:00')
    second = api.time('19:12')
    step = api.interval(minutes=3)

    for difference in (first - second, second - first):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.TimeDiff)
        assert difference.type() == dt.Interval('s', dt.int32)

    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.TimeScalar)
        assert isinstance(shifted_back.op(), ops.TimeSub)

    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.TimeScalar)
        assert isinstance(shifted_forward.op(), ops.TimeAdd)
def test_date_arithmetics():
    """Check the expression and operation types of date arithmetic.

    date - date yields an interval in days; date -/+ interval yields a
    date scalar.
    """
    first = api.date('2015-01-02')
    second = api.date('2017-01-01')
    step = api.interval(weeks=3)

    for difference in (first - second, second - first):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.DateDiff)
        assert difference.type() == dt.Interval('D', dt.int32)

    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.DateScalar)
        assert isinstance(shifted_back.op(), ops.DateSub)

    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.DateScalar)
        assert isinstance(shifted_forward.op(), ops.DateAdd)
def test_timestamp_arithmetics():
    """Check the expression and operation types of timestamp arithmetic.

    timestamp - timestamp yields an interval in seconds; timestamp -/+
    interval yields a timestamp scalar.
    """
    first = api.timestamp(datetime.datetime.now())
    second = api.timestamp(datetime.datetime.today())
    step = api.interval(minutes=30)

    # TODO: raise for unsupported operations too
    for difference in (second - first, first - second):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.TimestampDiff)
        assert difference.type() == dt.Interval('s', dt.int32)

    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.TimestampScalar)
        assert isinstance(shifted_back.op(), ops.TimestampSub)

    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.TimestampScalar)
        assert isinstance(shifted_forward.op(), ops.TimestampAdd)
class TimeDiff(Binary):
    """Binary operation over two time operands, typed as a seconds interval."""

    # Both operands are validated with the ``rlz.time`` rule.
    left = rlz.time
    right = rlz.time

    # The result dtype is fixed: an interval with unit 's'.
    output_dtype = dt.Interval(unit='s')
(np.int64([102, 67228734, -0]), dt.int64), (np.float32([45e-3, -0.4, 99.]), dt.float), (np.float64([-3e43, 43., 10000000.]), dt.double), (['foo', 'bar', 'hello'], dt.string), ([ pd.Timestamp('2010-11-01 00:01:00'), pd.Timestamp('2010-11-01 00:02:00.1000'), pd.Timestamp('2010-11-01 00:03:00.300000') ], dt.timestamp), (pd.date_range('20130101', periods=3, tz='US/Eastern'), dt.timestamp('US/Eastern')), ([ pd.Timedelta('1 days'), pd.Timedelta('-1 days 2 min 3us'), pd.Timedelta('-2 days +23:57:59.999997') ], dt.Interval('ns')), (pd.Series(['a', 'b', 'c', 'a']).astype('category'), dt.Category())]) def test_infer_simple_dataframe(column, expected_dtype): df = pd.DataFrame({'col': column}) assert sch.infer(df) == ibis.schema([('col', expected_dtype)]) def test_infer_exhaustive_dataframe(): df = pd.DataFrame({ 'bigint_col': np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'), 'bool_col': np.array( [True, False, True, False, True, None, True, False, True, False], dtype=np.bool_), 'bool_obj_col':
'int32': dt.int32, 'int64': dt.int64, 'uint8': dt.uint8, 'uint16': dt.uint16, 'uint32': dt.uint32, 'uint64': dt.uint64, 'float16': dt.float16, 'float32': dt.float32, 'float64': dt.float64, 'double': dt.double, 'unicode': dt.string, 'str': dt.string, 'datetime64': dt.timestamp, 'datetime64[ns]': dt.timestamp, 'timedelta64': dt.interval, 'timedelta64[ns]': dt.Interval('ns'), }, ) _inferable_pandas_dtypes = { 'boolean': dt.boolean, 'string': dt.string, 'unicode': dt.string, 'bytes': dt.string, 'empty': dt.string, } @dt.dtype.register(np.dtype) def from_numpy_dtype(value): try:
def interval(
    value: int | datetime.timedelta | None = None,
    unit: str = 's',
    years: int | None = None,
    quarters: int | None = None,
    months: int | None = None,
    weeks: int | None = None,
    days: int | None = None,
    hours: int | None = None,
    minutes: int | None = None,
    seconds: int | None = None,
    milliseconds: int | None = None,
    microseconds: int | None = None,
    nanoseconds: int | None = None,
) -> ir.IntervalScalar:
    """Build an interval literal expression.

    Either pass `value` (optionally with `unit`), or pass exactly one of
    the named-unit keyword arguments. When `value` is given, the named-unit
    keywords are not consulted.

    Parameters
    ----------
    value
        Interval magnitude. An integer is interpreted in `unit`; a
        `datetime.timedelta` is converted to whole seconds and `unit` is
        overridden with ``'s'``.
    unit
        Unit code for an integer `value`.
    years
        Number of years
    quarters
        Number of quarters
    months
        Number of months
    weeks
        Number of weeks
    days
        Number of days
    hours
        Number of hours
    minutes
        Number of minutes
    seconds
        Number of seconds
    milliseconds
        Number of milliseconds
    microseconds
        Number of microseconds
    nanoseconds
        Number of nanoseconds

    Returns
    -------
    IntervalScalar
        An interval expression

    Raises
    ------
    ValueError
        If `value` is neither an integer nor a timedelta, or if `value` is
        omitted and the number of named-unit arguments given is not one.
    """
    if value is None:
        # Unit code -> amount, in the same order the keywords are declared.
        amounts_by_unit = {
            'Y': years,
            'Q': quarters,
            'M': months,
            'W': weeks,
            'D': days,
            'h': hours,
            'm': minutes,
            's': seconds,
            'ms': milliseconds,
            'us': microseconds,
            'ns': nanoseconds,
        }
        supplied = [
            (code, amount)
            for code, amount in amounts_by_unit.items()
            if amount is not None
        ]
        if len(supplied) != 1:
            raise ValueError('Exactly one argument is required')
        unit, value = supplied[0]
    elif isinstance(value, datetime.timedelta):
        # NOTE(review): int() drops any sub-second part of the timedelta.
        unit = 's'
        value = int(value.total_seconds())
    elif not isinstance(value, int):
        raise ValueError('Interval value must be an integer')

    # The interval's value type is whatever a plain literal of `value` infers.
    value_type = literal(value).type()
    interval_type = dt.Interval(unit, value_type=value_type)
    return literal(value, type=interval_type).op().to_expr()
def output_dtype(self):
    """Return an interval dtype in ``self.unit`` valued by the argument's dtype."""
    unit = self.unit
    value_type = self.arg.type()
    return dt.Interval(unit, value_type)
def test_interval_films_schema(con):
    """Check the ibis and executed dtypes of the ``films.len`` column."""
    films = con.table("films")
    length = films.len

    # The ibis-side type is a minute interval; the executed result uses a
    # nanosecond timedelta dtype.
    assert length.type() == dt.Interval(unit="m")
    assert length.execute().dtype == np.dtype("timedelta64[ns]")
pd.Timestamp('2010-11-01 00:02:00.1000'), pd.Timestamp('2010-11-01 00:03:00.300000'), ], dt.timestamp, ), ( pd.date_range('20130101', periods=3, tz='US/Eastern'), dt.Timestamp('US/Eastern'), ), ( [ pd.Timedelta('1 days'), pd.Timedelta('-1 days 2 min 3us'), pd.Timedelta('-2 days +23:57:59.999997'), ], dt.Interval('ns'), ), (pd.Series(['a', 'b', 'c', 'a']).astype('category'), dt.Category()), ], ) def test_infer_simple_dataframe(column, expected_dtype): df = pd.DataFrame({'col': column}) assert sch.infer(df) == ibis.schema([('col', expected_dtype)]) def test_infer_exhaustive_dataframe(): df = pd.DataFrame( { 'bigint_col': np.array( [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8' ),
assert t.f.type().nullable is False s = t.a + t.d assert s.type().nullable is True s = t.b + t.d assert s.type().nullable is True s = t.b + t.f assert s.type().nullable is False @pytest.mark.parametrize( 'base_expr', [ ibis.table([('interval_col', dt.Interval(unit='D'))]).interval_col, ibis.interval(seconds=42), ], ) def test_interval_negate(base_expr): expr = -base_expr expr2 = base_expr.negate() expr3 = ibis.negate(base_expr) assert isinstance(expr.op(), ops.Negate) assert expr.equals(expr2) assert expr.equals(expr3) def test_large_timestamp(): expr = ibis.timestamp('4567-02-03') expected = datetime(year=4567, month=2, day=3)
class DateDiff(Binary):
    """Binary operation over two date operands, typed as a days interval."""

    # Both operands are validated with the ``rlz.date`` rule.
    left = rlz.date
    right = rlz.date

    # The result dtype is fixed: an interval with unit 'D'.
    output_dtype = dt.Interval(unit='D')
class Foo(enum.Enum): a = 1 b = 2 @pytest.mark.parametrize( ('value', 'expected_dtype'), [ (None, dt.null), (False, dt.boolean), (True, dt.boolean), ('foo', dt.string), (b'fooblob', dt.binary), (datetime.date.today(), dt.date), (datetime.datetime.now(), dt.timestamp), (datetime.timedelta(days=3), dt.Interval(unit='D')), (pd.Timedelta('5 hours'), dt.Interval(unit='h')), (pd.Timedelta('7 minutes'), dt.Interval(unit='m')), (datetime.timedelta(seconds=9), dt.Interval(unit='s')), (pd.Timedelta('11 milliseconds'), dt.Interval(unit='ms')), (datetime.timedelta(microseconds=15), dt.Interval(unit='us')), (pd.Timedelta('17 nanoseconds'), dt.Interval(unit='ns')), # numeric types (5, dt.int8), (5, dt.int8), (127, dt.int8), (128, dt.int16), (32767, dt.int16), (32768, dt.int32), (2147483647, dt.int32), (2147483648, dt.int64),
def test_interval_invalid_type():
    """Check that a float value type for an interval raises ``TypeError``.

    Covers both direct construction and parsing from a type string.
    """
    builders = (
        lambda: dt.Interval('m', dt.float32),
        lambda: dt.dtype("interval<float>('s')"),
    )
    for build in builders:
        with pytest.raises(TypeError):
            build()
@pytest.mark.parametrize( ('source', 'target'), [ (dt.any, dt.string), (dt.null, dt.date), (dt.null, dt.any), (dt.int8, dt.int64), (dt.int8, dt.Decimal(12, 2)), (dt.int32, dt.int32), (dt.int32, dt.int64), (dt.uint32, dt.uint64), (dt.uint32, dt.Decimal(12, 2)), (dt.uint32, dt.float32), (dt.uint32, dt.float64), (dt.Interval('s', dt.int16), dt.Interval('s', dt.int32)), ], ) def test_implicit_castable(source, target): assert dt.castable(source, target) @pytest.mark.parametrize( ('source', 'target'), [ (dt.string, dt.null), (dt.int32, dt.int16), (dt.int16, dt.uint64), (dt.Decimal(12, 2), dt.int32), (dt.timestamp, dt.boolean), (dt.boolean, dt.interval),
expected = ibis.schema(ibis_types) assert_equal(schema, expected) def test_interval_films_schema(con): t = con.table("films") assert t.len.type() == dt.Interval(unit="m") assert t.len.execute().dtype == np.dtype("timedelta64[ns]") @pytest.mark.parametrize( ("column", "expected_dtype"), [ # a, b and g are variable length intervals, like YEAR TO MONTH ("c", dt.Interval("D")), ("d", dt.Interval("h")), ("e", dt.Interval("m")), ("f", dt.Interval("s")), ("h", dt.Interval("h")), ("i", dt.Interval("m")), ("j", dt.Interval("s")), ("k", dt.Interval("m")), ("l", dt.Interval("s")), ("m", dt.Interval("s")), ], ) def test_all_interval_types_schema(intervals, column, expected_dtype): assert intervals[column].type() == expected_dtype
class TimestampDiff(Binary):
    """Binary operation over two timestamp operands, typed as a seconds interval."""

    # Both operands are validated with the ``rlz.timestamp`` rule.
    left = rlz.timestamp
    right = rlz.timestamp

    # The result dtype is fixed: an interval with unit 's'.
    output_dtype = dt.Interval(unit='s')
def test_unsupported_intervals(con):
    """Check the interval units reported for the ``not_supported_intervals`` table."""
    table = con.table("not_supported_intervals")

    # (column name, expected interval unit), checked in this order.
    expected_units = (("a", "Y"), ("b", "M"), ("g", "M"))
    for column, unit in expected_units:
        assert table[column].type() == dt.Interval(unit)
def get_result(self):
    """Return the promoted interval type, shaped like this node's arguments.

    The value type comes from ``self._get_type()`` and the unit from
    ``self.unit``; ``shape_like_args`` combines the type with ``self.args``.
    """
    value_type = self._get_type()
    result_type = dt.Interval(self.unit, value_type)
    return shape_like_args(self.args, result_type)