Ejemplo n.º 1
0
def test_interval(unit):
    """Check that interval type strings parse to the matching ``dt.Interval``.

    Three spellings are covered for the given ``unit``: the form without an
    explicit value type (expected to equal an ``int32``-valued interval), and
    the forms with explicit ``uint16`` and ``int64`` value types.
    """
    # Every comparison has to be asserted: a bare ``a == b`` expression is
    # evaluated and thrown away, so the test could never fail.
    definition = f"interval('{unit}')"
    assert dt.Interval(unit, dt.int32) == dt.dtype(definition)

    definition = f"interval<uint16>('{unit}')"
    assert dt.Interval(unit, dt.uint16) == dt.dtype(definition)

    definition = f"interval<int64>('{unit}')"
    assert dt.Interval(unit, dt.int64) == dt.dtype(definition)
Ejemplo n.º 2
0
def test_interval(unit):
    """Check that interval type strings parse to the matching ``dt.Interval``.

    Three spellings are covered for the given ``unit``: the form without an
    explicit value type (expected to equal an ``int32``-valued interval), and
    the forms with explicit ``uint16`` and ``int64`` value types.
    """
    # Every comparison has to be asserted: a bare ``a == b`` expression is
    # evaluated and thrown away, so the test could never fail.
    definition = "interval('{}')".format(unit)
    assert dt.Interval(unit, dt.int32) == dt.dtype(definition)

    definition = "interval<uint16>('{}')".format(unit)
    assert dt.Interval(unit, dt.uint16) == dt.dtype(definition)

    definition = "interval<int64>('{}')".format(unit)
    assert dt.Interval(unit, dt.int64) == dt.dtype(definition)
Ejemplo n.º 3
0
 def output_dtype(self):
     """Build the interval type produced by this node.

     The unit comes from ``self.unit``; the value type and nullability are
     copied from the type of ``self.arg``.
     """
     source_type = self.arg.type()
     interval_kwargs = {
         'unit': self.unit,
         'value_type': source_type.value_type,
         'nullable': source_type.nullable,
     }
     return dt.Interval(**interval_kwargs)
Ejemplo n.º 4
0
def test_interval_invalid_unit(unit):
    """Check that an unsupported interval unit is rejected with ``ValueError``.

    Both entry points are exercised: parsing the type from its string form
    and constructing ``dt.Interval`` directly.
    """
    definition = f"interval('{unit}')"

    with pytest.raises(ValueError):
        dt.dtype(definition)

    # The unit is the first positional argument and the value type the
    # second. Passing them the other way round raises for the wrong reason
    # and never validates ``unit`` itself.
    with pytest.raises(ValueError):
        dt.Interval(unit, dt.int32)
Ejemplo n.º 5
0
def test_interval_unvalid_unit(unit):
    """Check that an unsupported interval unit is rejected with ``ValueError``.

    Both entry points are exercised: parsing the type from its string form
    and constructing ``dt.Interval`` directly.
    """
    definition = "interval('{}')".format(unit)

    with pytest.raises(ValueError):
        dt.dtype(definition)

    # The unit is the first positional argument and the value type the
    # second. Passing them the other way round raises for the wrong reason
    # and never validates ``unit`` itself.
    with pytest.raises(ValueError):
        dt.Interval(unit, dt.int32)
Ejemplo n.º 6
0
def _timestamp_truncate(translator, expr):
    """Render a timestamp-truncation expression as a ``DATE_TRUNC`` call.

    The operation carries exactly two arguments: the value to truncate and
    the truncation unit. The unit is converted to the upper-cased resolution
    name of the corresponding interval type and placed first in the
    generated string, followed by the translated value.
    """
    operand, unit = expr.op().args
    resolution = dt.Interval(unit=unit).resolution.upper()
    translated_operand = translator.translate(operand)
    return 'DATE_TRUNC({0!s}, {1!s})'.format(resolution, translated_operand)
Ejemplo n.º 7
0
def sa_postgres_interval(_, satype, nullable=True):
    """Convert a PostgreSQL interval type into an ibis interval type.

    Parameters
    ----------
    _
        Unused.
    satype
        Source type object; its ``fields`` attribute is upper-cased and
        looked up in ``POSTGRES_FIELD_TO_IBIS_UNIT``.
    nullable
        Nullability of the resulting ibis type.

    Raises
    ------
    ValueError
        If the field has no known unit, or if the unit is ``"Y"`` or ``"M"``.
    """
    pg_field = satype.fields.upper()
    ibis_unit = POSTGRES_FIELD_TO_IBIS_UNIT.get(pg_field)

    # Guard clauses: reject unknown fields first, then the unsupported units.
    if ibis_unit is None:
        raise ValueError(
            "Unknown PostgreSQL interval field {!r}".format(pg_field))
    if ibis_unit in {"Y", "M"}:
        raise ValueError(
            "Variable length timedeltas are not yet supported with PostgreSQL")

    return dt.Interval(unit=ibis_unit, nullable=nullable)
Ejemplo n.º 8
0
 def output_dtype(self):
     """Compute the interval type resulting from this binary operation.

     Interval-typed arguments are first cast to their underlying value type
     so that numeric promotion can be applied to all arguments. The result
     keeps the unit and nullability of the left operand's type and uses the
     promoted value type.
     """
     integer_args = []
     for operand in self.args:
         if isinstance(operand.type(), dt.Interval):
             # Strip the interval wrapper; only the value type is promoted.
             operand = operand.cast(operand.type().value_type)
         integer_args.append(operand)

     promoted_value_type = rlz._promote_numeric_binop(integer_args, self.op)
     left_type = self.left.type()
     interval_kwargs = {
         'unit': left_type.unit,
         'value_type': promoted_value_type,
         'nullable': left_type.nullable,
     }
     return dt.Interval(**interval_kwargs)
Ejemplo n.º 9
0
def test_time_arithmetics():
    """Check result kinds of time - time, time - interval and time + interval."""
    first = api.time('18:00')
    second = api.time('19:12')
    step = api.interval(minutes=3)

    # time - time yields an interval measured in 's' with int32 values
    for difference in (first - second, second - first):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.TimeDiff)
        assert difference.type() == dt.Interval('s', dt.int32)

    # time - interval stays a time
    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.TimeScalar)
        assert isinstance(shifted_back.op(), ops.TimeSub)

    # time + interval stays a time
    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.TimeScalar)
        assert isinstance(shifted_forward.op(), ops.TimeAdd)
Ejemplo n.º 10
0
def test_date_arithmetics():
    """Check result kinds of date - date, date - interval and date + interval."""
    first = api.date('2015-01-02')
    second = api.date('2017-01-01')
    step = api.interval(weeks=3)

    # date - date yields an interval measured in 'D' with int32 values
    for difference in (first - second, second - first):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.DateDiff)
        assert difference.type() == dt.Interval('D', dt.int32)

    # date - interval stays a date
    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.DateScalar)
        assert isinstance(shifted_back.op(), ops.DateSub)

    # date + interval stays a date
    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.DateScalar)
        assert isinstance(shifted_forward.op(), ops.DateAdd)
Ejemplo n.º 11
0
def test_timestamp_arithmetics():
    """Check result kinds of timestamp/timestamp and timestamp/interval math."""
    first = api.timestamp(datetime.datetime.now())
    second = api.timestamp(datetime.datetime.today())
    step = api.interval(minutes=30)

    # TODO: raise for unsupported operations too
    # timestamp - timestamp yields an interval in 's' with int32 values
    for difference in (second - first, first - second):
        assert isinstance(difference, ir.IntervalScalar)
        assert isinstance(difference.op(), ops.TimestampDiff)
        assert difference.type() == dt.Interval('s', dt.int32)

    # timestamp - interval stays a timestamp
    for shifted_back in (first - step, second - step):
        assert isinstance(shifted_back, ir.TimestampScalar)
        assert isinstance(shifted_back.op(), ops.TimestampSub)

    # timestamp + interval stays a timestamp
    for shifted_forward in (first + step, second + step):
        assert isinstance(shifted_forward, ir.TimestampScalar)
        assert isinstance(shifted_forward.op(), ops.TimestampAdd)
Ejemplo n.º 12
0
class TimeDiff(Binary):
    """Binary operation whose two operands are declared with ``rlz.time``.

    Its declared output type is ``dt.Interval('s')``.
    """
    # Both sides use the same ``rlz.time`` rule.
    left = rlz.time
    right = rlz.time
    # NOTE(review): unit 's' is presumably seconds -- confirm against dt.Interval.
    output_dtype = dt.Interval('s')
Ejemplo n.º 13
0
     (np.int64([102, 67228734, -0]), dt.int64),
     (np.float32([45e-3, -0.4, 99.]), dt.float),
     (np.float64([-3e43, 43., 10000000.]), dt.double),
     (['foo', 'bar', 'hello'], dt.string),
     ([
         pd.Timestamp('2010-11-01 00:01:00'),
         pd.Timestamp('2010-11-01 00:02:00.1000'),
         pd.Timestamp('2010-11-01 00:03:00.300000')
     ], dt.timestamp),
     (pd.date_range('20130101', periods=3,
                    tz='US/Eastern'), dt.timestamp('US/Eastern')),
     ([
         pd.Timedelta('1 days'),
         pd.Timedelta('-1 days 2 min 3us'),
         pd.Timedelta('-2 days +23:57:59.999997')
     ], dt.Interval('ns')),
     (pd.Series(['a', 'b', 'c', 'a']).astype('category'), dt.Category())])
def test_infer_simple_dataframe(column, expected_dtype):
    """Infer the schema of a one-column frame and compare with the expectation."""
    frame = pd.DataFrame({'col': column})
    inferred_schema = sch.infer(frame)
    expected_schema = ibis.schema([('col', expected_dtype)])
    assert inferred_schema == expected_schema


def test_infer_exhaustive_dataframe():
    df = pd.DataFrame({
        'bigint_col':
        np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'),
        'bool_col':
        np.array(
            [True, False, True, False, True, None, True, False, True, False],
            dtype=np.bool_),
        'bool_obj_col':
Ejemplo n.º 14
0
        'int32': dt.int32,
        'int64': dt.int64,
        'uint8': dt.uint8,
        'uint16': dt.uint16,
        'uint32': dt.uint32,
        'uint64': dt.uint64,
        'float16': dt.float16,
        'float32': dt.float32,
        'float64': dt.float64,
        'double': dt.double,
        'unicode': dt.string,
        'str': dt.string,
        'datetime64': dt.timestamp,
        'datetime64[ns]': dt.timestamp,
        'timedelta64': dt.interval,
        'timedelta64[ns]': dt.Interval('ns'),
    },
)

# Lookup table from dtype-name strings to ibis data types. 'boolean' maps to
# ``dt.boolean``; every other key listed here ('string', 'unicode', 'bytes',
# 'empty') maps to ``dt.string``.
# NOTE(review): the keys look like labels produced by pandas' dtype
# inference -- confirm against the code that reads this table.
_inferable_pandas_dtypes = {
    'boolean': dt.boolean,
    'string': dt.string,
    'unicode': dt.string,
    'bytes': dt.string,
    'empty': dt.string,
}


@dt.dtype.register(np.dtype)
def from_numpy_dtype(value):
    try:
Ejemplo n.º 15
0
def interval(
    value: int | datetime.timedelta | None = None,
    unit: str = 's',
    years: int | None = None,
    quarters: int | None = None,
    months: int | None = None,
    weeks: int | None = None,
    days: int | None = None,
    hours: int | None = None,
    minutes: int | None = None,
    seconds: int | None = None,
    milliseconds: int | None = None,
    microseconds: int | None = None,
    nanoseconds: int | None = None,
) -> ir.IntervalScalar:
    """Create an interval literal expression.

    The interval is given either as ``value`` (with ``unit``) or through
    exactly one of the per-unit keyword arguments. When ``value`` is passed,
    the per-unit keywords are not consulted.

    Parameters
    ----------
    value
        Magnitude of the interval, interpreted in `unit`. A
        `datetime.timedelta` is converted to whole seconds (fractional
        seconds are truncated) and `unit` is overridden to ``'s'``.
    unit
        Unit code for `value`
    years
        Interval length in years
    quarters
        Interval length in quarters
    months
        Interval length in months
    weeks
        Interval length in weeks
    days
        Interval length in days
    hours
        Interval length in hours
    minutes
        Interval length in minutes
    seconds
        Interval length in seconds
    milliseconds
        Interval length in milliseconds
    microseconds
        Interval length in microseconds
    nanoseconds
        Interval length in nanoseconds

    Returns
    -------
    IntervalScalar
        The interval literal as an expression

    Raises
    ------
    ValueError
        If `value` is neither an integer nor a `datetime.timedelta`, or if
        `value` is omitted and the number of per-unit keywords supplied is
        not exactly one.
    """
    if value is None:
        # No explicit value: exactly one per-unit keyword must be supplied.
        by_unit = {
            'Y': years,
            'Q': quarters,
            'M': months,
            'W': weeks,
            'D': days,
            'h': hours,
            'm': minutes,
            's': seconds,
            'ms': milliseconds,
            'us': microseconds,
            'ns': nanoseconds,
        }
        supplied = [
            (code, amount)
            for code, amount in by_unit.items()
            if amount is not None
        ]
        if len(supplied) != 1:
            raise ValueError('Exactly one argument is required')
        ((unit, value),) = supplied
    elif isinstance(value, datetime.timedelta):
        # Timedeltas are always expressed in whole seconds.
        unit = 's'
        value = int(value.total_seconds())
    elif not isinstance(value, int):
        raise ValueError('Interval value must be an integer')

    # The interval's value type is whatever type the bare literal infers to.
    inferred_value_type = literal(value).type()
    interval_dtype = dt.Interval(unit, value_type=inferred_value_type)

    return literal(value, type=interval_dtype).op().to_expr()
Ejemplo n.º 16
0
 def output_dtype(self):
     """Return an interval type with this node's unit over the argument's type."""
     unit, value_type = self.unit, self.arg.type()
     return dt.Interval(unit, value_type)
Ejemplo n.º 17
0
def test_interval_films_schema(con):
    """Check the ibis type and executed dtype of the ``films.len`` column."""
    films = con.table("films")

    column_type = films.len.type()
    assert column_type == dt.Interval(unit="m")

    executed_dtype = films.len.execute().dtype
    assert executed_dtype == np.dtype("timedelta64[ns]")
Ejemplo n.º 18
0
                pd.Timestamp('2010-11-01 00:02:00.1000'),
                pd.Timestamp('2010-11-01 00:03:00.300000'),
            ],
            dt.timestamp,
        ),
        (
            pd.date_range('20130101', periods=3, tz='US/Eastern'),
            dt.Timestamp('US/Eastern'),
        ),
        (
            [
                pd.Timedelta('1 days'),
                pd.Timedelta('-1 days 2 min 3us'),
                pd.Timedelta('-2 days +23:57:59.999997'),
            ],
            dt.Interval('ns'),
        ),
        (pd.Series(['a', 'b', 'c', 'a']).astype('category'), dt.Category()),
    ],
)
def test_infer_simple_dataframe(column, expected_dtype):
    """Infer the schema of a one-column frame and compare with the expectation."""
    frame = pd.DataFrame({'col': column})
    inferred_schema = sch.infer(frame)
    expected_schema = ibis.schema([('col', expected_dtype)])
    assert inferred_schema == expected_schema


def test_infer_exhaustive_dataframe():
    df = pd.DataFrame(
        {
            'bigint_col': np.array(
                [0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'
            ),
Ejemplo n.º 19
0
    assert t.f.type().nullable is False

    s = t.a + t.d
    assert s.type().nullable is True

    s = t.b + t.d
    assert s.type().nullable is True

    s = t.b + t.f
    assert s.type().nullable is False


@pytest.mark.parametrize(
    'base_expr',
    [
        ibis.table([('interval_col', dt.Interval(unit='D'))]).interval_col,
        ibis.interval(seconds=42),
    ],
)
def test_interval_negate(base_expr):
    """Check that the three ways of negating an interval are equivalent."""
    via_operator = -base_expr
    via_method = base_expr.negate()
    via_function = ibis.negate(base_expr)

    assert isinstance(via_operator.op(), ops.Negate)
    # The unary operator form is the reference the other spellings must match.
    for alternative in (via_method, via_function):
        assert via_operator.equals(alternative)


def test_large_timestamp():
    expr = ibis.timestamp('4567-02-03')
    expected = datetime(year=4567, month=2, day=3)
Ejemplo n.º 20
0
class DateDiff(Binary):
    """Binary operation whose two operands are declared with ``rlz.date``.

    Its declared output type is ``dt.Interval('D')``.
    """
    # Both sides use the same ``rlz.date`` rule.
    left = rlz.date
    right = rlz.date
    # NOTE(review): unit 'D' is presumably days -- confirm against dt.Interval.
    output_dtype = dt.Interval('D')
Ejemplo n.º 21
0
class Foo(enum.Enum):
    """Minimal enumeration with two integer-valued members, ``a`` and ``b``."""
    # NOTE(review): presumably a fixture for the parametrized tests that
    # follow -- its use is not visible from here.
    a = 1
    b = 2


@pytest.mark.parametrize(
    ('value', 'expected_dtype'),
    [
        (None, dt.null),
        (False, dt.boolean),
        (True, dt.boolean),
        ('foo', dt.string),
        (b'fooblob', dt.binary),
        (datetime.date.today(), dt.date),
        (datetime.datetime.now(), dt.timestamp),
        (datetime.timedelta(days=3), dt.Interval(unit='D')),
        (pd.Timedelta('5 hours'), dt.Interval(unit='h')),
        (pd.Timedelta('7 minutes'), dt.Interval(unit='m')),
        (datetime.timedelta(seconds=9), dt.Interval(unit='s')),
        (pd.Timedelta('11 milliseconds'), dt.Interval(unit='ms')),
        (datetime.timedelta(microseconds=15), dt.Interval(unit='us')),
        (pd.Timedelta('17 nanoseconds'), dt.Interval(unit='ns')),
        # numeric types
        (5, dt.int8),
        (5, dt.int8),
        (127, dt.int8),
        (128, dt.int16),
        (32767, dt.int16),
        (32768, dt.int32),
        (2147483647, dt.int32),
        (2147483648, dt.int64),
Ejemplo n.º 22
0
def test_interval_invalid_type():
    """Check that a floating-point value type is rejected with ``TypeError``.

    Both the direct constructor and the string-parsing entry point are
    exercised, in that order.
    """
    constructions = (
        lambda: dt.Interval('m', dt.float32),
        lambda: dt.dtype("interval<float>('s')"),
    )
    for construct in constructions:
        with pytest.raises(TypeError):
            construct()
Ejemplo n.º 23
0
@pytest.mark.parametrize(
    ('source', 'target'),
    [
        (dt.any, dt.string),
        (dt.null, dt.date),
        (dt.null, dt.any),
        (dt.int8, dt.int64),
        (dt.int8, dt.Decimal(12, 2)),
        (dt.int32, dt.int32),
        (dt.int32, dt.int64),
        (dt.uint32, dt.uint64),
        (dt.uint32, dt.Decimal(12, 2)),
        (dt.uint32, dt.float32),
        (dt.uint32, dt.float64),
        (dt.Interval('s', dt.int16), dt.Interval('s', dt.int32)),
    ],
)
def test_implicit_castable(source, target):
    """Check that each ``source`` type is reported castable to ``target``."""
    is_castable = dt.castable(source, target)
    assert is_castable


@pytest.mark.parametrize(
    ('source', 'target'),
    [
        (dt.string, dt.null),
        (dt.int32, dt.int16),
        (dt.int16, dt.uint64),
        (dt.Decimal(12, 2), dt.int32),
        (dt.timestamp, dt.boolean),
        (dt.boolean, dt.interval),
Ejemplo n.º 24
0
    expected = ibis.schema(ibis_types)

    assert_equal(schema, expected)


def test_interval_films_schema(con):
    """Check the ibis type and executed dtype of the ``films.len`` column."""
    films = con.table("films")

    column_type = films.len.type()
    assert column_type == dt.Interval(unit="m")

    executed_dtype = films.len.execute().dtype
    assert executed_dtype == np.dtype("timedelta64[ns]")


@pytest.mark.parametrize(
    ("column", "expected_dtype"),
    [
        # a, b and g are variable length intervals, like YEAR TO MONTH
        ("c", dt.Interval("D")),
        ("d", dt.Interval("h")),
        ("e", dt.Interval("m")),
        ("f", dt.Interval("s")),
        ("h", dt.Interval("h")),
        ("i", dt.Interval("m")),
        ("j", dt.Interval("s")),
        ("k", dt.Interval("m")),
        ("l", dt.Interval("s")),
        ("m", dt.Interval("s")),
    ],
)
def test_all_interval_types_schema(intervals, column, expected_dtype):
    """Check that each listed column of ``intervals`` has the expected type."""
    actual_dtype = intervals[column].type()
    assert actual_dtype == expected_dtype

Ejemplo n.º 25
0
class TimestampDiff(Binary):
    """Binary operation whose two operands are declared with ``rlz.timestamp``.

    Its declared output type is ``dt.Interval('s')``.
    """
    # Both sides use the same ``rlz.timestamp`` rule.
    left = rlz.timestamp
    right = rlz.timestamp
    # NOTE(review): unit 's' is presumably seconds -- confirm against dt.Interval.
    output_dtype = dt.Interval('s')
Ejemplo n.º 26
0
def test_unsupported_intervals(con):
    """Check the interval units reported for columns ``a``, ``b`` and ``g``."""
    table = con.table("not_supported_intervals")
    expected_units = (("a", "Y"), ("b", "M"), ("g", "M"))
    for column, unit in expected_units:
        assert table[column].type() == dt.Interval(unit)
Ejemplo n.º 27
0
 def get_result(self):
     """Return the interval result shaped like this node's arguments.

     The value type comes from ``self._get_type()`` and is wrapped in an
     interval type carrying ``self.unit``.
     """
     value_type = self._get_type()
     interval_type = dt.Interval(self.unit, value_type)
     return shape_like_args(self.args, interval_type)