def test_frame_slice():
    """Row indexing on the symbol ``t`` should agree with ``df.iloc``.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    Integer keys are compared as Series; slices and the list key are
    compared as frames.
    """
    # A single integer position is compared with the Series assertion.
    for position in (0, 2):
        assert_series_equal(compute(t[position], df), df.iloc[position])

    # t[:2], t[1:3] and t[1::2], spelled as explicit slice objects.
    for window in (slice(None, 2), slice(1, 3), slice(1, None, 2)):
        tm.assert_frame_equal(compute(t[window], df), df.iloc[window])

    # A list of positions, given out of order.
    tm.assert_frame_equal(compute(t[[2, 0]], df), df.iloc[[2, 0]])
def test_time_field():
    """``.time`` on a datetime series symbol should match ``Series.dt.time``.

    The computed series is expected to carry the name ``'s_time'``.
    """
    hourly = pd.Series(
        pd.date_range(start='20120101', end='20120102', freq='H')
    )
    sym = symbol('s', discover(hourly))
    actual = compute(sym.time, hourly)

    reference = hourly.dt.time
    reference.name = 's_time'

    assert_series_equal(actual, reference)
def test_datetime_truncation_days():
    """Check ``truncate(days=3)`` on a datetime series against fixed values.

    The two input timestamps are expected to become ``1999-12-31`` and
    ``2000-06-25``, in a series named ``'s'``.
    """
    stamps = Series(
        ['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z'],
        dtype='M8[ns]',
    )
    sym = symbol('s', 'var * datetime')
    truncated = compute(sym.truncate(days=3), stamps)
    reference = Series(
        ['1999-12-31', '2000-06-25'],
        dtype='M8[ns]',
        name='s',
    )
    assert_series_equal(truncated, reference)
def test_str_predicates(what, expected):
    """Check an ``is<what>`` string predicate on ``t.name``.

    ``what`` is the suffix appended to ``'is'`` to form the method name on
    ``t.name.str``; ``expected`` is the value every one of the three result
    rows should hold.  ``t`` and ``df`` are module-level fixtures defined
    outside this block.
    """
    method_name = 'is' + what
    expr = getattr(t.name.str, method_name)()

    want = pd.Series([expected] * 3, name='name')
    got = compute(expr, df).reset_index(drop=True)

    assert_series_equal(want, got)
    # The discovered measure of the result must match the expression's.
    assert discover(got).measure == expr.dshape.measure
def test_coerce_series_string_datetime(d, tp, ptp):
    """Coercing a series symbol to ``tp`` should match ``Series.astype(ptp)``.

    ``d`` is the raw data for the series, ``tp`` the target datashape passed
    to ``coerce`` and ``ptp`` the dtype passed to pandas ``astype``.
    """
    series = pd.Series(d, name='a')
    coerced = symbol('t', discover(series)).coerce(to=tp)

    # The expression's schema must be exactly the requested datashape.
    assert coerced.schema == dshape(tp)

    assert_series_equal(compute(coerced, series), series.astype(ptp))
def test_subsecond(sql_with_subsecond_dts):
    """Check that ``.second`` keeps subsecond resolution.

    The seconds field must carry its fractional part rather than being
    truncated to a whole second.
    """
    table = data(sql_with_subsecond_dts)
    seconds = compute(
        table.A.second,
        sql_with_subsecond_dts,
        return_type=pd.Series,
    )
    reference = pd.Series([0.042, 0.047], name='A_second')
    assert_series_equal(seconds, reference)
def test_datetime_access(attr, sql_with_dts):
    """Compare a ``.dt`` accessor computed through the expression with pandas.

    ``attr`` names the accessor.  It is called as a method on the expression
    side (``s.A.dt.<attr>()``) and read as an attribute on the pandas side
    (``Series.dt.<attr>``).  Series names are not compared.
    """
    sym = symbol('s', discover(sql_with_dts))
    expr = getattr(sym.A.dt, attr)()

    actual = compute(expr, sql_with_dts, return_type=pd.Series)
    column = compute(sym.A, sql_with_dts, return_type=pd.Series)
    reference = getattr(column.dt, attr)

    assert_series_equal(actual, reference, check_names=False)
def test_datetime_truncation_nanoseconds():
    """Check ``truncate(nanoseconds=20)`` against fixed values.

    Inputs ending in 5 ns and 25 ns are expected to become 0 ns and 20 ns
    respectively, in a series named ``'s'``.
    """
    stamps = Series(
        [
            '2000-01-01T12:10:00.000000005',
            '2000-01-01T12:10:00.000000025',
        ],
        dtype='M8[ns]',
    )
    sym = symbol('s', 'var * datetime')
    reference = Series(
        [
            '2000-01-01T12:10:00.000000000',
            '2000-01-01T12:10:00.000000020',
        ],
        dtype='M8[ns]',
        name='s',
    )
    truncated = compute(sym.truncate(nanoseconds=20), stamps)
    assert_series_equal(truncated, reference)
def test_str_ops(ds, op, args, data, expected):
    """Apply the string operation ``op`` to a ``name`` column and check it.

    ``ds`` is the datashape of the ``name`` field, ``op`` the method name on
    the ``.str`` accessor, ``args`` its positional arguments, ``data`` the
    input values and ``expected`` the values the result should hold.
    """
    column = pd.Series(data, name='name')
    sym = symbol('t', datashape.var * datashape.R['name': ds])
    expr = getattr(sym.name.str, op)(*args)

    want = pd.Series(expected, name='name')
    got = compute(expr, column).reset_index(drop=True)
    assert_series_equal(want, got)

    # The option / non-option dshape of the column must pass through
    # unchanged to the dshape of the expression.
    column_measure = sym.dshape.measure.dict['name'].measure
    assert column_measure == expr.dshape.measure
def test_str_predicates(what, expected):
    """Check an ``is<what>`` string predicate on ``nt.name``.

    The first two rows are expected to hold ``expected`` and the third
    ``None``.  ``nt`` and ``ndf`` are module-level fixtures defined outside
    this block.
    """
    method_name = 'is' + what
    expr = getattr(nt.name.str, method_name)()

    want = pd.Series([expected, expected, None], name='name')
    got = compute(expr, ndf).reset_index(drop=True)
    assert_series_equal(want, got)

    # 'discover' reports an incorrect measure for this result, so the
    # expression's measure is checked against a hardcoded value instead of
    # against discover(result).measure.
    assert str(expr.dshape.measure) == '?bool'
def test_datetime_access(attr, dtype, sql_with_dts):
    """Check the dtype and the values of a ``.dt`` accessor result.

    ``attr`` names the accessor and ``dtype`` is the dtype the computed
    series must have.  Values are then compared with pandas'
    ``Series.dt.<attr>``, ignoring both names and dtype.
    """
    sym = symbol('s', discover(sql_with_dts))
    expr = getattr(sym.A.dt, attr)()

    actual = compute(expr, sql_with_dts, return_type=pd.Series)
    assert actual.dtype == dtype

    column = compute(sym.A, sql_with_dts, return_type=pd.Series)
    assert_series_equal(
        actual,
        getattr(column.dt, attr),
        check_names=False,
        check_dtype=False,
    )
def test_datetime_access():
    """Datetime fields of the ``when`` column should all compute to 1.

    Every row holds the same timestamp (2010-01-01 01:01:01), so ``day``,
    ``month``, ``minute`` and ``second`` are each expected to be 1.
    """
    frame = DataFrame({
        'name': ['Alice', 'Bob', 'Joe'],
        'when': [datetime(2010, 1, 1, 1, 1, 1)] * 3,
        'amount': [100, 200, 300],
        'id': [1, 2, 3],
    })
    sym = symbol('t', discover(frame))

    for field in ('day', 'month', 'minute', 'second'):
        expr = getattr(sym.when, field)
        actual = compute(expr, frame)
        assert_series_equal(actual, Series([1, 1, 1], name=expr._name))
def test_datetime_access(attr):
    """A ``.dt`` accessor named ``attr`` on ``when`` should compute to 1.

    ``attr`` is called as a method on ``t.when.dt``; each of the three rows
    is expected to yield 1.
    """
    # The year 2002 is used because dayofyear 1 then coincides with
    # dayofweek 1.
    stamp = datetime(2002, 1, 1, 1, 1, 1)
    frame = DataFrame({
        'name': ['Alice', 'Bob', 'Joe'],
        'when': [stamp] * 3,
        'amount': [100, 200, 300],
        'id': [1, 2, 3],
    })
    sym = symbol('t', discover(frame))
    expr = getattr(sym.when.dt, attr)()

    actual = compute(expr, frame)
    assert_series_equal(actual, Series([1, 1, 1], name=expr._name))
def test_sort_on_series_no_warning(recwarn):
    """Sorting a single column must not emit a ``FutureWarning``.

    Both ``sort('amount')`` and the argument-less ``sort()`` on
    ``t['amount']`` are computed against ``df`` and compared with the
    pandas-sorted ``amount`` column.  ``t`` and ``df`` are module-level
    fixtures defined outside this block.

    ``recwarn`` is the warning-recording fixture; it is cleared before the
    computations so that only warnings raised by them are inspected.
    """
    # Series.order() was deprecated and later removed from pandas;
    # Series.sort_values() is its replacement and returns the sorted copy.
    expected = df.amount.sort_values()

    recwarn.clear()

    assert_series_equal(compute(t['amount'].sort('amount'), df), expected)
    # Popping a FutureWarning raises an AssertionError when none was
    # recorded, which is the outcome this test requires.
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)

    assert_series_equal(compute(t['amount'].sort(), df), expected)
    # Same check for the argument-less sort.
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)
def test_arithmetic():
    """``+``, ``*`` and ``%`` between ``amount`` and ``id`` should match pandas.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    binary_ops = (
        lambda left, right: left + right,
        lambda left, right: left * right,
        lambda left, right: left % right,
    )
    for combine in binary_ops:
        # The same operator is applied to the symbolic columns and to the
        # concrete pandas columns.
        expr = combine(t['amount'], t['id'])
        assert_series_equal(compute(expr, df), combine(df.amount, df.id))
def test_map_column():
    """Mapping an increment over ``amount`` should equal ``df['amount'] + 1``.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    inc = lambda x: x + 1
    mapped = t['amount'].map(inc, 'int')
    assert_series_equal(compute(mapped, df), df['amount'] + 1)
def test_selection_out_of_order():
    """Selecting ``name`` where ``amount < 100`` should match ``df.loc``.

    The column is projected first and the predicate applied afterwards.
    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    predicate = t['amount'] < 100
    selection = t['name'][predicate]

    reference = df.loc[df.amount < 100, 'name']

    assert_series_equal(compute(selection, df), reference)
def test_frame_broadcast():
    """A broadcast-collected ``amount * id`` should match the pandas product.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    product = t.amount * t.id
    collected = broadcast_collect(expr=product)
    assert_series_equal(compute(collected, df), df.amount * df.id)
def test_series_slice():
    """Indexing ``t.amount`` should agree with ``df.amount.iloc``.

    Integer positions are compared with ``==``; slices are compared as
    Series.  ``t`` and ``df`` are module-level fixtures defined outside this
    block.
    """
    for position in (0, 2):
        assert compute(t.amount[position], df) == df.amount.iloc[position]

    # amount[:2], amount[1:3] and amount[1::2], spelled as slice objects.
    for window in (slice(None, 2), slice(1, 3), slice(1, None, 2)):
        assert_series_equal(
            compute(t.amount[window], df),
            df.amount.iloc[window],
        )
def test_strlen():
    """``strlen`` of the ``name`` column is expected to be 5, 3 and 5.

    The computed index is reset before comparing.  ``t`` and ``df`` are
    module-level fixtures defined outside this block.
    """
    lengths_expr = t.name.strlen()
    want = pd.Series([5, 3, 5], name='name')
    got = compute(lengths_expr, df).reset_index(drop=True)
    assert_series_equal(want, got)
def test_series_columnwise():
    """Adding 1 to field ``a`` of a record symbol, computed on a bare Series."""
    column = Series([1, 2, 3], name='a')
    sym = symbol('t', 'var * {a: int64}')
    incremented = compute(sym.a + 1, column)
    assert_series_equal(column + 1, incremented)
def test_count_keepdims_frame():
    """``count(keepdims=True)`` on a frame symbol yields a one-element Series.

    The expected value is the number of rows in the frame and the expected
    name is ``'s_count'``.
    """
    frame = pd.DataFrame({'a': [1, 2, 3, np.nan]})
    sym = symbol('s', discover(frame))
    counted = compute(sym.count(keepdims=True), frame)
    n_rows = frame.shape[0]
    assert_series_equal(counted, pd.Series([n_rows], name='s_count'))
def test_coerce_series():
    """Coercing a series of digit strings to ``int64`` yields integers.

    The result is expected to keep the name of the input series.
    """
    digits = pd.Series(['1', '2', '3'], name='a')
    sym = symbol('t', discover(digits))
    coerced = compute(sym.coerce(to='int64'), digits)
    assert_series_equal(coerced, pd.Series([1, 2, 3], name=digits.name))
def test_map():
    """Mapping a row-wise function over ``t`` should equal ``amount + id``.

    The mapped function ignores its first argument and adds the other two.
    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    f = lambda _, amt, id: amt + id
    mapped = t.map(f, 'real')
    reference = df['amount'] + df['id']
    assert_series_equal(compute(mapped, df), reference)
def test_eq():
    """``amount == 100`` on the symbol should match the pandas comparison.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    comparison = t['amount'] == 100
    actual = compute(comparison, df)
    reference = df['amount'] == 100
    assert_series_equal(actual, reference)
def test_summary():
    """A summary of ``id`` count and ``amount`` sum computes to a Series.

    The expected values are a count of 3 and a sum of 350.  ``t`` and
    ``df`` are module-level fixtures defined outside this block.
    """
    id_count = t.id.count()
    amount_total = t.amount.sum()
    expr = summary(count=id_count, sum=amount_total)

    actual = compute(expr, df)
    assert_series_equal(actual, Series({'count': 3, 'sum': 350}))
def test_shift(n):
    """``shift(n)`` on a datetime series symbol should match ``Series.shift``.

    ``n`` is the shift amount passed to both the expression and pandas.
    """
    hourly = pd.Series(
        pd.date_range(start='20120101', end='20120102', freq='H')
    )
    sym = symbol('s', discover(hourly))
    shifted = compute(sym.shift(n), hourly)
    assert_series_equal(shifted, hourly.shift(n))
def test_neg():
    """Unary minus on the ``amount`` column should match ``-df['amount']``.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    negated = -t['amount']
    actual = compute(negated, df)
    assert_series_equal(actual, -df['amount'])
def test_neg_projection():
    """Unary minus on a two-column projection should match the pandas frame.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    columns = ['amount', 'id']
    # Selecting a list of columns from a DataFrame yields a DataFrame, so
    # the reference value is a frame and is compared with the frame
    # assertion rather than the Series one.
    tm.assert_frame_equal(compute(-t[columns], df), -df[columns])
def test_label():
    """``label('foo')`` should rename the computed series to ``'foo'``.

    ``t`` and ``df`` are module-level fixtures defined outside this block.
    """
    reference = df['amount'] * 10
    reference.name = 'foo'

    labelled = (t['amount'] * 10).label('foo')
    assert_series_equal(compute(labelled, df), reference)
def test_field_on_series():
    """Accessing field ``s`` of the symbol ``s`` on a Series named ``'s'``.

    The computation is expected to return the input series unchanged.
    """
    sym = symbol('s', 'var * int')
    values = Series([1, 2, 3, 4], name='s')
    assert_series_equal(compute(sym.s, values), values)