def test_frame_slice():
    """Row indexing on ``t`` must agree with ``DataFrame.iloc`` on ``df``."""
    # Integer positions are compared as series.
    for row in (0, 2):
        assert_series_equal(compute(t[row], df), df.iloc[row])

    # Slices and position lists are compared as frames.
    for rows in (slice(None, 2), slice(1, 3), slice(1, None, 2), [2, 0]):
        tm.assert_frame_equal(compute(t[rows], df), df.iloc[rows])
def test_time_field():
    """``s.time`` on an hourly datetime series should equal ``Series.dt.time``.

    The expected series is renamed to ``'s_time'`` before the comparison.
    """
    hourly = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(hourly)
    sym = symbol('s', discover(data))

    expected = data.dt.time
    expected.name = 's_time'

    assert_series_equal(compute(sym.time, data), expected)
def test_datetime_truncation_days():
    """Truncating datetimes with ``days=3`` yields the expected dates."""
    stamps = ['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z']
    data = Series(stamps, dtype='M8[ns]')
    sym = symbol('s', 'var * datetime')

    truncated = compute(sym.truncate(days=3), data)

    expected = Series(['1999-12-31', '2000-06-25'], dtype='M8[ns]', name='s')
    assert_series_equal(truncated, expected)
def test_datetime_truncation_days():
    """``truncate(days=3)`` maps each timestamp to the expected date."""
    data = Series(
        ['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z'],
        dtype='M8[ns]',
    )
    expected = Series(
        ['1999-12-31', '2000-06-25'],
        dtype='M8[ns]',
        name='s',
    )
    expr = symbol('s', 'var * datetime').truncate(days=3)
    assert_series_equal(compute(expr, data), expected)
Exemple #5
0
def test_str_predicates(what, expected):
    """Check the ``is<what>`` string predicate on ``t.name`` against ``df``.

    ``what`` is the predicate suffix; ``expected`` is the value each of the
    three rows should produce.
    """
    method = getattr(t.name.str, 'is' + what)
    expr = method()
    result = compute(expr, df).reset_index(drop=True)

    wanted = pd.Series([expected] * 3, name='name')
    assert_series_equal(wanted, result)
    assert discover(result).measure == expr.dshape.measure
def test_time_field():
    """Compare ``compute(s.time, ...)`` with pandas' ``.dt.time``."""
    data = pd.Series(
        pd.date_range(start='20120101', end='20120102', freq='H'),
    )
    expr = symbol('s', discover(data)).time
    actual = compute(expr, data)

    wanted = data.dt.time
    wanted.name = 's_time'  # name used for the comparison
    assert_series_equal(actual, wanted)
Exemple #7
0
def test_coerce_series_string_datetime(d, tp, ptp):
    """Coercing a series to ``tp`` should match ``Series.astype(ptp)``.

    The coerced expression's schema must also equal ``dshape(tp)``.
    """
    series = pd.Series(d, name='a')
    expr = symbol('t', discover(series)).coerce(to=tp)
    assert expr.schema == dshape(tp)

    assert_series_equal(compute(expr, series), series.astype(ptp))
def test_subsecond(sql_with_subsecond_dts):
    """Check that ``.second`` keeps the fractional part of the seconds.

    The result must carry subsecond resolution rather than being truncated
    to whole seconds.
    """
    table = data(sql_with_subsecond_dts)
    expected = pd.Series([0.042, 0.047], name='A_second')
    seconds = compute(
        table.A.second,
        sql_with_subsecond_dts,
        return_type=pd.Series,
    )
    assert_series_equal(seconds, expected)
Exemple #9
0
def test_frame_slice():
    """Compare positional indexing of ``t`` with ``df.iloc``.

    Scalar positions are checked as series; slices and position lists are
    checked as frames.
    """
    scalar_positions = [0, 2]
    frame_selectors = [slice(None, 2), slice(1, 3), slice(1, None, 2), [2, 0]]

    for pos in scalar_positions:
        assert_series_equal(compute(t[pos], df), df.iloc[pos])

    for selector in frame_selectors:
        tm.assert_frame_equal(compute(t[selector], df), df.iloc[selector])
def test_datetime_access(attr, sql_with_dts):
    """The ``dt.<attr>()`` expression should match pandas' ``.dt.<attr>``.

    Series names are not compared (``check_names=False``).
    """
    sym = symbol('s', discover(sql_with_dts))
    column = sym.A

    actual = compute(getattr(column.dt, attr)(), sql_with_dts,
                     return_type=pd.Series)
    raw = compute(column, sql_with_dts, return_type=pd.Series)

    assert_series_equal(actual, getattr(raw.dt, attr), check_names=False)
def test_datetime_truncation_nanoseconds():
    """Truncating to 20 ns: 5 ns becomes 0 ns and 25 ns becomes 20 ns."""
    raw = [
        '2000-01-01T12:10:00.000000005',
        '2000-01-01T12:10:00.000000025',
    ]
    wanted = [
        '2000-01-01T12:10:00.000000000',
        '2000-01-01T12:10:00.000000020',
    ]

    data = Series(raw, dtype='M8[ns]')
    sym = symbol('s', 'var * datetime')
    expected = Series(wanted, dtype='M8[ns]', name='s')

    assert_series_equal(compute(sym.truncate(nanoseconds=20), data), expected)
Exemple #12
0
def test_datetime_truncation_nanoseconds():
    """``truncate(nanoseconds=20)`` should give the expected timestamps."""
    prefix = '2000-01-01T12:10:00.0000000'

    # Inputs end in 05 and 25 nanoseconds; expected outputs end in 00 and 20.
    data = Series([prefix + '05', prefix + '25'], dtype='M8[ns]')
    sym = symbol('s', 'var * datetime')
    expected = Series([prefix + '00', prefix + '20'],
                      dtype='M8[ns]', name='s')

    truncated = compute(sym.truncate(nanoseconds=20), data)
    assert_series_equal(truncated, expected)
Exemple #13
0
def test_str_ops(ds, op, args, data, expected):
    """Apply string method ``op`` with ``args`` and compare with ``expected``.

    Also checks that the option / non-option dshape of the column carries
    through to the expression's dshape.
    """
    series = pd.Series(data, name='name')
    record = datashape.R['name': ds]
    sym = symbol('t', datashape.var * record)

    str_method = getattr(sym.name.str, op)
    expr = str_method(*args)

    wanted = pd.Series(expected, name='name')
    result = compute(expr, series).reset_index(drop=True)
    assert_series_equal(wanted, result)

    column_measure = sym.dshape.measure.dict['name'].measure
    assert column_measure == expr.dshape.measure
Exemple #14
0
def test_str_predicates(what, expected):
    """Check the ``is<what>`` predicate on ``nt.name`` computed over ``ndf``.

    The first two rows should give ``expected`` and the third ``None``.
    """
    expr = getattr(nt.name.str, 'is' + what)()
    wanted = pd.Series([expected, expected, None], name='name')

    result = compute(expr, ndf).reset_index(drop=True)
    assert_series_equal(wanted, result)

    # 'discover' reports an incorrect value for this result, so the check
    #     discover(result).measure == expr.dshape.measure
    # is replaced by a comparison against a hardcoded measure.
    assert str(expr.dshape.measure) == '?bool'
def test_datetime_access(attr, dtype, sql_with_dts):
    """``dt.<attr>()`` must have ``dtype`` and match pandas' ``.dt.<attr>``.

    The value comparison ignores names and dtypes; the dtype is asserted
    separately.
    """
    sym = symbol('s', discover(sql_with_dts))
    accessor = getattr(sym.A.dt, attr)

    result = compute(accessor(), sql_with_dts, return_type=pd.Series)
    assert result.dtype == dtype

    column = compute(sym.A, sql_with_dts, return_type=pd.Series)
    assert_series_equal(
        result,
        getattr(column.dt, attr),
        check_names=False,
        check_dtype=False,
    )
Exemple #16
0
def test_datetime_access():
    """Day, month, minute and second of ``when`` should all compute to 1."""
    when = datetime(2010, 1, 1, 1, 1, 1)
    frame = DataFrame({'name': ['Alice', 'Bob', 'Joe'],
                       'when': [when] * 3,
                       'amount': [100, 200, 300],
                       'id': [1, 2, 3]})
    sym = symbol('t', discover(frame))

    for field in ('day', 'month', 'minute', 'second'):
        expr = getattr(sym.when, field)
        computed = compute(expr, frame)
        assert_series_equal(computed, Series([1, 1, 1], name=expr._name))
def test_datetime_access(attr):
    """``t.when.dt.<attr>()`` should compute to 1 for every row."""
    # 2002 is used because the dayofyear 1 is the same as dayofweek 1.
    when = datetime(2002, 1, 1, 1, 1, 1)
    frame = DataFrame({'name': ['Alice', 'Bob', 'Joe'],
                       'when': [when] * 3,
                       'amount': [100, 200, 300],
                       'id': [1, 2, 3]})

    sym = symbol('t', discover(frame))
    accessor = getattr(sym.when.dt, attr)
    expr = accessor()

    computed = compute(expr, frame)
    assert_series_equal(computed, Series([1, 1, 1], name=expr._name))
def test_datetime_access():
    """Check four datetime fields of a column whose components are all 1.

    Every row of ``when`` is ``datetime(2010, 1, 1, 1, 1, 1)``, so ``day``,
    ``month``, ``minute`` and ``second`` are each expected to be 1.
    """
    columns = {'name': ['Alice', 'Bob', 'Joe'],
               'when': [datetime(2010, 1, 1, 1, 1, 1)] * 3,
               'amount': [100, 200, 300],
               'id': [1, 2, 3]}
    frame = DataFrame(columns)
    table = symbol('t', discover(frame))

    attrs = ['day', 'month', 'minute', 'second']
    for expr in (getattr(table.when, name) for name in attrs):
        assert_series_equal(compute(expr, frame),
                            Series([1, 1, 1], name=expr._name))
Exemple #19
0
def test_sort_on_series_no_warning(recwarn):
    """Sorting the ``amount`` column must not record a ``FutureWarning``.

    Both ``sort('amount')`` and ``sort()`` are computed against ``df`` and
    compared with ``df.amount.order()``. After each one, popping a
    ``FutureWarning`` from ``recwarn`` is expected to fail.
    """
    # NOTE(review): ``Series.order`` may not exist in newer pandas releases;
    # confirm against the pandas version this suite targets.
    expected = df.amount.order()

    # Discard anything recorded so far (this runs after ``expected`` is
    # built) so that only warnings from the calls below are inspected.
    recwarn.clear()

    assert_series_equal(compute(t['amount'].sort('amount'), df), expected)

    # ``recwarn.pop`` raises an AssertionError if no matching warning was
    # recorded, so expecting that error asserts "no FutureWarning happened".
    # The same check is repeated after the second sort below.
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)

    assert_series_equal(compute(t['amount'].sort(), df), expected)
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)
def test_sort_on_series_no_warning(recwarn):
    """Check that sorting a single column leaves no ``FutureWarning`` behind.

    The sorted result, with and without an explicit key, must equal
    ``df.amount.order()``, and ``recwarn`` must hold no ``FutureWarning``
    after either computation.
    """
    # NOTE(review): ``Series.order`` may be unavailable on recent pandas;
    # verify against the supported pandas versions.
    expected = df.amount.order()

    # Reset the recorder after building ``expected`` so earlier warnings are
    # not counted against the computations below.
    recwarn.clear()

    assert_series_equal(compute(t['amount'].sort('amount'), df), expected)

    # raises an assertion error if no warning occurred; the same pattern is
    # used again after the second computation
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)

    assert_series_equal(compute(t['amount'].sort(), df), expected)
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)
def test_arithmetic():
    """``+``, ``*`` and ``%`` on two columns should match the pandas results."""
    amount, ident = t['amount'], t['id']

    assert_series_equal(compute(amount + ident, df), df.amount + df.id)
    assert_series_equal(compute(amount * ident, df), df.amount * df.id)
    assert_series_equal(compute(amount % ident, df), df.amount % df.id)
Exemple #22
0
def test_arithmetic():
    """Check addition, multiplication and modulo of ``amount`` and ``id``."""
    total = compute(t['amount'] + t['id'], df)
    assert_series_equal(total, df.amount + df.id)

    product = compute(t['amount'] * t['id'], df)
    assert_series_equal(product, df.amount * df.id)

    remainder = compute(t['amount'] % t['id'], df)
    assert_series_equal(remainder, df.amount % df.id)
def test_map_column():
    """Mapping an increment over ``amount`` should equal ``df['amount'] + 1``."""
    expr = t['amount'].map(lambda x: x + 1, 'int')
    assert_series_equal(compute(expr, df), df['amount'] + 1)
Exemple #24
0
def test_selection_out_of_order():
    """Selecting ``name`` where ``amount < 100`` should match ``df.loc``."""
    predicate = t['amount'] < 100
    selected = compute(t['name'][predicate], df)

    mask = df.amount < 100
    assert_series_equal(selected, df.loc[mask, 'name'])
Exemple #25
0
def test_frame_broadcast():
    """A broadcast-collected ``amount * id`` should equal the pandas product."""
    product = t.amount * t.id
    collected = broadcast_collect(expr=product)
    assert_series_equal(compute(collected, df), df.amount * df.id)
def test_series_slice():
    """Indexing ``t.amount`` must agree with ``df.amount.iloc``."""
    # Integer positions are compared with ``==``.
    for pos in (0, 2):
        assert compute(t.amount[pos], df) == df.amount.iloc[pos]

    # Slices are compared as series.
    for window in (slice(None, 2), slice(1, 3), slice(1, None, 2)):
        assert_series_equal(compute(t.amount[window], df),
                            df.amount.iloc[window])
Exemple #27
0
def test_map_column():
    """``amount`` mapped through ``x + 1`` should equal the column plus one."""
    incremented = compute(t['amount'].map(lambda x: x + 1, 'int'), df)
    expected = df['amount'] + 1
    assert_series_equal(incremented, expected)
def test_strlen():
    """``strlen`` of the name column should give 5, 3 and 5."""
    lengths = compute(t.name.strlen(), df).reset_index(drop=True)
    assert_series_equal(pd.Series([5, 3, 5], name='name'), lengths)
def test_series_columnwise():
    """Adding 1 to field ``a`` computed over a series should equal ``s + 1``."""
    series = Series([1, 2, 3], name='a')
    sym = symbol('t', 'var * {a: int64}')
    assert_series_equal(series + 1, compute(sym.a + 1, series))
Exemple #30
0
def test_count_keepdims_frame():
    """``count(keepdims=True)`` on a frame gives a one-element series.

    The expected value is the frame's row count, named ``'s_count'``.
    """
    frame = pd.DataFrame({'a': [1, 2, 3, np.nan]})
    sym = symbol('s', discover(frame))

    counted = compute(sym.count(keepdims=True), frame)
    expected = pd.Series([frame.shape[0]], name='s_count')
    assert_series_equal(counted, expected)
Exemple #31
0
def test_coerce_series():
    """Coercing the strings ``'1'``, ``'2'``, ``'3'`` to ``int64`` gives 1, 2, 3."""
    series = pd.Series(list('123'), name='a')
    sym = symbol('t', discover(series))

    coerced = compute(sym.coerce(to='int64'), series)
    assert_series_equal(coerced, pd.Series([1, 2, 3], name=series.name))
Exemple #32
0
def test_series_slice():
    """Compare scalar and slice indexing of ``t.amount`` with pandas ``iloc``."""
    column = df.amount

    scalar_positions = [0, 2]
    for pos in scalar_positions:
        assert compute(t.amount[pos], df) == column.iloc[pos]

    windows = [slice(None, 2), slice(1, 3), slice(1, None, 2)]
    for window in windows:
        assert_series_equal(compute(t.amount[window], df), column.iloc[window])
Exemple #33
0
def test_map():
    """Mapping a row function that adds ``amt`` and ``id`` matches pandas.

    The expected value is ``df['amount'] + df['id']``.
    """
    expr = t.map(lambda _, amt, id: amt + id, 'real')
    assert_series_equal(compute(expr, df), df['amount'] + df['id'])
Exemple #34
0
def test_strlen():
    """Check string lengths of ``t.name`` against ``[5, 3, 5]``."""
    wanted = pd.Series([5, 3, 5], name='name')
    computed = compute(t.name.strlen(), df)
    assert_series_equal(wanted, computed.reset_index(drop=True))
Exemple #35
0
def test_eq():
    """``amount == 100`` should compute to the same mask pandas produces."""
    mask = compute(t['amount'] == 100, df)
    assert_series_equal(mask, df['amount'] == 100)
Exemple #36
0
def test_summary():
    """A ``summary`` of an id count and an amount sum gives 3 and 350."""
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    expected = Series({'count': 3, 'sum': 350})
    assert_series_equal(compute(expr, df), expected)
def test_map():
    """``t.map`` with an ``amt + id`` function should equal the column sum."""
    summed = compute(t.map(lambda _, amt, id: amt + id, 'real'), df)
    expected = df['amount'] + df['id']
    assert_series_equal(summed, expected)
def test_selection_out_of_order():
    """Project ``name`` first, then filter on ``amount < 100``."""
    names = t['name']
    expr = names[t['amount'] < 100]

    expected = df.loc[df.amount < 100, 'name']
    assert_series_equal(compute(expr, df), expected)
Exemple #39
0
def test_shift(n):
    """Shifting by ``n`` should match ``Series.shift(n)``."""
    hours = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(hours)
    sym = symbol('s', discover(data))
    assert_series_equal(compute(sym.shift(n), data), data.shift(n))
def test_summary():
    """Check ``summary(count=..., sum=...)`` against a series of 3 and 350."""
    id_count = t.id.count()
    amount_sum = t.amount.sum()
    summarised = compute(summary(count=id_count, sum=amount_sum), df)
    assert_series_equal(summarised, Series({'count': 3, 'sum': 350}))
def test_neg():
    """Negating ``amount`` should match negating the pandas column."""
    negated = compute(-t['amount'], df)
    assert_series_equal(negated, -df['amount'])
def test_eq():
    """Equality against a scalar should match the pandas comparison."""
    expr = t['amount'] == 100
    expected = df['amount'] == 100
    assert_series_equal(compute(expr, df), expected)
def test_neg_projection():
    """Negating a two-column projection should match negating the frame.

    ``df[['amount', 'id']]`` is a DataFrame, so the comparison uses the
    frame assertion rather than ``assert_series_equal``.
    """
    columns = ['amount', 'id']
    tm.assert_frame_equal(compute(-t[columns], df), -df[columns])
Exemple #44
0
def test_neg_projection():
    """Check unary minus on the ``amount``/``id`` projection.

    The expected value ``-df[['amount', 'id']]`` is a DataFrame, so it is
    compared with the frame assertion instead of the series one.
    """
    negated = compute(-t[['amount', 'id']], df)
    tm.assert_frame_equal(negated, -df[['amount', 'id']])
def test_coerce_series():
    """Check ``coerce(to='int64')`` on a series of digit strings."""
    digits = pd.Series(list('123'), name='a')
    expr = symbol('t', discover(digits)).coerce(to='int64')

    expected = pd.Series([1, 2, 3], name=digits.name)
    assert_series_equal(compute(expr, digits), expected)
Exemple #46
0
def test_neg():
    """Unary minus on ``t['amount']`` should equal ``-df['amount']``."""
    expr = -t['amount']
    expected = -df['amount']
    assert_series_equal(compute(expr, df), expected)
Exemple #47
0
def test_label():
    """A labelled expression should compute to a series carrying that name."""
    scaled = (t['amount'] * 10).label('foo')

    expected = df['amount'] * 10
    expected.name = 'foo'

    assert_series_equal(compute(scaled, df), expected)
def test_field_on_series():
    """Field ``s`` of symbol ``s`` computed over a series returns that series."""
    sym = symbol('s', 'var * int')
    series = Series([1, 2, 3, 4], name='s')
    field = sym.s
    assert_series_equal(compute(field, series), series)
Exemple #49
0
def test_series_columnwise():
    """Check ``t.a + 1`` computed directly over a named series."""
    values = Series([1, 2, 3], name='a')
    expr = symbol('t', 'var * {a: int64}').a + 1

    computed = compute(expr, values)
    assert_series_equal(values + 1, computed)
def test_count_keepdims_frame():
    """Check ``count(keepdims=True)`` over a one-column frame with a NaN."""
    frame = pd.DataFrame(dict(a=[1, 2, 3, np.nan]))
    expr = symbol('s', discover(frame)).count(keepdims=True)

    n_rows = frame.shape[0]
    assert_series_equal(compute(expr, frame),
                        pd.Series([n_rows], name='s_count'))
def test_label():
    """``label('foo')`` should name the computed series ``'foo'``."""
    wanted = df['amount'] * 10
    wanted.name = 'foo'

    labelled = t['amount'] * 10
    labelled = labelled.label('foo')
    assert_series_equal(compute(labelled, df), wanted)
def test_shift(n):
    """Compare ``s.shift(n)`` with pandas on an hourly datetime series."""
    data = pd.Series(
        pd.date_range(start='20120101', end='20120102', freq='H'),
    )
    expr = symbol('s', discover(data)).shift(n)

    shifted = compute(expr, data)
    assert_series_equal(shifted, data.shift(n))
Exemple #53
0
def test_field_on_series():
    """Accessing ``expr.s`` on a series-backed symbol gives the data back."""
    values = Series([1, 2, 3, 4], name='s')
    computed = compute(symbol('s', 'var * int').s, values)
    assert_series_equal(computed, values)