Example #1
def test_eq():
    """Check `eq` and `ne` between two string-valued GrizzlySeries."""
    lhs = gr.GrizzlySeries(["hello", "world", "strings", "morestrings"])
    rhs = gr.GrizzlySeries(["hel", "world", "string", "morestrings"])

    # Only positions 1 and 3 hold identical strings in both inputs.
    equal_flags = list(lhs.eq(rhs).evaluate().values)
    assert equal_flags == [False, True, False, True]

    # `ne` must be the exact complement of `eq`.
    unequal_flags = list(lhs.ne(rhs).evaluate().values)
    assert unequal_flags == [True, False, True, False]
Example #2
def test_unsupported_binop_error():
    """Check that unsupported binary operations raise an error.

    Two cases are covered:

    * a `GrizzlySeries` combined with a plain Pandas `Series` is expected
      to raise `GrizzlyError`;
    * an arithmetic operation between two string-valued `GrizzlySeries`
      is expected to raise `TypeError`.
    """
    from weld.grizzly.core.error import GrizzlyError

    # NOTE(review): the previous version only constructed the operands inside
    # the `pytest.raises` blocks and never applied an operator, so nothing
    # could raise and the test always failed with "DID NOT RAISE". The `+` and
    # `/` operators below are assumed to be the intended operations -- confirm.

    # Operands are built outside the context manager so that only the binary
    # operation itself can satisfy `pytest.raises`.
    a = gr.GrizzlySeries([1, 2, 3])
    b = pd.Series([1, 2, 3])
    with pytest.raises(GrizzlyError):
        _ = a + b

    a = gr.GrizzlySeries(["hello", "world"])
    b = gr.GrizzlySeries(["hello", "world"])
    with pytest.raises(TypeError):
        _ = a / b
Example #3
def test_unsupported_binop_error():
    """Check that combining a `GrizzlySeries` with a Pandas `Series` raises.

    The operation is expected to raise `GrizzlyError`.
    """
    from weld.grizzly.core.error import GrizzlyError

    # NOTE(review): the previous version only constructed the operands inside
    # the `pytest.raises` block and never applied an operator, so nothing could
    # raise and the test always failed with "DID NOT RAISE". The `+` operator
    # below is assumed to be the intended operation -- confirm.

    # Operands are built outside the context manager so that only the binary
    # operation itself can satisfy `pytest.raises`.
    a = gr.GrizzlySeries([1, 2, 3])
    b = pd.Series([1, 2, 3])
    with pytest.raises(GrizzlyError):
        _ = a + b
Example #4
def _compare_vs_pandas(aggs, data=None):
    """
    Compare the result of aggregations vs. Pandas.

    `aggs` is passed unchanged to both `pd.Series.agg` and
    `gr.GrizzlySeries.agg`. `data` is the input used to build both series;
    it defaults to the integers in `range(-10, 25)`.

    Returns the code used to compute the result if the result
    is a `GrizzlySeries`, and `None` if the result is a scalar.

    """
    if data is None:
        data = list(range(-10, 25))

    pandas_result = pd.Series(data).agg(aggs)
    grizzly_result = gr.GrizzlySeries(data).agg(aggs)

    if isinstance(pandas_result, pd.Series):
        assert isinstance(grizzly_result, gr.GrizzlySeries)
        # Grab the code before converting, since the conversion replaces the
        # GrizzlySeries with a plain Pandas object.
        code = grizzly_result.code
        grizzly_result = grizzly_result.to_pandas()
        # Need to reset index since labels in Pandas become the aggregation name.
        # Grizzly doesn't support indices right now.
        assert pandas_result.reset_index(drop=True).equals(grizzly_result)
        return code

    # Scalar result: both libraries must produce a plain number with the same
    # value. (Previously these checks sat after `return code` inside the
    # branch above and were never executed.)
    scalar_types = (int, float, np.float64, np.int64)
    assert isinstance(pandas_result, scalar_types)
    assert isinstance(grizzly_result, scalar_types)
    assert pandas_result == grizzly_result
    return None
Example #5
def _test_binop(grizzly_op, pandas_op, name):
    """
    Test binary operators, ensuring that their output/data type
    matches Pandas.

    `grizzly_op` is called with two `GrizzlySeries` and `pandas_op` with two
    Pandas `Series` built from the same values; both are exercised for every
    ordered pair of the numeric dtypes listed below. `name` is only used to
    label the assertion message on failure.

    """
    types = ['int8', 'uint8', 'int16', 'uint16', 'int32',
             'uint32', 'int64', 'uint64', 'float32', 'float64']
    values = [1, 2, 3]
    for left in types:
        for right in types:
            grizzly_left = gr.GrizzlySeries(values, dtype=left)
            grizzly_right = gr.GrizzlySeries(values, dtype=right)
            result = grizzly_op(grizzly_left, grizzly_right).to_pandas()

            pandas_left = pd.Series(values, dtype=left)
            pandas_right = pd.Series(values, dtype=right)
            expect = pandas_op(pandas_left, pandas_right)

            # `Series.equals` compares dtype as well as values, so this also
            # checks that the result dtype agrees with Pandas.
            assert result.equals(expect), "{}, {} (op={})".format(left, right, name)
Example #6
def test_name():
    """Check that a series name survives each kind of derived series."""
    named = gr.GrizzlySeries([1,2,3], name="testname")

    # Each entry derives a new series from `named`; they are built and
    # evaluated one at a time, in this order.
    derivations = (
        lambda s: s + s,                    # elementwise arithmetic
        lambda s: s.agg(['sum', 'count']),  # multiple aggregations
        lambda s: s[:2],                    # slicing
        lambda s: s[s == 1],                # boolean-mask filtering
    )
    for derive in derivations:
        derived = derive(named)
        assert derived.evaluate().name == "testname"
Example #7
def test_indexing():
    """Check scalar, slice, and boolean-mask indexing on a GrizzlySeries.

    Results are compared against NumPy arrays rather than Pandas because the
    output doesn't always match Pandas (indexes are not currently supported).
    """
    series = gr.GrizzlySeries(list(range(100)), dtype='int64')

    # Scalar lookups.
    assert series[0] == 0
    assert series[50] == 50

    # Slices.
    middle = series[10:50].evaluate().values
    assert np.array_equal(middle, np.arange(10, 50, dtype='int64'))
    prefix = series[:50].evaluate().values
    assert np.array_equal(prefix, np.arange(50, dtype='int64'))

    # Boolean masks: many matches, exactly one match, and no match.
    above_fifty = series[series > 50].evaluate().values
    assert np.array_equal(above_fifty, np.arange(51, 100, dtype='int64'))
    only_two = series[series == 2].evaluate().values
    assert np.array_equal(only_two, np.array([2], dtype='int64'))
    negatives = series[series < 0].evaluate().values
    assert np.array_equal(negatives, np.array([], dtype='int64'))
Example #8
def test_get():
    """
    Behavior of get is different in Grizzly -- it currently returns empty strings
    in cases where Pandas returns NaN. This will be changed in a later patch.

    """
    inp = ["hello", "world", "test", "me", '']

    # (index passed to `get`, expected character per input string). An empty
    # string is expected wherever the index falls outside the string.
    cases = [
        (3, ['l', 'l', 't', '', '']),
        (-1, ['o', 'd', 't', 'e', '']),
        (-50, ['', '', '', '', '']),
    ]
    for index, expect in cases:
        grizzly_result = gr.GrizzlySeries(inp).str.get(index).str.to_pandas()
        pandas_result = pd.Series(expect)
        assert pandas_result.equals(grizzly_result), "get({})".format(index)
Example #9
def compare_vs_pandas(func, strings, *args, **kwargs):
    """
    Run the string method named `func` on both Pandas and Grizzly and assert
    that the results are equal.

    `strings` is the input data for both series. `*args` and `**kwargs` are
    forwarded unchanged to the string method on both sides.

    """
    pandas_series = pd.Series(strings)
    grizzly_series = gr.GrizzlySeries(strings)

    pandas_result = getattr(pandas_series.str, func)(*args, **kwargs)
    grizzly_result = getattr(grizzly_series.str, func)(*args, **kwargs)
    if grizzly_result.output_type.elem_type != WeldVec(I8()):
        # Elements are not byte vectors, so a plain conversion is enough.
        grizzly_result = grizzly_result.to_pandas()
    else:
        # Perform UTF-8 decoding.
        # Previously this ran unconditionally, so results already converted by
        # the branch above were pushed through the string conversion as well.
        grizzly_result = grizzly_result.str.to_pandas()
    assert pandas_result.equals(grizzly_result)
Example #10
def test_scalar():
    """Check that adding a Python scalar to a series matches Pandas.

    Every numeric dtype listed below is used for the series operand; the
    scalar operand is always the plain integer 123.
    """
    types = ['int8', 'uint8', 'int16', 'uint16', 'int32',
             'uint32', 'int64', 'uint64', 'float32', 'float64']
    scalar = 123
    # The previous version wrapped this in a second loop over `types` whose
    # variable never influenced the computation, so every dtype was tested ten
    # times with identical inputs. One pass per dtype covers the same cases.
    for dtype in types:
        result = (gr.GrizzlySeries([1, 2, 3], dtype=dtype) + scalar).to_pandas()
        expect = pd.Series([1, 2, 3], dtype=dtype) + scalar
        assert result.equals(expect), "{}, {} (op={})".format(dtype, scalar, "scalar")
Example #11
def test_replace():
    """
    Behavior of replace is different in Grizzly -- it currently only replaces the *first*
    occurrence. This will be changed in a later patch.

    """
    inp = ["abc", "abcdefg", "abcabcabc", "gfedcbaabcabcdef", "", "XYZ"]
    # Expected values replace only the first "abc" in each string (count=1).
    expect = [s.replace("abc", "XYZ", 1) for s in inp]
    # NOTE(review): the original call was cut off after `replace("abc",`. The
    # replacement string is taken from `expect` above; the trailing
    # `.str.to_pandas()` conversion is assumed -- confirm.
    grizzly_result = gr.GrizzlySeries(inp).str.replace("abc", "XYZ").str.to_pandas()
    pandas_result = pd.Series(expect)
    assert pandas_result.equals(grizzly_result)