Ejemplo n.º 1
0
def test_table_schema(c, df):
    """Walk through creating, using and dropping a second schema.

    The assertions check that ``df`` stays reachable as ``root.df``, that a
    table created while ``foo`` is the active schema is reachable as
    ``foo.bar`` from ``root``, and that unqualified or dropped names are
    rejected with the expected exceptions.
    """
    baseline = c.sql("SELECT * FROM df")

    # The unqualified name and the "root"-qualified name give the same data.
    root_qualified = c.sql("SELECT * FROM root.df")
    assert_eq(baseline, root_qualified)

    # Merely creating a schema does not change how "df" resolves.
    c.sql("CREATE SCHEMA foo")
    unqualified = c.sql("SELECT * FROM df")
    assert_eq(baseline, unqualified)

    # After switching to "foo", the table is still reachable via "root.df".
    c.sql('USE SCHEMA "foo"')
    seen_from_foo = c.sql("SELECT * FROM root.df")
    assert_eq(baseline, seen_from_foo)

    # Copy the table into the active schema under the name "bar".
    c.sql("CREATE TABLE bar AS TABLE root.df")
    copied = c.sql("SELECT * FROM bar")
    assert_eq(baseline, copied)

    # The schema "other" is never created in this test; a KeyError is expected.
    with pytest.raises(KeyError):
        c.sql("CREATE TABLE other.bar AS TABLE df")

    # Back in "root", "bar" is only reachable with its schema prefix.
    c.sql('USE SCHEMA "root"')
    prefixed = c.sql("SELECT * FROM foo.bar")
    assert_eq(baseline, prefixed)

    with pytest.raises(ParsingException):
        c.sql("SELECT * FROM bar")

    # Once the schema is dropped, the prefixed name is rejected as well.
    c.sql("DROP SCHEMA foo")

    with pytest.raises(ParsingException):
        c.sql("SELECT * FROM foo.bar")
Ejemplo n.º 2
0
def test_create_from_query(c, df):
    """Create ``new_table`` from a query, first as a TABLE and then as a VIEW.

    After each statement, selecting everything from ``new_table`` must give
    back the contents of ``df``.
    """
    create_statements = (
        """
        CREATE OR REPLACE TABLE
            new_table
        AS (
            SELECT * FROM df
        )
    """,
        """
        CREATE OR REPLACE VIEW
            new_table
        AS (
            SELECT * FROM df
        )
    """,
    )
    select_all = """
        SELECT * FROM new_table
    """

    # Same check for both flavours; the second statement replaces the first.
    for statement in create_statements:
        c.sql(statement)
        return_df = c.sql(select_all)
        assert_eq(df, return_df)
Ejemplo n.º 3
0
def test_null(c):
    """Check ``IS NULL`` / ``IS NOT NULL`` against two registered tables."""

    def build_expected(not_null_flags, null_flags):
        # "nn" is compared as nullable boolean, "n" as plain bool.
        frame = pd.DataFrame(index=[0, 1, 2])
        frame["nn"] = not_null_flags
        frame["nn"] = frame["nn"].astype("boolean")
        frame["n"] = null_flags
        return frame

    # Column "c" of user_table_nan: the middle row is expected to be NULL.
    nan_result = c.sql(
        """
        SELECT
            c IS NOT NULL AS nn,
            c IS NULL AS n
        FROM user_table_nan
    """
    )
    assert_eq(
        nan_result,
        build_expected([True, False, True], [False, True, False]),
    )

    # Column "a" of string_table: no row is expected to be NULL.
    string_result = c.sql(
        """
        SELECT
            a IS NOT NULL AS nn,
            a IS NULL AS n
        FROM string_table
    """
    )
    assert_eq(
        string_result,
        build_expected([True, True, True], [False, False, False]),
    )
Ejemplo n.º 4
0
def test_over_with_windows(c):
    """Check ``SUM(a) OVER (... ROWS BETWEEN ...)`` for ten different frames.

    The input is the single column ``a = 0..4`` registered as table ``tmp``;
    the expected sums are written out by hand per window frame.
    """
    source = pd.DataFrame({"a": range(5)})
    c.create_table("tmp", source)

    window_query = """
    SELECT
        a,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS O1,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN 2 PRECEDING AND 3 FOLLOWING) AS O2,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN 2 PRECEDING AND UNBOUNDED FOLLOWING) AS O3,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING) AS O4,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS O5,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS O6,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN UNBOUNDED PRECEDING AND 3 FOLLOWING) AS O7,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS O8,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN 3 FOLLOWING AND 3 FOLLOWING) AS O9,
        SUM(a) OVER (ORDER BY a ROWS BETWEEN 3 PRECEDING AND 1 PRECEDING) AS O10
    FROM tmp
    """
    return_df = c.sql(window_query)

    # Column "a" is taken from the result itself; only the sums are asserted.
    expected_columns = {
        "a": return_df.a,
        "O1": [0, 1, 3, 6, 9],
        "O2": [6, 10, 10, 10, 9],
        "O3": [10, 10, 10, 10, 9],
        "O4": [6, 10, 9, 7, 4],
        "O5": [10, 10, 9, 7, 4],
        "O6": [0, 1, 3, 6, 10],
        "O7": [6, 10, 10, 10, 10],
        "O8": [10, 10, 10, 10, 10],
        # Frames that fall completely outside the table yield NULL.
        "O9": [3, 4, None, None, None],
        "O10": [None, 0, 1, 3, 6],
    }
    expected_df = pd.DataFrame(expected_columns)

    assert_eq(return_df, expected_df, check_dtype=False, check_index=False)
Ejemplo n.º 5
0
def test_string_filter(c, string_table):
    """Filter ``string_table`` on an exact string match; expect its first row."""
    query = "SELECT * FROM string_table WHERE a = 'a normal string'"
    return_df = c.sql(query)
    first_row = string_table.head(1)

    assert_eq(return_df, first_row)
Ejemplo n.º 6
0
def test_describe_model(c, training_df):
    """Compare ``DESCRIBE MODEL`` output with the stored model's parameters.

    ``training_df`` is not used directly in the body.
    NOTE(review): presumably the fixture is requested for its side effect of
    registering the ``timeseries`` table -- confirm against the fixture.
    """
    c.sql(
        """
        CREATE MODEL ex_describe_model WITH (
            model_class = 'sklearn.ensemble.GradientBoostingClassifier',
            wrap_predict = True,
            target_column = 'target'
        ) AS (
            SELECT x, y, x*y > 0 AS target
            FROM timeseries
            LIMIT 100
        )
    """
    )

    # The context keeps the fitted model together with its training columns.
    registered_models = c.schema[c.schema_name].models
    model, training_columns = registered_models["ex_describe_model"]

    expected_dict = model.get_params()
    expected_dict["training_columns"] = training_columns.tolist()

    # Stringify every value so that non-string parameters compare as text.
    params_frame = pd.DataFrame.from_dict(
        expected_dict, orient="index", columns=["Params"]
    )
    expected_series = params_frame["Params"].apply(str).sort_index()

    described = c.sql("DESCRIBE MODEL ex_describe_model")
    result = described["Params"].apply(str)

    assert_eq(expected_series, result)

    # Describing a model that was never created must fail.
    with pytest.raises(RuntimeError):
        c.sql("DESCRIBE MODEL undefined_model")
Ejemplo n.º 7
0
def test_boolean_operations(c):
    """Check ``IS [NOT] TRUE / FALSE / UNKNOWN`` on a three-valued column.

    The input column ``b`` holds one true, one false and one missing value.
    """
    raw = pd.DataFrame({"b": [1, 0, -1]})
    ddf = dd.from_pandas(raw, npartitions=1)
    # Map 1 -> True, 0 -> False and -1 -> pd.NA to get a bool column with a gap.
    ddf["b"] = ddf["b"].apply(
        lambda x: pd.NA if x < 0 else x > 0, meta=("b", "bool")
    )
    c.create_table("df", ddf)

    result_df = c.sql(
        """
        SELECT
            b IS TRUE AS t,
            b IS FALSE AS f,
            b IS NOT TRUE AS nt,
            b IS NOT FALSE AS nf,
            b IS UNKNOWN AS u,
            b IS NOT UNKNOWN AS nu
        FROM df"""
    )

    expected_columns = {
        "t": [True, False, False],
        "f": [False, True, False],
        "nt": [False, True, True],
        "nf": [True, False, True],
        "u": [False, False, True],
        "nu": [True, True, False],
    }
    expected_df = pd.DataFrame(expected_columns, dtype="bool")
    # The negated checks are compared as nullable booleans.
    for negated in ("nt", "nf", "nu"):
        expected_df[negated] = expected_df[negated].astype("boolean")

    assert_eq(result_df, expected_df)
Ejemplo n.º 8
0
def test_filtered_csv(tmpdir, c):
    """Run a ``WHERE`` query against a table backed by CSV files.

    Predicate pushdown is NOT supported for CSV data. This test just checks
    that the "attempted" predicate-pushdown logic does not lead to any
    unexpected errors.
    """
    # Write a simple csv dataset, one file per partition.
    source = pd.DataFrame(
        {
            "a": [1, 2, 3] * 5,
            "b": range(15),
            "c": ["A"] * 15,
        }
    )
    csv_glob = tmpdir + "/*.csv"
    dd.from_pandas(source, npartitions=3).to_csv(csv_glob, index=False)

    # Read back with dask and apply the WHERE query.
    csv_ddf = dd.read_csv(csv_glob)

    # Register outside the ``try``: if registration itself fails there is
    # nothing to drop, and the cleanup must not replace the real error.
    c.create_table("my_csv_table", csv_ddf)
    try:
        return_df = c.sql("SELECT * FROM my_csv_table WHERE b < 10")
    finally:
        c.drop_table("my_csv_table")

    # Check the computed result against the same filter applied with dask.
    expected_df = csv_ddf[csv_ddf["b"] < 10]

    assert_eq(return_df, expected_df)
Ejemplo n.º 9
0
def test_group_by_nan(c):
    """Group by a column containing NaN, then by one containing infinity."""
    nan_groups = c.sql(
        """
    SELECT
        c
    FROM user_table_nan
    GROUP BY c
    """
    )
    expected_nan = pd.DataFrame({"c": [3, float("nan"), 1]})

    # we return nullable int dtype instead of float
    assert_eq(nan_groups, expected_nan, check_dtype=False)

    inf_groups = c.sql(
        """
    SELECT
        c
    FROM user_table_inf
    GROUP BY c
    """
    )
    expected_inf = pd.DataFrame({"c": [3, 1, float("inf")]})
    expected_inf["c"] = expected_inf["c"].astype("float64")

    def ordered(frame):
        # Sort both sides by "c" and renumber before comparing.
        return frame.sort_values("c").reset_index(drop=True)

    assert_eq(ordered(inf_groups), ordered(expected_inf))
Ejemplo n.º 10
0
def test_over_calls(c, user_table_1):
    """Check window functions partitioned by ``user_id`` and ordered by ``b``."""
    window_query = """
    SELECT
        user_id,
        b,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY b) AS O1,
        FIRST_VALUE(user_id*10 - b) OVER (PARTITION BY user_id ORDER BY b) AS O2,
        SINGLE_VALUE(user_id*10 - b) OVER (PARTITION BY user_id ORDER BY b) AS O3,
        LAST_VALUE(user_id*10 - b) OVER (PARTITION BY user_id ORDER BY b ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS O4,
        SUM(user_id) OVER (PARTITION BY user_id ORDER BY b) AS O5,
        AVG(user_id) OVER (PARTITION BY user_id ORDER BY b) AS O6,
        COUNT(*) OVER (PARTITION BY user_id ORDER BY b) AS O7,
        COUNT(b) OVER (PARTITION BY user_id ORDER BY b) AS O7b,
        MAX(b) OVER (PARTITION BY user_id ORDER BY b) AS O8,
        MIN(b) OVER (PARTITION BY user_id ORDER BY b) AS O9
    FROM user_table_1
    """
    return_df = c.sql(window_query)

    # The key columns come straight from the fixture; the window results
    # are written out by hand, row for row.
    expected_columns = {
        "user_id": user_table_1.user_id,
        "b": user_table_1.b,
        "O1": [2, 1, 1, 1],
        "O2": [19, 7, 19, 27],
        "O3": [19, 7, 19, 27],
        "O4": [17, 7, 17, 27],
        "O5": [4, 1, 2, 3],
        "O6": [2, 1, 2, 3],
        "O7": [2, 1, 1, 1],
        "O7b": [2, 1, 1, 1],
        "O8": [3, 3, 1, 3],
        "O9": [1, 3, 1, 3],
    }
    expected_df = pd.DataFrame(expected_columns)

    assert_eq(return_df, expected_df, check_dtype=False, check_index=False)
Ejemplo n.º 11
0
def test_groupby_split_every(c, gpu, split_every, expected_keys):
    """Check that ``sql.groupby.split_every`` is passed on to the aggregation.

    The SQL result is compared with a dask groupby using the same
    ``split_every``, and the number of keys in the resulting task graph is
    compared with ``expected_keys``.
    """
    input_ddf = dd.from_pandas(
        pd.DataFrame({"user_id": [1, 2, 3, 4] * 16, "b": [5, 6, 7, 8] * 16}),
        npartitions=16,
    )  # Need an input with multiple partitions to demonstrate split_every

    c.create_table("split_every_input", input_ddf, gpu=gpu)

    # Drop the table even when an assertion fails, so it does not stay
    # registered in the context after this test.
    try:
        return_df = c.sql(
            """
        SELECT
        user_id, SUM(b) AS "S"
        FROM split_every_input
        GROUP BY user_id
        """,
            config_options={"sql.groupby.split_every": split_every},
        )
        expected_df = (
            input_ddf.groupby(by="user_id")
            .agg({"b": "sum"}, split_every=split_every)
            .reset_index(drop=False)
            .rename(columns={"b": "S"})
            .sort_values("user_id")
        )

        assert len(return_df.dask.keys()) == expected_keys
        assert_eq(return_df, expected_df, check_index=False)
    finally:
        c.drop_table("split_every_input")
Ejemplo n.º 12
0
def test_sort_with_nan_many_partitions(gpu):
    """Sort a ten-partition table whose column ``a`` alternates NaN and 1.

    Two cases are checked: an explicit ``NULLS FIRST`` ordering on two
    columns, and a plain ``ORDER BY a`` where the NaN rows are expected last.
    Both tables are registered with the parametrized ``gpu`` flag.
    """
    c = Context()

    # Case 1: explicit NULLS FIRST on both sort keys.
    df = pd.DataFrame({
        "a": [float("nan"), 1] * 30,
        "b": [1, 2, 3] * 20,
    })
    c.create_table("df", dd.from_pandas(df, npartitions=10), gpu=gpu)

    df_result = c.sql(
        "SELECT * FROM df ORDER BY a NULLS FIRST, b ASC NULLS FIRST")

    assert_eq(
        df_result,
        pd.DataFrame({
            "a": [float("nan")] * 30 + [1] * 30,
            "b":
            [1] * 10 + [2] * 10 + [3] * 10 + [1] * 10 + [2] * 10 + [3] * 10,
        }),
        check_index=False,
    )

    # Case 2: default null ordering. Pass ``gpu`` here as well so the
    # parametrization covers this half of the test too.
    df = pd.DataFrame({"a": [float("nan"), 1] * 30})
    c.create_table("df", dd.from_pandas(df, npartitions=10), gpu=gpu)

    df_result = c.sql("SELECT * FROM df ORDER BY a")

    assert_eq(
        df_result,
        pd.DataFrame({
            "a": [1] * 30 + [float("nan")] * 30,
        }),
        check_index=False,
    )
Ejemplo n.º 13
0
def test_join_literal(c):
    """Join two tables on the literal conditions ``True`` and ``False``.

    ``ON True`` is expected to pair every left row with every right row;
    ``ON False`` is expected to give an empty result with the same columns.
    """
    always_true = c.sql("""
    SELECT lhs.user_id, lhs.b, rhs.user_id, rhs.c
    FROM user_table_1 AS lhs
    JOIN user_table_2 AS rhs
    ON True
    """)
    cross_product = pd.DataFrame({
        "user_id": [2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
        "b": [1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        # The duplicated right-hand column name gets a "0" suffix.
        "user_id0": [1, 1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 4],
        "c": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
    })

    assert_eq(always_true, cross_product, check_index=False)

    always_false = c.sql("""
    SELECT lhs.user_id, lhs.b, rhs.user_id, rhs.c
    FROM user_table_1 AS lhs
    JOIN user_table_2 AS rhs
    ON False
    """)
    column_names = ("user_id", "b", "user_id0", "c")
    no_rows = pd.DataFrame({name: [] for name in column_names})

    # Empty columns carry no meaningful dtype, so dtypes are not compared.
    assert_eq(always_false, no_rows, check_dtype=False, check_index=False)
Ejemplo n.º 14
0
def test_operators(c, df):
    """Compare SQL arithmetic and comparison operators with their pandas equivalents."""
    result_df = c.sql(
        """
    SELECT
        a * b AS m,
        -a AS u,
        a / b AS q,
        a + b AS s,
        a - b AS d,
        a = b AS e,
        a > b AS g,
        a >= b AS ge,
        a < b AS l,
        a <= b AS le,
        a <> b AS n
    FROM df
    """
    )

    left, right = df["a"], df["b"]
    # One entry per output column, in the order of the SELECT list.
    expected_columns = {
        "m": left * right,
        "u": -left,
        "q": left / right,
        "s": left + right,
        "d": left - right,
        "e": left == right,
        "g": left > right,
        "ge": left >= right,
        "l": left < right,
        "le": left <= right,
        "n": left != right,
    }

    expected_df = pd.DataFrame(index=df.index)
    for column, values in expected_columns.items():
        expected_df[column] = values

    assert_eq(result_df, expected_df)
Ejemplo n.º 15
0
def test_multiple_definitions(c, df_simple):
    """Register one function name for two input types, then redefine it.

    The first pair of registrations squares its input; the second pair, whose
    first call passes ``replace=True``, cubes it. The query calls the function
    both as ``F`` and as ``f``.
    """
    query = """
        SELECT F(a) AS a, f(b) AS b
        FROM df_simple
        """
    source_columns = df_simple[["a", "b"]]

    def f(x):
        return x**2

    # Same SQL name, two signatures: one for float64 and one for int64.
    c.register_function(f, "f", [("x", np.float64)], np.float64)
    c.register_function(f, "f", [("x", np.int64)], np.int64)

    squared = c.sql(query)
    assert_eq(squared, source_columns**2)

    def f(x):
        return x**3

    # ``replace=True`` is passed on the first re-registration only.
    c.register_function(f, "f", [("x", np.float64)], np.float64, replace=True)
    c.register_function(f, "f", [("x", np.int64)], np.int64)

    cubed = c.sql(query)
    assert_eq(cubed, source_columns**3)
Ejemplo n.º 16
0
def test_case(c, df):
    """Compare several ``CASE WHEN`` expressions with per-row Python equivalents."""
    result_df = c.sql(
        """
    SELECT
        (CASE WHEN a = 3 THEN 1 END) AS "S1",
        (CASE WHEN a > 0 THEN a ELSE 1 END) AS "S2",
        (CASE WHEN a = 4 THEN 3 ELSE a + 1 END) AS "S3",
        (CASE WHEN a = 3 THEN 1 WHEN a > 0 THEN 2 ELSE a END) AS "S4",
        CASE
            WHEN (a >= 1 AND a < 2) OR (a > 2) THEN CAST('in-between' AS VARCHAR) ELSE CAST('out-of-range' AS VARCHAR)
        END AS "S5",
        CASE
            WHEN (a < 2) OR (3 < a AND a < 4) THEN 42 ELSE 47
        END AS "S6",
        CASE WHEN (1 < a AND a <= 4) THEN 1 ELSE 0 END AS "S7"
    FROM df
    """
    )

    # Python counterpart of each CASE expression, applied row by row to "a".
    row_rules = {
        # No ELSE branch: unmatched rows become missing values.
        "S1": lambda a: 1 if a == 3 else pd.NA,
        "S2": lambda a: a if a > 0 else 1,
        "S3": lambda a: 3 if a == 4 else a + 1,
        "S4": lambda a: 1 if a == 3 else 2 if a > 0 else a,
        "S5": lambda a: (
            "in-between" if ((1 <= a < 2) or (a > 2)) else "out-of-range"
        ),
        "S6": lambda a: 42 if ((a < 2) or (3 < a < 4)) else 47,
        "S7": lambda a: 1 if (1 < a <= 4) else 0,
    }

    expected_df = pd.DataFrame(index=df.index)
    for column, rule in row_rules.items():
        expected_df[column] = df.a.apply(rule)

    # Do not check dtypes, as pandas versions are inconsistent here
    assert_eq(result_df, expected_df, check_dtype=False)
Ejemplo n.º 17
0
def test_filter_complicated(c, df):
    """Filter with a nested ``AND`` condition and compare with a pandas mask."""
    return_df = c.sql("SELECT * FROM df WHERE a < 3 AND (b > 1 AND b < 3)")

    # Build the mask in the same grouping as the WHERE clause.
    a_below_three = df["a"] < 3
    b_between = (df["b"] > 1) & (df["b"] < 3)
    expected_df = df[a_below_three & b_between]

    assert_eq(return_df, expected_df)
Ejemplo n.º 18
0
def test_timezones(c, datetime_table):
    """Select everything from ``datetime_table`` and expect the fixture back unchanged."""
    select_all = """
        SELECT * FROM datetime_table
        """
    result_df = c.sql(select_all)

    assert_eq(result_df, datetime_table)
Ejemplo n.º 19
0
def test_select(hive_cursor):
    """Register the ``hive_cursor`` fixture as table ``df`` and read all rows back."""
    context = Context()
    context.create_table("df", hive_cursor)

    # Two int32 columns are expected: "i" and "j".
    expected_values = {"i": [1, 2], "j": [2, 4]}
    expected_df = pd.DataFrame(expected_values).astype("int32")

    result_df = context.sql("SELECT * FROM df")

    assert_eq(result_df, expected_df, check_index=False)
Ejemplo n.º 20
0
def test_select_alias(c, df):
    """Swap the names of columns ``a`` and ``b`` through aliases."""
    result_df = c.sql("SELECT a as b, b as a FROM df")

    # Each alias must carry the data of the *other* original column.
    expected_df = pd.DataFrame(index=df.index)
    for alias, source in (("b", "a"), ("a", "b")):
        expected_df[alias] = df[source]

    # Compare in a fixed column order on both sides.
    column_order = ["a", "b"]
    assert_eq(result_df[column_order], expected_df[column_order])
Ejemplo n.º 21
0
def test_tables(gpu):
    """Check that ``SHOW TABLES FROM <schema>`` lists a freshly created table."""
    context = Context()
    # An empty frame is enough; only the table name is checked.
    context.create_table("table", pd.DataFrame(), gpu=gpu)

    expected_df = pd.DataFrame({"Table": ["table"]})
    result_df = context.sql(f'SHOW TABLES FROM "{context.schema_name}"')

    assert_eq(result_df, expected_df, check_index=False)
Ejemplo n.º 22
0
def test_math_operations(c, df):
    """Compare the SQL math functions with their NumPy / pandas counterparts."""
    result_df = c.sql(
        """
        SELECT
            ABS(b) AS "abs"
            , ACOS(b) AS "acos"
            , ASIN(b) AS "asin"
            , ATAN(b) AS "atan"
            , ATAN2(a, b) AS "atan2"
            , CBRT(b) AS "cbrt"
            , CEIL(b) AS "ceil"
            , COS(b) AS "cos"
            , COT(b) AS "cot"
            , DEGREES(b) AS "degrees"
            , EXP(b) AS "exp"
            , FLOOR(b) AS "floor"
            , LOG10(b) AS "log10"
            , LN(b) AS "ln"
            , MOD(b, 4) AS "mod"
            , POWER(b, 2) AS "power"
            , POWER(b, a) AS "power2"
            , RADIANS(b) AS "radians"
            , ROUND(b) AS "round"
            , ROUND(b, 3) AS "round2"
            , SIGN(b) AS "sign"
            , SIN(b) AS "sin"
            , TAN(b) AS "tan"
            , TRUNCATE(b) AS "truncate"
        FROM df
    """
    )

    # One entry per output column, in the order of the SELECT list.
    expected_columns = {
        "abs": df.b.abs(),
        "acos": np.arccos(df.b),
        "asin": np.arcsin(df.b),
        "atan": np.arctan(df.b),
        "atan2": np.arctan2(df.a, df.b),
        "cbrt": np.cbrt(df.b),
        "ceil": np.ceil(df.b),
        "cos": np.cos(df.b),
        # Cotangent is expressed as the reciprocal of the tangent.
        "cot": 1 / np.tan(df.b),
        "degrees": df.b / np.pi * 180,
        "exp": np.exp(df.b),
        "floor": np.floor(df.b),
        "log10": np.log10(df.b),
        "ln": np.log(df.b),
        "mod": np.mod(df.b, 4),
        "power": np.power(df.b, 2),
        "power2": np.power(df.b, df.a),
        "radians": df.b / 180 * np.pi,
        "round": np.round(df.b),
        "round2": np.round(df.b, 3),
        "sign": np.sign(df.b),
        "sin": np.sin(df.b),
        "tan": np.tan(df.b),
        "truncate": np.trunc(df.b),
    }

    expected_df = pd.DataFrame(index=df.index)
    for column, values in expected_columns.items():
        expected_df[column] = values

    assert_eq(result_df, expected_df)
Ejemplo n.º 23
0
def test_show_tables_no_schema(c):
    """Check ``show tables`` without a schema name on a brand-new context.

    The ``c`` fixture is requested but not used; the query runs against a
    separate, freshly constructed ``Context``.
    """
    fresh_context = Context()

    table_data = pd.DataFrame({"id": [0, 1]})
    fresh_context.create_table("test", table_data)

    expected_df = pd.DataFrame({"Table": ["test"]})
    actual_df = fresh_context.sql("show tables").compute()

    assert_eq(actual_df, expected_df)
Ejemplo n.º 24
0
def test_limit(c, input_table, limit, offset, request):
    """Check ``LIMIT`` / ``OFFSET`` against the equivalent positional slice.

    ``input_table`` names both the fixture holding the expected data and the
    table that is queried, so the query and the expectation always refer to
    the same parametrized table.
    NOTE(review): assumes the context registers a table under the same name
    as the fixture, as ``test_filter_cast_timestamp`` already relies on --
    confirm against the context fixture.
    """
    long_table = request.getfixturevalue(input_table)

    # A falsy limit means "no LIMIT clause": only the OFFSET is applied and
    # the expected slice runs to the end of the table.
    if limit:
        query = f"SELECT * FROM {input_table} LIMIT {limit} OFFSET {offset}"
        stop = offset + limit
    else:
        query = f"SELECT * FROM {input_table} OFFSET {offset}"
        stop = None

    assert_eq(c.sql(query), long_table.iloc[offset:stop])
Ejemplo n.º 25
0
def test_custom_function(c, df):
    """Register a squaring function as ``f`` and call it from SQL as ``F``."""

    def f(x):
        return x**2

    # One float64 parameter named "x", float64 return type.
    parameters = [("x", np.float64)]
    c.register_function(f, "f", parameters, np.float64)

    return_df = c.sql("SELECT F(a) AS a FROM df")
    expected_df = df[["a"]]**2

    assert_eq(return_df, expected_df)
Ejemplo n.º 26
0
def test_custom_function_row(c, df):
    """Register a row-wise squaring function (``row_udf=True``) and call it from SQL."""

    def f(row):
        # With row_udf=True the argument is looked up by parameter name.
        return row["x"]**2

    parameters = [("x", np.float64)]
    c.register_function(f, "f", parameters, np.float64, row_udf=True)

    return_df = c.sql("SELECT F(a) AS a FROM df")
    expected_df = df[["a"]]**2

    assert_eq(return_df, expected_df)
Ejemplo n.º 27
0
def test_nan():
    """Check that ``IsNullOperation`` flags None, NaN and ``pd.NA`` as null.

    Both scalar inputs and Series (strings with ``None``, numbers with NaN
    and ``pd.NA``) are covered.
    """
    op = call.IsNullOperation()

    # ``np.nan`` is used instead of the ``np.NaN`` alias, which was removed
    # in NumPy 2.0.
    assert op(None)
    assert op(np.nan)
    assert op(pd.NA)
    assert_eq(op(pd.Series(["a", None, "c"])), pd.Series([False, True, False]))
    assert_eq(op(pd.Series([3, 2, np.nan, pd.NA])),
              pd.Series([False, False, True, True]))
Ejemplo n.º 28
0
def test_filter_cast_timestamp(c, input_table, request):
    """Filter on ``CAST(timezone AS TIMESTAMP)`` and compare with a pandas mask.

    ``input_table`` is used both as the fixture name and as the table name in
    the query.
    """
    datetime_table = request.getfixturevalue(input_table)

    query = f"""
        SELECT * FROM {input_table} WHERE
            CAST(timezone AS TIMESTAMP) >= TIMESTAMP '2014-08-01 23:00:00'
        """
    return_df = c.sql(query)

    # Same cut-off on the pandas side, after casting the column to "<M8[ns]".
    cutoff = pd.Timestamp("2014-08-01 23:00:00")
    as_timestamp = datetime_table["timezone"].astype("<M8[ns]")
    expected_df = datetime_table[as_timestamp >= cutoff]

    assert_eq(return_df, expected_df)
Ejemplo n.º 29
0
def test_literal_null(c):
    """Select a bare ``NULL`` and ``1 + NULL``; both are expected to be missing."""
    result_df = c.sql(
        """
    SELECT NULL AS "N", 1 + NULL AS "I"
    """
    )

    expected_df = pd.DataFrame({"N": [pd.NA], "I": [pd.NA]})
    # The arithmetic column is compared as nullable Int32.
    nullable_int = expected_df["I"].astype("Int32")
    expected_df["I"] = nullable_int

    assert_eq(result_df, expected_df)
Ejemplo n.º 30
0
def test_filter_year(c):
    """Filter on ``year(dt)`` and compare with filtering on the ``year`` column."""
    date_parts = {"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}
    df = pd.DataFrame(date_parts)
    # Combine the year / month / day columns into a single datetime column.
    df["dt"] = pd.to_datetime(df)

    c.create_table("datetime_test", df)

    return_df = c.sql("select * from datetime_test where year(dt) < 2016")

    before_2016 = df["year"] < 2016
    expected_df = df[before_2016]

    assert_eq(expected_df, return_df)