Beispiel #1
0
def test_for_non_existent_table():
    """
    Check that querying a table that does not exist raises
    DataFrameDoesNotExist.

    The test fails when ``query`` returns normally, and also when it raises
    any exception that is not a DataFrameDoesNotExist.
    :return:
    """
    try:
        query("select * from a_table_that_is_not_here")
    except Exception as err:
        assert isinstance(err, DataFrameDoesNotExist)
    else:
        # Without this branch the test would pass silently when no exception
        # is raised at all.
        raise AssertionError(
            "query() did not raise for a table that does not exist")
Beispiel #2
0
def test_for_valid_query():
    """
    Test that InvalidQueryException is raised for an invalid query.

    The test fails when ``query`` accepts the malformed statement without
    raising; any other exception type propagates and fails the test as well.
    :return:
    """
    sql = "hello world!"
    try:
        query(sql)
    except InvalidQueryException:
        # Expected outcome: the malformed statement was rejected.
        return
    # Reaching this point means nothing was raised, which must not count as
    # a pass.
    raise AssertionError("query() did not raise for an invalid query")
Beispiel #3
0
def test_rank_statement_many_columns():
    """
    Compare rank() ordered by several columns against a hand-built ranking
    :return:
    """
    sql = """
    select wind, rain, month, rank() over(order by wind desc, rain asc, month) as rank
    from forest_fires
    """
    my_frame = query(sql)

    # Build the expected frame: sort the way the window orders its rows and
    # keep the original row positions in an "index" column for later.
    expected = FOREST_FIRES.copy()[["wind", "rain", "month"]]
    expected = expected.sort_values(
        by=["wind", "rain", "month"], ascending=[False, True, True]
    ).reset_index()

    seen_ranks = {}
    next_rank = 1
    tie_count = 0
    expected["rank"] = 0
    ranks = expected["rank"].copy()
    for position, (_, record) in enumerate(expected.iterrows()):
        # Values 1..3 of the row are wind, rain and month; value 0 is the
        # saved original index.
        key = "".join(str(value) for value in list(record)[1:4])
        if key in seen_ranks:
            # Repeated key: reuse its rank and remember the tie so the
            # next new key skips ahead.
            tie_count += 1
            current = seen_ranks[key]
        else:
            current = next_rank + tie_count
            seen_ranks[key] = current
            next_rank += 1
        ranks[position] = current
    expected["rank"] = ranks

    # Put the rows back in their original order before comparing.
    expected = (
        expected.sort_values(by="index", ascending=True)
        .drop(columns=["index"])
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(expected, my_frame)
Beispiel #4
0
def test_case_statement_w_name():
    """
    Check the execution plan generated for an aliased case statement
    :return:
    """
    sql = """
        select case when wind > 5 then 'strong'
        when wind = 5 then 'mid'
        else 'weak' end as wind_strength
        from
        forest_fires
        """
    expected_plan = (
        "FOREST_FIRES.loc[:, []].assign(wind_strength=NONE_SERIES"
        ".mask(((FOREST_FIRES['wind']>5) ^ (FALSE_SERIES)) & "
        "(FOREST_FIRES['wind']>5), 'strong')"
        ".mask(((FOREST_FIRES['wind']==5) ^ ((FALSE_SERIES) | "
        "(FOREST_FIRES['wind']>5))) & (FOREST_FIRES['wind']==5), "
        "'mid').where(((FALSE_SERIES) | (FOREST_FIRES['wind']>5)) "
        "| (FOREST_FIRES['wind']==5), 'weak'))"
    )

    _, plan = query(sql, show_execution_plan=True)

    assert plan == expected_plan
Beispiel #5
0
def test_type_conversion():
    """
    Tests cast expressions on columns and on literal values
    :return:
    """
    sql = """select cast(temp as int64),
        cast(RH as float64) my_rh, wind, rain, area,
        cast(2.0 as int64) my_int,
        cast(3 as float64) as my_float,
        cast(7 as object) as my_object,
        cast(0 as bool) as my_bool from forest_fires"""
    my_frame = query(sql)

    selected = ["temp", "RH", "wind", "rain", "area"]
    expected = FOREST_FIRES[selected].rename(columns={"RH": "my_rh"})

    # Literal columns, added in the order they appear in the select list.
    literal_columns = {
        "my_int": 2,
        "my_float": 3,
        "my_object": "7",
        "my_bool": 0,
    }
    for name, value in literal_columns.items():
        expected[name] = value

    target_dtypes = {
        "temp": "int64",
        "my_rh": "float64",
        "my_int": "int64",
        "my_float": "float64",
        "my_bool": "bool",
    }
    expected = expected.astype(target_dtypes)
    tm.assert_frame_equal(expected, my_frame)
Beispiel #6
0
def test_case_insensitivity():
    """
    Check that a mixed-case table name yields the FOREST_FIRES plan
    :return:
    """
    mixed_case_sql = "select * from FOREST_fires"
    expected_plan = "FOREST_FIRES"
    _, plan = query(mixed_case_sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #7
0
def test_select_columns_from_two_tables_with_same_column_name():
    """
    Test selecting every column from a cross join of a table with itself.

    The expected frame is built by merging two copies of FOREST_FIRES on a
    constant helper column and renaming the ``_x`` / ``_y`` suffixed columns
    produced by the merge to ``table1.<name>`` / ``table2.<name>``.
    :return:
    """
    my_frame = query(
        """select * from forest_fires table1, forest_fires table2""")
    table1 = FOREST_FIRES.copy()
    table2 = FOREST_FIRES.copy()
    # Joining on a constant column pairs every row with every other row.
    pandas_frame = merge(table1.assign(__=1),
                         table2.assign(__=1),
                         on="__",
                         how="inner",
                         copy=False)
    del pandas_frame["__"]

    # Match the merge suffix only at the END of the name and strip just that
    # suffix, so a column that merely contains "_x" or "_y" elsewhere in its
    # name is neither renamed nor mangled.
    suffix_to_prefix = {"_x": "table1.", "_y": "table2."}
    renamed = {}
    for column in pandas_frame.columns:
        for suffix, prefix in suffix_to_prefix.items():
            if column.endswith(suffix):
                renamed[column] = prefix + column[:-len(suffix)]
    pandas_frame.rename(columns=renamed, inplace=True)

    # Column-by-column comparison first, so a failure names the column.
    for column in my_frame.columns:
        tm.assert_series_equal(pandas_frame[column], my_frame[column])

    tm.assert_frame_equal(my_frame, pandas_frame)
Beispiel #8
0
def test_sql_data_types():
    """
    Tests casting columns to each supported sql / pandas data type.

    The expected frame is built by applying the equivalent ``astype`` call
    to the source column for every cast in the query, in select-list order.
    :return:
    """
    my_frame = query("""
        select
            cast(avocado_id as object) as avocado_id_object,
            cast(avocado_id as int16) as avocado_id_int16,
            cast(avocado_id as smallint) as avocado_id_smallint,
            cast(avocado_id as int32) as avocado_id_int32,
            cast(avocado_id as int) as avocado_id_int,
            cast(avocado_id as int64) as avocado_id_int64,
            cast(avocado_id as bigint) as avocado_id_bigint,
            cast(avocado_id as float) as avocado_id_float,
            cast(avocado_id as float16) as avocado_id_float16,
            cast(avocado_id as float32) as avocado_id_float32,
            cast(avocado_id as float64) as avocado_id_float64,
            cast(avocado_id as bool) as avocado_id_bool,
            cast(avocado_id as category) as avocado_id_category,
            cast(date as datetime64) as date,
            cast(date as timestamp) as time,
            cast(region as varchar) as region_varchar,
            cast(region as string) as region_string
        from avocado
        """)

    source_columns = ["avocado_id", "Date", "region"]
    # (result column, source column, pandas dtype), in select-list order.
    # The datetime casts name the unit explicitly: a unit-less "datetime64"
    # is rejected by astype on pandas >= 2.0, while "datetime64[ns]" gives
    # the same nanosecond dtype on older versions.
    casts = [
        ("avocado_id_object", "avocado_id", "object"),
        ("avocado_id_int16", "avocado_id", "int16"),
        ("avocado_id_smallint", "avocado_id", "int16"),
        ("avocado_id_int32", "avocado_id", "int32"),
        ("avocado_id_int", "avocado_id", "int32"),
        ("avocado_id_int64", "avocado_id", "int64"),
        ("avocado_id_bigint", "avocado_id", "int64"),
        ("avocado_id_float", "avocado_id", "float"),
        ("avocado_id_float16", "avocado_id", "float16"),
        ("avocado_id_float32", "avocado_id", "float32"),
        ("avocado_id_float64", "avocado_id", "float64"),
        ("avocado_id_bool", "avocado_id", "bool"),
        ("avocado_id_category", "avocado_id", "category"),
        ("date", "Date", "datetime64[ns]"),
        ("time", "Date", "datetime64[ns]"),
        ("region_varchar", "region", "string"),
        ("region_string", "region", "string"),
    ]

    pandas_frame = AVOCADO.copy()[source_columns]
    for target, source, dtype in casts:
        pandas_frame[target] = pandas_frame[source].astype(dtype)
    # Only the cast results are part of the query output.
    pandas_frame = pandas_frame.drop(columns=source_columns)

    tm.assert_frame_equal(pandas_frame, my_frame)
Beispiel #9
0
def test_limit():
    """
    Check that ``limit 10`` matches the first ten rows of FOREST_FIRES
    :return:
    """
    sql = """select * from forest_fires limit 10"""
    result = query(sql)
    first_ten = FOREST_FIRES.copy().head(10)
    tm.assert_frame_equal(first_ten, result)
Beispiel #10
0
def test_select_star():
    """
    Check the execution plan produced for a plain ``select *``
    :return:
    """
    sql = "select * from forest_fires"
    expected_plan = "FOREST_FIRES"
    _, plan = query(sql, show_execution_plan=True)
    print(plan)
    assert plan == expected_plan
Beispiel #11
0
def test_case_insensitivity():
    """
    Check that a mixed-case table name returns the FOREST_FIRES frame
    :return:
    """
    mixed_case_sql = "select * from FOREST_fires"
    result = query(mixed_case_sql)
    tm.assert_frame_equal(FOREST_FIRES, result)
Beispiel #12
0
def test_select_star():
    """
    Check that a plain ``select *`` returns the FOREST_FIRES frame
    :return:
    """
    sql = "select * from forest_fires"
    result = query(sql)
    tm.assert_frame_equal(FOREST_FIRES, result)
Beispiel #13
0
def test_limit():
    """
    Check the execution plan produced for a limit clause
    :return:
    """
    sql = """select * from forest_fires limit 10"""
    expected_plan = "FOREST_FIRES.head(10)"
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #14
0
def test_using_math():
    """
    Check arithmetic in the select list, including operator precedence
    :return:
    """
    sql = "select temp, 1 + 2 * 3 as my_number from forest_fires"
    result = query(sql)
    # Python applies the same precedence, so the expected constant is
    # written as the same expression.
    expected = FOREST_FIRES[["temp"]].assign(my_number=1 + 2 * 3)
    tm.assert_frame_equal(expected, result)
Beispiel #15
0
def test_subquery():
    """
    Check selecting everything from an aliased subquery
    :return:
    """
    sql = "select * from (select area, rain from forest_fires) rain_area"
    result = query(sql)
    inner_columns = ["area", "rain"]
    expected = FOREST_FIRES[inner_columns].copy()
    tm.assert_frame_equal(expected, result)
Beispiel #16
0
def test_where_clause():
    """
    Check the execution plan produced for a where clause
    :return:
    """
    sql = """select * from forest_fires where month = 'mar'"""
    expected_plan = "FOREST_FIRES.loc[FOREST_FIRES['month']=='mar', :]"
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #17
0
def test_maintain_case_in_query():
    """
    Check the plan when a column is selected with different letter case:
    the real column 'RH' is read and renamed to 'rh' as written in the query
    :return:
    """
    sql = """select wind, rh from forest_fires"""
    expected_plan = (
        "FOREST_FIRES.loc[:, ['wind', 'RH']].rename(columns={'RH': 'rh'})"
    )
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #18
0
def test_maintain_case_in_query():
    """
    Check that a column selected with different letter case is returned
    under the name written in the query ('RH' comes back as 'rh')
    :return:
    """
    sql = """select wind, rh from forest_fires"""
    result = query(sql)
    subset = FOREST_FIRES.copy()[["wind", "RH"]]
    expected = subset.rename(columns={"RH": "rh"})
    tm.assert_frame_equal(expected, result)
Beispiel #19
0
def test_where_clause():
    """
    Check that a where clause filters rows and renumbers the result
    :return:
    """
    sql = """select * from forest_fires where month = 'mar'"""
    result = query(sql)
    fires = FOREST_FIRES.copy()
    is_march = fires.month == "mar"
    expected = fires[is_march].reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
Beispiel #20
0
def test_group_by():
    """
    Check the execution plan produced for a group by without aggregates
    :return:
    """
    sql = """select month, day from forest_fires group by month, day"""
    expected_plan = (
        "FOREST_FIRES.loc[:, ['month', 'day']].drop_duplicates(keep='first')"
    )
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #21
0
def test_select_specific_fields():
    """
    Check selecting named columns, one of them with an alias
    :return:
    """
    sql = "select temp, RH, wind, rain as water, area from forest_fires"
    result = query(sql)
    selected = ["temp", "RH", "wind", "rain", "area"]
    expected = FOREST_FIRES[selected].rename(columns={"rain": "water"})
    tm.assert_frame_equal(expected, result)
Beispiel #22
0
def test_group_by():
    """
    Check that grouping without aggregates returns the distinct rows
    :return:
    """
    sql = """select month, day from forest_fires group by month, day"""
    result = query(sql)
    grouped_columns = FOREST_FIRES[["month", "day"]]
    distinct_rows = grouped_columns.drop_duplicates()
    expected = distinct_rows.reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
Beispiel #23
0
def test_min():
    """
    Check the min aggregate on a single column
    :return:
    """
    sql = "select min(temp) from forest_fires"
    result = query(sql)
    minimum = FOREST_FIRES.agg({"temp": np.min})
    # The unaliased aggregate is expected under the name "_col0", as a
    # one-row frame with a fresh index.
    expected = minimum.to_frame("_col0").reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
Beispiel #24
0
def test_set_string_value_as_column_value():
    """
    Check selecting a string literal such as 'yes' as a column value
    :return:
    """
    sql = """
    select wind, 'yes' as wind_yes from forest_fires"""
    result = query(sql)
    expected = FOREST_FIRES[["wind"]].assign(wind_yes="yes")
    tm.assert_frame_equal(expected, result)
Beispiel #25
0
def test_distinct():
    """
    Check that the distinct keyword removes duplicate rows
    :return:
    """
    sql = "select distinct area, rain from forest_fires"
    result = query(sql)
    selected = FOREST_FIRES[["area", "rain"]].copy()
    expected = selected.drop_duplicates(keep="first").reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
Beispiel #26
0
def test_count_star():
    """
    Check the count(*) aggregate
    :return:
    """
    sql = "select count(*) from forest_fires"
    result = query(sql)
    fires = FOREST_FIRES.copy()
    # The expected count is taken from the "month" column.
    counts = fires[["month"]].count()
    expected = counts.to_frame("_col0").reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
Beispiel #27
0
def test_using_math():
    """
    Check the execution plan produced for arithmetic in the select list
    :return:
    """
    sql = "select temp, 1 + 2 * 3 as my_number from forest_fires"
    expected_plan = "FOREST_FIRES.loc[:, ['temp']].assign(my_number=1 + 2 * 3)"
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #28
0
def test_distinct():
    """
    Check the execution plan produced for the distinct keyword
    :return:
    """
    sql = "select distinct area, rain from forest_fires"
    expected_plan = (
        "FOREST_FIRES.loc[:, ['area', 'rain']].drop_duplicates("
        "keep='first', inplace=True)"
    )
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #29
0
def test_set_string_value_as_column_value():
    """
    Check the execution plan when a string literal is selected as a column
    :return:
    """
    sql = """
    select wind, 'yes' as wind_yes from forest_fires"""
    expected_plan = "FOREST_FIRES.loc[:, ['wind']].assign(wind_yes='yes', )"
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
Beispiel #30
0
def test_min():
    """
    Check the execution plan produced for the min aggregate
    :return:
    """
    sql = "select min(temp) from forest_fires"
    expected_plan = (
        "FOREST_FIRES.loc[:, ['temp']].assign(__=1).groupby(['__']).agg("
        "**{'_col0': ('temp', 'min')}).reset_index(drop=True)"
    )
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan