def test_for_non_existent_table():
    """
    Check that DataFrameDoesNotExist is raised when the queried table does not exist.

    The test fails explicitly when ``query`` returns without raising; any other
    exception type propagates and fails the test as well.
    :return:
    """
    try:
        query("select * from a_table_that_is_not_here")
    except DataFrameDoesNotExist:
        # Expected outcome: the missing table was reported.
        return
    # Reaching this point means no exception was raised at all.
    raise AssertionError(
        "query() did not raise DataFrameDoesNotExist for a missing table"
    )
def test_for_valid_query():
    """
    Test that InvalidQueryException is raised for an invalid query.

    The test fails explicitly when ``query`` accepts the malformed sql without
    raising; any other exception type propagates and fails the test as well.
    :return:
    """
    sql = "hello world!"
    try:
        query(sql)
    except InvalidQueryException:
        # Expected outcome: the malformed sql was rejected.
        return
    # Reaching this point means the invalid sql was silently accepted.
    raise AssertionError(
        "query() did not raise InvalidQueryException for invalid sql"
    )
def test_rank_statement_many_columns():
    """
    Compare rank() over several ordering columns against a hand-computed ranking
    :return:
    """
    my_frame = query(
        " select wind, rain, month, "
        "rank() over(order by wind desc, rain asc, month) as rank "
        "from forest_fires "
    )

    ordering_columns = ["wind", "rain", "month"]
    pandas_frame = (
        FOREST_FIRES.copy()[ordering_columns]
        .sort_values(by=ordering_columns, ascending=[False, True, True])
        .reset_index()
    )

    seen_ranks = {}
    next_rank = 1
    tie_count = 0
    pandas_frame["rank"] = 0
    computed_ranks = pandas_frame["rank"].copy()
    for position, (_, record) in enumerate(pandas_frame.iterrows()):
        # Positions 1..3 of the row hold wind, rain and month (position 0 is
        # the original index kept by reset_index).
        key = "".join(str(value) for value in list(record)[1:4])
        if seen_ranks.get(key):
            # Tied rows share the rank of their first occurrence.
            tie_count += 1
            rank = seen_ranks[key]
        else:
            rank = next_rank + tie_count
            seen_ranks[key] = rank
            next_rank += 1
        computed_ranks[position] = rank
    pandas_frame["rank"] = computed_ranks

    # Restore the original row order before comparing.
    pandas_frame = (
        pandas_frame.sort_values(by="index", ascending=True)
        .drop(columns=["index"])
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_statement_w_name():
    """
    Check the execution plan produced for a named case statement
    :return:
    """
    sql = (
        " select case when wind > 5 then 'strong' "
        "when wind = 5 then 'mid' "
        "else 'weak' end as wind_strength "
        "from forest_fires "
    )
    expected_plan = (
        "FOREST_FIRES.loc[:, []].assign(wind_strength=NONE_SERIES"
        ".mask(((FOREST_FIRES['wind']>5) ^ (FALSE_SERIES)) & "
        "(FOREST_FIRES['wind']>5), 'strong')"
        ".mask(((FOREST_FIRES['wind']==5) ^ ((FALSE_SERIES) | "
        "(FOREST_FIRES['wind']>5))) & (FOREST_FIRES['wind']==5), "
        "'mid').where(((FALSE_SERIES) | (FOREST_FIRES['wind']>5)) "
        "| (FOREST_FIRES['wind']==5), 'weak'))"
    )
    _, plan = query(sql, show_execution_plan=True)
    assert plan == expected_plan
def test_type_conversion():
    """
    Check cast expressions on columns and literals against pandas astype
    :return:
    """
    sql = (
        "select cast(temp as int64), cast(RH as float64) my_rh, "
        "wind, rain, area, "
        "cast(2.0 as int64) my_int, "
        "cast(3 as float64) as my_float, "
        "cast(7 as object) as my_object, "
        "cast(0 as bool) as my_bool "
        "from forest_fires"
    )
    my_frame = query(sql)

    target_types = {
        "temp": "int64",
        "my_rh": "float64",
        "my_int": "int64",
        "my_float": "float64",
        "my_bool": "bool",
    }
    pandas_frame = (
        FOREST_FIRES[["temp", "RH", "wind", "rain", "area"]]
        .rename(columns={"RH": "my_rh"})
        .assign(my_int=2, my_float=3, my_object=str(7), my_bool=0)
        .astype(target_types)
    )
    tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_insensitivity():
    """
    Check the execution plan when the table name is written in mixed case
    :return:
    """
    _, plan = query("select * from FOREST_fires", show_execution_plan=True)
    expected_plan = "FOREST_FIRES"
    assert plan == expected_plan
def test_select_columns_from_two_tables_with_same_column_name():
    """
    Select everything from two aliased copies of the same table and compare
    with a pandas cross join whose columns are prefixed by the table alias
    :return:
    """
    my_frame = query("select * from forest_fires table1, forest_fires table2")

    # Cross join through a constant helper column that is removed afterwards.
    left = FOREST_FIRES.copy().assign(__=1)
    right = FOREST_FIRES.copy().assign(__=1)
    pandas_frame = merge(left, right, on="__", how="inner", copy=False)
    del pandas_frame["__"]

    # Translate the merge suffixes into the "<alias>." prefixes used by query.
    renamed = {}
    for column in pandas_frame.columns:
        for suffix, prefix in (("_x", "table1."), ("_y", "table2.")):
            if suffix in column:
                renamed[column] = prefix + column.replace(suffix, "")
    pandas_frame.rename(columns=renamed, inplace=True)

    for column in my_frame.columns:
        tm.assert_series_equal(pandas_frame[column], my_frame[column])
    tm.assert_frame_equal(my_frame, pandas_frame)
def test_sql_data_types():
    """
    Check every supported sql cast type name against the pandas dtype it maps to
    :return:
    """
    my_frame = query(
        " select cast(avocado_id as object) as avocado_id_object, "
        "cast(avocado_id as int16) as avocado_id_int16, "
        "cast(avocado_id as smallint) as avocado_id_smallint, "
        "cast(avocado_id as int32) as avocado_id_int32, "
        "cast(avocado_id as int) as avocado_id_int, "
        "cast(avocado_id as int64) as avocado_id_int64, "
        "cast(avocado_id as bigint) as avocado_id_bigint, "
        "cast(avocado_id as float) as avocado_id_float, "
        "cast(avocado_id as float16) as avocado_id_float16, "
        "cast(avocado_id as float32) as avocado_id_float32, "
        "cast(avocado_id as float64) as avocado_id_float64, "
        "cast(avocado_id as bool) as avocado_id_bool, "
        "cast(avocado_id as category) as avocado_id_category, "
        "cast(date as datetime64) as date, "
        "cast(date as timestamp) as time, "
        "cast(region as varchar) as region_varchar, "
        "cast(region as string) as region_string "
        "from avocado "
    )

    # (result column, source column, pandas dtype), in select-list order.
    conversions = (
        ("avocado_id_object", "avocado_id", "object"),
        ("avocado_id_int16", "avocado_id", "int16"),
        ("avocado_id_smallint", "avocado_id", "int16"),
        ("avocado_id_int32", "avocado_id", "int32"),
        ("avocado_id_int", "avocado_id", "int32"),
        ("avocado_id_int64", "avocado_id", "int64"),
        ("avocado_id_bigint", "avocado_id", "int64"),
        ("avocado_id_float", "avocado_id", "float"),
        ("avocado_id_float16", "avocado_id", "float16"),
        ("avocado_id_float32", "avocado_id", "float32"),
        ("avocado_id_float64", "avocado_id", "float64"),
        ("avocado_id_bool", "avocado_id", "bool"),
        ("avocado_id_category", "avocado_id", "category"),
        ("date", "Date", "datetime64"),
        ("time", "Date", "datetime64"),
        ("region_varchar", "region", "string"),
        ("region_string", "region", "string"),
    )

    source_columns = ["avocado_id", "Date", "region"]
    pandas_frame = AVOCADO.copy()[source_columns]
    for target, source, dtype in conversions:
        pandas_frame[target] = pandas_frame[source].astype(dtype)
    pandas_frame = pandas_frame.drop(columns=source_columns)

    tm.assert_frame_equal(pandas_frame, my_frame)
def test_limit():
    """
    Compare a limit clause with taking the first rows of the frame
    :return:
    """
    row_limit = 10
    expected = FOREST_FIRES.copy().head(row_limit)
    result = query("select * from forest_fires limit 10")
    tm.assert_frame_equal(expected, result)
def test_select_star():
    """
    Check the execution plan of the plain select * query
    :return:
    """
    _, plan = query("select * from forest_fires", show_execution_plan=True)
    print(plan)
    expected_plan = "FOREST_FIRES"
    assert plan == expected_plan
def test_case_insensitivity():
    """
    Query the table with a mixed-case name and compare with the full frame
    :return:
    """
    result = query("select * from FOREST_fires")
    tm.assert_frame_equal(FOREST_FIRES, result)
def test_select_star():
    """
    Compare the plain select * query with the full frame
    :return:
    """
    result = query("select * from forest_fires")
    tm.assert_frame_equal(FOREST_FIRES, result)
def test_limit():
    """
    Check the execution plan produced for a limit clause
    :return:
    """
    sql = "select * from forest_fires limit 10"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = "FOREST_FIRES.head(10)"
    assert plan == expected_plan
def test_using_math():
    """
    Compare an arithmetic expression column with the value Python computes
    :return:
    """
    result = query("select temp, 1 + 2 * 3 as my_number from forest_fires")
    expected = FOREST_FIRES[["temp"]].copy().assign(my_number=1 + 2 * 3)
    tm.assert_frame_equal(expected, result)
def test_subquery():
    """
    Select everything from an aliased subquery and compare with the column subset
    :return:
    """
    sql = "select * from (select area, rain from forest_fires) rain_area"
    result = query(sql)
    expected = FOREST_FIRES[["area", "rain"]].copy()
    tm.assert_frame_equal(expected, result)
def test_where_clause():
    """
    Check the execution plan produced for a where clause
    :return:
    """
    sql = "select * from forest_fires where month = 'mar'"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = "FOREST_FIRES.loc[FOREST_FIRES['month']=='mar', :]"
    assert plan == expected_plan
def test_maintain_case_in_query():
    """
    Check the execution plan when a column is selected with different letter
    case than the frame uses (rh vs RH)
    :return:
    """
    sql = "select wind, rh from forest_fires"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = (
        "FOREST_FIRES.loc[:, ['wind', 'RH']].rename(columns={'RH': 'rh'})"
    )
    assert plan == expected_plan
def test_maintain_case_in_query():
    """
    Select a column with different letter case than the frame uses (rh vs RH)
    and expect the output column to carry the case used in the query
    :return:
    """
    result = query("select wind, rh from forest_fires")
    selected = FOREST_FIRES.copy()[["wind", "RH"]]
    expected = selected.rename(columns={"RH": "rh"})
    tm.assert_frame_equal(expected, result)
def test_where_clause():
    """
    Compare a where clause with the equivalent boolean row filter
    :return:
    """
    result = query("select * from forest_fires where month = 'mar'")
    fires = FOREST_FIRES.copy()
    is_march = fires.month == "mar"
    expected = fires[is_march].reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
def test_group_by():
    """
    Check the execution plan produced for a group by without aggregates
    :return:
    """
    sql = "select month, day from forest_fires group by month, day"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = (
        "FOREST_FIRES.loc[:, ['month', 'day']].drop_duplicates(keep='first')"
    )
    assert plan == expected_plan
def test_select_specific_fields():
    """
    Select a subset of columns, one of them aliased, and compare with pandas
    :return:
    """
    sql = "select temp, RH, wind, rain as water, area from forest_fires"
    result = query(sql)
    selected = FOREST_FIRES[["temp", "RH", "wind", "rain", "area"]]
    expected = selected.rename(columns={"rain": "water"})
    tm.assert_frame_equal(expected, result)
def test_group_by():
    """
    Compare a group by without aggregates with dropping duplicate rows
    :return:
    """
    result = query("select month, day from forest_fires group by month, day")
    unique_pairs = FOREST_FIRES[["month", "day"]].drop_duplicates()
    expected = unique_pairs.reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
def test_min():
    """
    Compare the min aggregate with the pandas aggregation of the same column
    :return:
    """
    result = query("select min(temp) from forest_fires")
    minimum = FOREST_FIRES.agg({"temp": np.min})
    # An unnamed aggregate is compared against a column called "_col0".
    expected = minimum.to_frame("_col0").reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
def test_set_string_value_as_column_value():
    """
    Select a string literal such as 'yes' as the value of a named column
    :return:
    """
    result = query(" select wind, 'yes' as wind_yes from forest_fires")
    with_literal = FOREST_FIRES.copy().assign(wind_yes="yes")
    expected = with_literal[["wind", "wind_yes"]]
    tm.assert_frame_equal(expected, result)
def test_distinct():
    """
    Compare the distinct keyword with dropping duplicate rows in pandas
    :return:
    """
    result = query("select distinct area, rain from forest_fires")
    expected = (
        FOREST_FIRES[["area", "rain"]]
        .copy()
        .drop_duplicates(keep="first")
        .reset_index(drop=True)
    )
    tm.assert_frame_equal(expected, result)
def test_count_star():
    """
    Compare count(*) with the pandas count of a single column
    :return:
    """
    result = query("select count(*) from forest_fires")
    month_count = FOREST_FIRES.copy()[["month"]].count()
    expected = month_count.to_frame("_col0").reset_index(drop=True)
    tm.assert_frame_equal(expected, result)
def test_using_math():
    """
    Check the execution plan produced for an arithmetic expression column
    :return:
    """
    sql = "select temp, 1 + 2 * 3 as my_number from forest_fires"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = "FOREST_FIRES.loc[:, ['temp']].assign(my_number=1 + 2 * 3)"
    assert plan == expected_plan
def test_distinct():
    """
    Check the execution plan produced for the distinct keyword
    :return:
    """
    sql = "select distinct area, rain from forest_fires"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = (
        "FOREST_FIRES.loc[:, ['area', 'rain']].drop_duplicates("
        "keep='first', inplace=True)"
    )
    assert plan == expected_plan
def test_set_string_value_as_column_value():
    """
    Check the execution plan when a string literal such as 'yes' is selected
    as a named column
    :return:
    """
    sql = " select wind, 'yes' as wind_yes from forest_fires"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = "FOREST_FIRES.loc[:, ['wind']].assign(wind_yes='yes', )"
    assert plan == expected_plan
def test_min():
    """
    Check the execution plan produced for the min aggregate
    :return:
    """
    sql = "select min(temp) from forest_fires"
    _, plan = query(sql, show_execution_plan=True)
    expected_plan = (
        "FOREST_FIRES.loc[:, ['temp']].assign(__=1).groupby(['__']).agg("
        "**{'_col0': ('temp', 'min')}).reset_index(drop=True)"
    )
    assert plan == expected_plan