def test_in_operator_expression_numerical(): """ Test using in operator in a sql query :return: """ my_frame = query(""" select * from forest_fires where X in (5, 9) """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame["X"]).isin( (5, 9))].reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_not_in_operator(): """ Test using in operator in a sql query :return: """ my_frame = query(""" select * from forest_fires where day not in ('fri', 'sun') """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[~pandas_frame.day.isin( ("fri", "sun"))].reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_operations_between_columns_and_numbers(): """ Tests operations between columns :return: """ my_frame = query( """select temp * wind + rain / dmc + 37 from forest_fires""") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = (pandas_frame["temp"] * pandas_frame["wind"] + pandas_frame["rain"] / pandas_frame["DMC"] + 37) pandas_frame = pandas_frame["_col0"].to_frame() tm.assert_frame_equal(pandas_frame, my_frame)
def test_rank_statement_one_column(): """ Test rank statement :return: """ my_frame = query(""" select wind, rank() over(order by wind) as wind_rank from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame["wind_rank"] = pandas_frame.wind.rank( method="min").astype("int") tm.assert_frame_equal(pandas_frame, my_frame)
def test_order_by(): """ Test order by clause :return: """ my_frame = query( """select * from forest_fires order by temp desc, wind asc, area""") pandas_frame = FOREST_FIRES.copy() pandas_frame.sort_values(by=["temp", "wind", "area"], ascending=[0, 1, 1], inplace=True) pandas_frame.reset_index(drop=True, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_select_star_from_multiple_tables(): """ Test selecting from two different tables :return: """ my_frame = query("""select * from forest_fires, digimon_mon_list""") forest_fires = FOREST_FIRES.copy() digimon_mon_list_new = DIGIMON_MON_LIST.copy() forest_fires["_temp_id"] = 1 digimon_mon_list_new["_temp_id"] = 1 pandas_frame = merge(forest_fires, digimon_mon_list_new, on="_temp_id").drop(columns=["_temp_id"]) tm.assert_frame_equal(pandas_frame, my_frame)
def test_having_one_condition(): """ Test having clause :return: """ my_frame = query("select min(temp) from forest_fires having min(temp) > 2") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES["temp"] aggregated_df = pandas_frame.aggregate({ "_col0": "min" }).to_frame().transpose() pandas_frame = aggregated_df[aggregated_df["_col0"] > 2] tm.assert_frame_equal(pandas_frame, my_frame)
def test_intersect_distinct(): """ Test union distinct in queries :return: """ my_frame = query(""" select * from forest_fires order by wind desc limit 5 intersect distinct select * from forest_fires order by wind desc limit 3 """) pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(5)) pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(3)) pandas_frame = merge( left=pandas_frame1, right=pandas_frame2, how="inner", on=list(pandas_frame1.columns), ) tm.assert_frame_equal(pandas_frame, my_frame)
def test_having_with_group_by(): """ Test having clause :return: """ my_frame = query( "select day, min(temp) from forest_fires group by day having min(temp) > 5" ) pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES["temp"] pandas_frame = (pandas_frame[["day", "_col0"]].groupby("day").aggregate( {"_col0": np.min})) pandas_frame = pandas_frame[pandas_frame["_col0"] > 5].reset_index() tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_statement_with_same_conditions(): """ Test using case statements :return: """ my_frame = query(""" select case when wind > 5 then month when wind > 5 then 'mid' else day end from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = FOREST_FIRES["month"] pandas_frame.loc[~(pandas_frame.wind > 5), "_col0"] = FOREST_FIRES["day"] pandas_frame.drop(columns=["wind"], inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_timestamps(): """ Select now() as date :return: """ with freeze_time(datetime.now()): my_frame = query(""" select wind, now(), today(), timestamp('2019-01-31', '23:20:32') from forest_fires""") pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame["now()"] = datetime.now() pandas_frame["today()"] = date.today() pandas_frame["_literal0"] = datetime(2019, 1, 31, 23, 20, 32) tm.assert_frame_equal(pandas_frame, my_frame)
def test_between_operator(): """ Test using between operator :return: """ my_frame = query(""" select * from forest_fires where wind between 5 and 6 """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame.wind >= 5) & (pandas_frame.wind <= 6)].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_rank_over_partition_by(): """ Test rank partition by statement :return: """ my_frame = query(""" select wind, rain, month, day, rank() over(partition by day order by wind desc, rain asc, month) as rank from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month", "day"]] partition_slice = 4 rank_map = {} partition_rank_counter = {} partition_rank_offset = {} pandas_frame.sort_values(by=["wind", "rain", "month"], ascending=[False, True, True], inplace=True) pandas_frame.reset_index(inplace=True) pandas_frame["rank"] = 0 rank_series = pandas_frame["rank"].copy() for row_num, series_tuple in enumerate(pandas_frame.iterrows()): row = series_tuple[1] row_list = list(row)[1:partition_slice] partition_list = list(row)[partition_slice:5] key = str(row_list) partition_key = str(partition_list) if rank_map.get(partition_key): if rank_map[partition_key].get(key): partition_rank_counter[partition_key] += 1 rank = rank_map[partition_key][key] else: partition_rank_counter[partition_key] += 1 rank = (partition_rank_counter[partition_key] + partition_rank_offset[partition_key]) rank_map[partition_key][key] = rank else: rank = 1 rank_map[partition_key] = {} partition_rank_counter[partition_key] = 1 partition_rank_offset[partition_key] = 0 rank_map[partition_key][key] = rank rank_series[row_num] = rank pandas_frame["rank"] = rank_series pandas_frame.sort_values(by="index", ascending=True, inplace=True) pandas_frame.drop(columns=["index"], inplace=True) pandas_frame.reset_index(drop=True, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_statement_w_no_name(): """ Test using case statements :return: """ my_frame = query(""" select case when wind > 5 then 'strong' when wind = 5 then 'mid' else 'weak' end from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = "strong" pandas_frame.loc[pandas_frame.wind == 5, "_col0"] = "mid" pandas_frame.loc[~((pandas_frame.wind == 5) | (pandas_frame.wind > 5)), "_col0"] = "weak" pandas_frame.drop(columns=["wind"], inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_agg_w_groupby(): """ Test using aggregates and group by together :return: """ my_frame = query( "select day, month, min(temp), max(temp) from forest_fires group by day, month" ) pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = pandas_frame.temp pandas_frame["_col1"] = pandas_frame.temp pandas_frame = (pandas_frame.groupby(["day", "month"]).aggregate({ "_col0": np.min, "_col1": np.max }).reset_index()) tm.assert_frame_equal(pandas_frame, my_frame)
def test_boolean_order_of_operations_with_parens(): """ Test boolean order of operations with parentheses :return: """ my_frame = query("select * from forest_fires " "where (month = 'oct' and day = 'fri') or " "(month = 'nov' and day = 'tue')") pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[((pandas_frame["month"] == "oct") & (pandas_frame["day"] == "fri")) | ((pandas_frame["month"] == "nov") & (pandas_frame["day"] == "tue"))].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_all_boolean_ops_clause(): """ Test where clause :return: """ my_frame = query( """select * from forest_fires where month = 'mar' and temp > 8 and rain >= 0 and area != 0 and dc < 100 and ffmc <= 90.1 """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame.month == "mar") & (pandas_frame.temp > 8.0) & (pandas_frame.rain >= 0) & (pandas_frame.area != 0) & (pandas_frame.DC < 100) & (pandas_frame.FFMC <= 90.1)].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_having_multiple_conditions(): """ Test having clause :return: """ my_frame = query( "select min(temp) from forest_fires having min(temp) > 2 and " "max(dc) < 200 or max(dc) > 1000") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES["temp"] aggregated_df = pandas_frame.aggregate({ "_col0": "min" }).to_frame().transpose() max_dc_df = (FOREST_FIRES["DC"].aggregate({ "DC": "max" }).to_frame().reset_index(drop=True)) pandas_frame = aggregated_df[(aggregated_df["_col0"] > 2) & (max_dc_df["DC"] < 200) | (max_dc_df["DC"] > 1000)] tm.assert_frame_equal(pandas_frame, my_frame)
def test_multiple_aggs(): """ Test multiple aggregations :return: """ my_frame = query( "select min(temp), max(temp), avg(temp), max(wind) from forest_fires") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES.temp.copy() pandas_frame["_col1"] = FOREST_FIRES.temp.copy() pandas_frame["_col2"] = FOREST_FIRES.temp.copy() pandas_frame = pandas_frame.agg({ "_col0": np.min, "_col1": np.max, "_col2": np.mean, "wind": np.max }) pandas_frame.rename({"wind": "_col3"}, inplace=True) pandas_frame = pandas_frame.to_frame().transpose() tm.assert_frame_equal(pandas_frame, my_frame)
def test_select_star_from_multiple_tables(): """ Test selecting from two different tables :return: """ my_frame = query("""select * from forest_fires, digimon_mon_list""") forest_fires = FOREST_FIRES.copy() digimon_mon_list = DIGIMON_MON_LIST.copy() pandas_frame = merge(forest_fires.assign(__=1), digimon_mon_list.assign(__=1), on="__", how="inner") del pandas_frame["__"] renamed = {} for column in pandas_frame.columns: if "_x" in column: renamed[column] = "forest_fires." + column.replace("_x", "") if "_y" in column: renamed[column] = "digimon_mon_list." + column.replace("_y", "") pandas_frame.rename(columns=renamed, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def pandas_frame2_for_set_ops(): return (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[True], kind="mergesort").head(5))