def test_time_offsets_accuracy(self):
    """Verify that ``processing_time_offsets`` joins offset data correctly.

    The main query covers 1980:1991, grouped by state at a yearly grain.
    Applying "3 years ago" / "3 years later" offsets must yield columns
    whose values match independently executed queries over the shifted
    time ranges (1977:1988 and 1983:1994 respectively).
    """
    payload = get_query_context("birth_names")
    payload["queries"][0]["metrics"] = ["sum__num"]
    payload["queries"][0]["groupby"] = ["state"]
    payload["queries"][0]["is_timeseries"] = True
    payload["queries"][0]["timeseries_limit"] = 5
    payload["queries"][0]["time_offsets"] = []
    payload["queries"][0]["time_range"] = "1980 : 1991"
    payload["queries"][0]["granularity"] = "ds"
    payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    query_result = query_context.get_query_result(query_object)
    # get main query dataframe
    df = query_result.df

    # set time_offsets to query_object
    payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    time_offsets_obj = query_context.processing_time_offsets(df, query_object)
    df_with_offsets = time_offsets_obj["df"]
    df_with_offsets = df_with_offsets.set_index(["__timestamp", "state"])

    # should get correct data when apply "3 years ago"
    payload["queries"][0]["time_offsets"] = []
    payload["queries"][0]["time_range"] = "1977 : 1988"
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    query_result = query_context.get_query_result(query_object)
    # get df for "3 years ago"
    df_3_years_ago = query_result.df
    # shift the timestamps forward so the index aligns with the main query
    df_3_years_ago["__timestamp"] = (
        df_3_years_ago["__timestamp"] + DateOffset(years=3)
    )
    df_3_years_ago = df_3_years_ago.set_index(["__timestamp", "state"])
    compared = 0
    for index, row in df_with_offsets.iterrows():
        if index in df_3_years_ago.index:
            assert (
                row["sum__num__3 years ago"]
                == df_3_years_ago.loc[index]["sum__num"]
            )
            compared += 1
    # guard against a vacuous pass: the shifted ranges overlap the main
    # query, so at least one row must actually have been compared
    assert compared > 0

    # should get correct data when apply "3 years later"
    payload["queries"][0]["time_offsets"] = []
    payload["queries"][0]["time_range"] = "1983 : 1994"
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    query_result = query_context.get_query_result(query_object)
    # get df for "3 years later"
    df_3_years_later = query_result.df
    # shift the timestamps backward so the index aligns with the main query
    df_3_years_later["__timestamp"] = (
        df_3_years_later["__timestamp"] - DateOffset(years=3)
    )
    df_3_years_later = df_3_years_later.set_index(["__timestamp", "state"])
    compared = 0
    for index, row in df_with_offsets.iterrows():
        if index in df_3_years_later.index:
            assert (
                row["sum__num__3 years later"]
                == df_3_years_later.loc[index]["sum__num"]
            )
            compared += 1
    # same vacuous-pass guard for the "3 years later" offset
    assert compared > 0
def test_time_offsets_sql(self):
    """The SQL generated for each time offset should span the shifted range."""
    payload = get_query_context("birth_names")
    base_query = payload["queries"][0]
    base_query.update(
        {
            "metrics": ["sum__num"],
            "groupby": ["state"],
            "is_timeseries": True,
            "timeseries_limit": 5,
            "time_offsets": [],
            "time_range": "1980 : 1991",
            "granularity": "ds",
        }
    )
    base_query["extras"]["time_grain_sqla"] = "P1Y"
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    # run the main query first; its dataframe seeds the offset processing
    df = query_context.get_query_result(query_object).df

    # reload the context with both offsets attached
    base_query["time_offsets"] = ["3 years ago", "3 years later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    time_offsets_obj = query_context.processing_time_offsets(df, query_object)
    sql_3_years_ago = time_offsets_obj["queries"][0]
    sql_3_years_later = time_offsets_obj["queries"][1]

    # each offset query should carry the correspondingly shifted date bounds
    for bound in ("1977-01-01", "1988-01-01"):
        assert bound in sql_3_years_ago
    for bound in ("1983-01-01", "1994-01-01"):
        assert bound in sql_3_years_later
def test_processing_time_offsets_cache(self):
    """
    Ensure that time_offsets produce distinct, order-sensitive cache keys
    and that an empty offset list is a no-op.
    """
    self.login(username="******")
    payload = get_query_context("birth_names")
    query_payload = payload["queries"][0]
    query_payload["metrics"] = ["sum__num"]
    # should process empty dateframe correctly
    # due to "name" is random generated, each time_offset slice will be empty
    query_payload["groupby"] = ["name"]
    query_payload["is_timeseries"] = True
    query_payload["timeseries_limit"] = 5
    query_payload["time_offsets"] = []
    query_payload["time_range"] = "1990 : 1991"
    query_payload["granularity"] = "ds"
    query_payload["extras"]["time_grain_sqla"] = "P1Y"
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    # run the main query; its dataframe seeds the offset processing
    df = query_context.get_query_result(query_object).df

    query_payload["time_offsets"] = ["1 year ago", "1 year later"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    # first call populates the cache
    query_context.processing_time_offsets(df, query_object)
    # second call should be served from cache and expose the keys
    result = query_context.processing_time_offsets(df, query_object)
    key_1_year_ago = result["cache_keys"][0]
    key_1_year_later = result["cache_keys"][1]
    self.assertIsNotNone(key_1_year_ago)
    self.assertIsNotNone(key_1_year_later)
    self.assertNotEqual(key_1_year_ago, key_1_year_later)

    # swapping the offsets must swap the cache keys accordingly
    query_payload["time_offsets"] = ["1 year later", "1 year ago"]
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    result = query_context.processing_time_offsets(df, query_object)
    self.assertEqual(key_1_year_ago, result["cache_keys"][1])
    self.assertEqual(key_1_year_later, result["cache_keys"][0])

    # with no offsets, the original dataframe comes back untouched
    query_payload["time_offsets"] = []
    query_context = ChartDataQueryContextSchema().load(payload)
    query_object = query_context.queries[0]
    result = query_context.processing_time_offsets(
        df,
        query_object,
    )
    self.assertIs(result["df"], df)
    self.assertEqual(result["queries"], [])
    self.assertEqual(result["cache_keys"], [])