コード例 #1
0
    def test_processing_time_offsets_cache(self):
        """
        Ensure that time_offsets can generate the correct query
        """
        self.login(username="******")
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        # should process empty dateframe correctly
        # due to "name" is random generated, each time_offset slice will be empty
        payload["queries"][0]["groupby"] = ["name"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1990 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        # query without cache
        query_context.processing_time_offsets(df, query_object)
        # query with cache
        rv = query_context.processing_time_offsets(df, query_object)
        cache_keys = rv["cache_keys"]
        cache_keys__1_year_ago = cache_keys[0]
        cache_keys__1_year_later = cache_keys[1]
        self.assertIsNotNone(cache_keys__1_year_ago)
        self.assertIsNotNone(cache_keys__1_year_later)
        self.assertNotEqual(cache_keys__1_year_ago, cache_keys__1_year_later)

        # swap offsets
        payload["queries"][0]["time_offsets"] = ["1 year later", "1 year ago"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        rv = query_context.processing_time_offsets(df, query_object)
        cache_keys = rv["cache_keys"]
        self.assertEqual(cache_keys__1_year_ago, cache_keys[1])
        self.assertEqual(cache_keys__1_year_later, cache_keys[0])

        # remove all offsets
        payload["queries"][0]["time_offsets"] = []
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        rv = query_context.processing_time_offsets(
            df,
            query_object,
        )
        self.assertIs(rv["df"], df)
        self.assertEqual(rv["queries"], [])
        self.assertEqual(rv["cache_keys"], [])
コード例 #2
0
    def test_time_offsets_sql(self):
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["state"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1980 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        # set time_offsets to query_object
        payload["queries"][0]["time_offsets"] = ["3 years ago", "3 years later"]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        time_offsets_obj = query_context.processing_time_offsets(df, query_object)
        query_from_1977_to_1988 = time_offsets_obj["queries"][0]
        query_from_1983_to_1994 = time_offsets_obj["queries"][1]

        # should generate expected date range in sql
        assert "1977-01-01" in query_from_1977_to_1988
        assert "1988-01-01" in query_from_1977_to_1988
        assert "1983-01-01" in query_from_1983_to_1994
        assert "1994-01-01" in query_from_1983_to_1994
コード例 #3
0
    def test_time_offsets_accuracy(self):
        payload = get_query_context("birth_names")
        payload["queries"][0]["metrics"] = ["sum__num"]
        payload["queries"][0]["groupby"] = ["state"]
        payload["queries"][0]["is_timeseries"] = True
        payload["queries"][0]["timeseries_limit"] = 5
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1980 : 1991"
        payload["queries"][0]["granularity"] = "ds"
        payload["queries"][0]["extras"]["time_grain_sqla"] = "P1Y"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get main query dataframe
        df = query_result.df

        # set time_offsets to query_object
        payload["queries"][0]["time_offsets"] = [
            "3 years ago", "3 years later"
        ]
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        time_offsets_obj = query_context.processing_time_offsets(
            df, query_object)
        df_with_offsets = time_offsets_obj["df"]
        df_with_offsets = df_with_offsets.set_index(["__timestamp", "state"])

        # should get correct data when apply "3 years ago"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1977 : 1988"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years ago"
        df_3_years_ago = query_result.df
        df_3_years_ago["__timestamp"] = df_3_years_ago[
            "__timestamp"] + DateOffset(years=3)
        df_3_years_ago = df_3_years_ago.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_ago.index:
                assert (row["sum__num__3 years ago"] ==
                        df_3_years_ago.loc[index]["sum__num"])

        # should get correct data when apply "3 years later"
        payload["queries"][0]["time_offsets"] = []
        payload["queries"][0]["time_range"] = "1983 : 1994"
        query_context = ChartDataQueryContextSchema().load(payload)
        query_object = query_context.queries[0]
        query_result = query_context.get_query_result(query_object)
        # get df for "3 years later"
        df_3_years_later = query_result.df
        df_3_years_later["__timestamp"] = df_3_years_later[
            "__timestamp"] - DateOffset(years=3)
        df_3_years_later = df_3_years_later.set_index(["__timestamp", "state"])
        for index, row in df_with_offsets.iterrows():
            if index in df_3_years_later.index:
                assert (row["sum__num__3 years later"] ==
                        df_3_years_later.loc[index]["sum__num"])