def get_query(self):
    """
    Build the SQL string used to read this query's results.

    If the query's state machine reports completion and the database has
    the query's table, the returned SQL selects everything from that
    table. Otherwise the SQL comes from `_make_query`.

    Returns
    -------
    str
        SQL query string.
    """
    try:
        qualified_name = self.fully_qualified_table_name
        schema, name = qualified_name.split(".")
        query_state = QueryStateMachine(
            get_redis(), self.query_id, get_db().conn_id
        )
        query_state.wait_until_complete()
        table_is_ready = query_state.is_completed and get_db().has_table(
            schema=schema, name=name
        )
        if table_is_ready:
            try:
                touch_cache(get_db(), self.query_id)
            except ValueError:
                # The cache record may not be written yet. This can happen
                # for Models, which call through to this method from their
                # `_make_query` method while writing metadata. In that
                # scenario the table _is_ written, but is not visible from
                # the connection touch_cache uses, because the cache
                # metadata transaction has not completed.
                pass
            return "SELECT * FROM {}".format(qualified_name)
    except NotImplementedError:
        # Anything in the lookup above signalling "not implemented" means
        # there is no stored table to read from; use the fallback below.
        pass
    return self._make_query()
def test_touch_cache_record_for_table(flowmachine_connect):
    """
    Touching the cache record of a table bumps its access count and its
    last-accessed time, while its score and the touch counter are left alone.
    """
    table = Table("events.calls_20160101")
    # Tables have a compute time of zero; set it to 1 to avoid zeroing out.
    flowmachine_connect.engine.execute(
        f"UPDATE cache.cached SET compute_time = 1 WHERE query_id=%s",
        table.query_id,
    )
    access_count_sql = (
        f"SELECT access_count FROM cache.cached WHERE query_id='{table.query_id}'"
    )
    last_accessed_sql = (
        f"SELECT last_accessed FROM cache.cached WHERE query_id='{table.query_id}'"
    )

    # State before the touch.
    score_before = get_score(flowmachine_connect, table.query_id)
    assert score_before == 0
    count_before = flowmachine_connect.fetch(access_count_sql)[0][0]
    assert count_before == 1
    accessed_at = flowmachine_connect.fetch(last_accessed_sql)[0][0]

    touch_cache(flowmachine_connect, table.query_id)

    # State after the touch.
    score_after = get_score(flowmachine_connect, table.query_id)
    assert score_after == 0
    count_after = flowmachine_connect.fetch(access_count_sql)[0][0]
    assert count_after == 2
    # No cache touch should be recorded
    touch_counter = flowmachine_connect.fetch(
        "SELECT nextval('cache.cache_touches');"
    )[0][0]
    assert touch_counter == 2
    accessed_after = flowmachine_connect.fetch(last_accessed_sql)[0][0]
    assert accessed_after > accessed_at
def test_touch_cache_record_for_query(flowmachine_connect):
    """
    Touching the cache record of a stored query bumps its access count,
    its last-accessed time, and the touch counter.
    """
    table = daily_location("2016-01-01").store().result()
    access_count_sql = (
        f"SELECT access_count FROM cache.cached WHERE query_id='{table.query_id}'"
    )
    last_accessed_sql = (
        f"SELECT last_accessed FROM cache.cached WHERE query_id='{table.query_id}'"
    )

    # State before the touch.
    count_before = get_db().fetch(access_count_sql)[0][0]
    assert count_before == 1
    accessed_at = get_db().fetch(last_accessed_sql)[0][0]

    touch_cache(get_db(), table.query_id)

    # State after the touch.
    count_after = get_db().fetch(access_count_sql)[0][0]
    assert count_after == 2
    # Two cache touches should have been recorded
    touch_counter = get_db().fetch("SELECT nextval('cache.cache_touches');")[0][0]
    assert touch_counter == 4
    accessed_after = get_db().fetch(last_accessed_sql)[0][0]
    assert accessed_after > accessed_at
def test_cache_miss_value_error_rescore():
    """
    Rescoring something that is not in cache should raise a ValueError.
    """
    # A connection whose fetch() always comes back empty, i.e. no cache record.
    empty_connection = Mock(**{"fetch.return_value": []})
    with pytest.raises(ValueError):
        touch_cache(empty_connection, "NOT_IN_CACHE")
def test_scoring(flowmachine_connect):
    """
    Check the score updating algorithm against cachey, which is used as
    the reference implementation.
    """
    first = daily_location("2016-01-01").store().result()
    first_time = get_compute_time(get_db(), first.query_id)
    first_size = get_size_of_table(get_db(), first.table_name, "cache")
    stored_score = get_score(get_db(), first.query_id)
    reference = Scorer(halflife=1000.0)
    expected_initial = reference.touch("dl", first_time / first_size)
    assert expected_initial == pytest.approx(stored_score)

    # Touch again
    touched_score = touch_cache(get_db(), first.query_id)
    expected_touched = reference.touch("dl")
    assert expected_touched == pytest.approx(touched_score)

    # Add another unrelated cache record, which should have a higher initial score
    second = daily_location("2016-01-02").store().result()
    second_time = get_compute_time(get_db(), second.query_id)
    second_size = get_size_of_table(get_db(), second.table_name, "cache")
    expected_second = reference.touch("dl_2", second_time / second_size)
    second_score = get_score(get_db(), second.query_id)
    assert expected_second == pytest.approx(second_score)