def test_get_sql(zmq_port, zmq_host):
    """
    Running 'get_sql' on finished query returns the expected result.
    """
    #
    # Run daily_location query.
    #
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
                hours=None,
            )
        )
    )
    expected_query_id = q.query_id

    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    # assert reply["status"] in ("executing", "queued", "completed")
    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Get query result.
    #
    msg = {
        "action": "get_sql_for_query_result",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert "success" == reply["status"]
    assert f"SELECT * FROM cache.x{expected_query_id}" == reply["payload"]["sql"]

def test_get_query_kind(params, zmq_port, zmq_host):
    """
    Running 'get_query_kind' against an existing query_id returns the expected query kind.
    """
    #
    # Run query.
    #
    msg = {"action": "run_query", "params": params, "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, query_id)

    #
    # Get query kind.
    #
    msg = {
        "action": "get_query_kind",
        "params": {"query_id": query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert "success" == reply["status"]
    assert query_id == reply["payload"]["query_id"]
    assert "spatial_aggregate" == reply["payload"]["query_kind"]

def test_poll_existing_query(zmq_port, zmq_host):
    """
    Polling an existing query id returns expected reply.
    """
    expected_query_id = "dummy_query_foobar"

    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "dummy_query",
            "dummy_param": "foobar",
            "aggregation_unit": "admin3",
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {
            "progress": {"eligible": 0, "queued": 0, "running": 0},
            "query_id": expected_query_id,
        },
    }
    assert expected_reply == reply

    # Poll until done to ensure we don't send the poll message until the query
    # state has finished updating.
    poll_until_done(zmq_port, expected_query_id)

    msg = {
        "action": "poll_query",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {
            "query_id": expected_query_id,
            "query_kind": "dummy_query",
            "query_state": "completed",
            "progress": {"eligible": 0, "queued": 0, "running": 0},
        },
    }
    assert expected_reply == reply

async def test_get_query_kind_for_nonexistent_query_id(zmq_port, zmq_host):
    """
    Running 'get_query_kind' on a non-existent query id returns an error.
    """
    #
    # Try getting query kind for nonexistent ID.
    #
    msg = {
        "action": "get_query_kind",
        "params": {"query_id": "FOOBAR"},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert {
        "status": "error",
        "payload": {"query_id": "FOOBAR", "query_state": "awol"},
        "msg": "Unknown query id: 'FOOBAR'",
    } == reply

def test_run_dfs_metric_total_amount_query(zmq_host, zmq_port):
    """
    Can run dfs metric query and receive successful response including the query_id.
    """
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "dfs_metric_total_amount",
            "metric": "commission",
            "start_date": "2016-01-02",
            "end_date": "2016-01-05",
            "aggregation_unit": "admin2",
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)

    q = DFSTotalMetricAmount(
        metric="commission",
        start_date="2016-01-02",
        end_date="2016-01-05",
        aggregation_unit="admin2",
    )
    expected_query_id = q.query_id

    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]
    assert ["query_id", "progress"] == list(reply["payload"].keys())
    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)

def test_get_sql_for_nonexistent_query_id(zmq_port, zmq_host):
    """
    Running 'get_sql_for_query_result' with a non-existent query id returns the expected error.
    """
    #
    # Try getting query result for nonexistent ID.
    #
    msg = {
        "action": "get_sql_for_query_result",
        "params": {"query_id": "FOOBAR"},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "error",
        "msg": "Unknown query id: 'FOOBAR'",
        "payload": {"query_id": "FOOBAR", "query_state": "awol"},
    }
    assert expected_reply == reply

def test_get_available_queries(zmq_host, zmq_port):
    """
    Action 'get_available_queries' returns list of available queries.
    """
    msg = {"action": "get_available_queries", "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {
            "available_queries": [
                "dummy_query",
                "flows",
                "meaningful_locations_aggregate",
                "meaningful_locations_between_label_od_matrix",
                "meaningful_locations_between_dates_od_matrix",
                "geography",
                "location_event_counts",
                "unique_subscriber_counts",
                "location_introversion",
                "total_network_objects",
                "aggregate_network_objects",
                "dfs_metric_total_amount",
                "spatial_aggregate",
                "joined_spatial_aggregate",
            ]
        },
    }
    assert expected_reply == reply

def test_send_zmq_message_and_receive_reply(zmq_host, zmq_port):
    """
    Reply from the flowmachine server to the example message stored in
    `FM_EXAMPLE_MESSAGE` is as expected.
    """
    # Check that FM_EXAMPLE_MESSAGE contains the expected message
    msg_expected = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    assert msg_expected == FM_EXAMPLE_MESSAGE

    # Check that the flowmachine server sends the expected reply
    reply = send_zmq_message_and_receive_reply(
        FM_EXAMPLE_MESSAGE, host=zmq_host, port=zmq_port
    )
    assert "e39b0d45bc6b46b7700c67cd52f00455" == reply["payload"]["query_id"]
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"

async def test_poll_existing_query(zmq_port, zmq_host):
    """
    Polling an existing query id returns the expected reply.
    """
    expected_query_id = "dummy_query_d5d01a68ba6305f24a721b802341335b"

    msg = {
        "action": "run_query",
        "params": {"query_kind": "dummy_query", "dummy_param": "foobar"},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {"query_id": expected_query_id},
    }
    assert expected_reply == reply

    msg = {
        "action": "poll_query",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {
            "query_id": expected_query_id,
            "query_kind": "dummy_query",
            "query_state": "completed",
        },
    }
    assert expected_reply == reply

def test_ping_flowmachine_server(zmq_host, zmq_port):
    """
    Sending the 'ping' action to the flowmachine server evokes a successful 'pong' response.
    """
    msg = {"action": "ping", "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {"status": "success", "msg": "pong", "payload": {}}
    assert expected_reply == reply

async def test_get_query_params(params, zmq_port, zmq_host):
    """
    Running 'get_query_params' against an existing query_id returns the expected
    parameters with which the query was run.
    """
    #
    # Run query.
    #
    msg = {"action": "run_query", "params": params, "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    query_id = reply["payload"]["query_id"]
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, query_id)

    #
    # Get query parameters.
    #
    msg = {
        "action": "get_query_params",
        "params": {"query_id": query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {"query_id": query_id, "query_params": params},
    }
    assert expected_reply == reply

async def test_get_sql(zmq_port, zmq_host):
    """
    Running 'get_sql' on a finished query returns the expected result.
    """
    #
    # Run daily_location query.
    #
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    expected_query_id = "e39b0d45bc6b46b7700c67cd52f00455"

    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Get query result.
    #
    msg = {
        "action": "get_sql_for_query_result",
        "params": {"query_id": expected_query_id},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert "success" == reply["status"]
    assert f"SELECT * FROM cache.x{expected_query_id}" == reply["payload"]["sql"]

async def test_get_available_dates(zmq_port, zmq_host):
    """
    Action 'get_available_dates' returns the dates for which data is available
    for each event type.
    """
    msg = {"action": "get_available_dates", "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert reply["status"] == "success"
    expected_reply = {
        "status": "success",
        "msg": "",
        "payload": {
            "calls": [
                "2016-01-01",
                "2016-01-02",
                "2016-01-03",
                "2016-01-04",
                "2016-01-05",
                "2016-01-06",
                "2016-01-07",
            ],
            "mds": [
                "2016-01-01",
                "2016-01-02",
                "2016-01-03",
                "2016-01-04",
                "2016-01-05",
                "2016-01-06",
                "2016-01-07",
            ],
            "topups": [
                "2016-01-01",
                "2016-01-02",
                "2016-01-03",
                "2016-01-04",
                "2016-01-05",
                "2016-01-06",
                "2016-01-07",
            ],
            "sms": [
                "2016-01-01",
                "2016-01-02",
                "2016-01-03",
                "2016-01-04",
                "2016-01-05",
                "2016-01-06",
                "2016-01-07",
            ],
            "forwards": [],
        },
    }
    assert expected_reply == reply

def test_api_spec_of_flowmachine_query_schemas(zmq_host, zmq_port, diff_reporter):
    """
    Verify the API spec for flowmachine queries.
    """
    msg = {"action": "get_query_schemas", "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    print(reply)
    assert "success" == reply["status"]
    spec_as_json_string = json.dumps(
        sort_recursively(reply["payload"]["query_schemas"]), indent=2
    )
    diff_reporter(spec_as_json_string)

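# `sort_recursively` (imported from the test utilities) is used above to make the
# serialised schema payload deterministic before it is handed to the approval-test
# diff reporter. As a rough illustration only, a recursive key-sorting helper of
# that shape might look like the sketch below; this is an assumption about its
# behaviour rather than the real implementation, and it is named differently so
# it does not shadow the imported helper.
def sort_recursively_sketch(value):
    if isinstance(value, dict):
        # Sort dictionary keys and recurse into the values.
        return {key: sort_recursively_sketch(value[key]) for key in sorted(value)}
    if isinstance(value, list):
        items = [sort_recursively_sketch(item) for item in value]
        try:
            return sorted(items)
        except TypeError:
            # Elements of mixed or unorderable types: keep their original order.
            return items
    return value
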
def test_poll_query_with_nonexistent_query_id_fails(zmq_port, zmq_host):
    """
    Polling a query with non-existent query id returns expected error.
    """
    msg = {
        "action": "poll_query",
        "params": {"query_id": "FOOBAR"},
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert {
        "status": "error",
        "payload": {"query_id": "FOOBAR", "query_state": "awol"},
        "msg": "Unknown query id: 'FOOBAR'",
    } == reply

def test_unknown_action_returns_error(zmq_host, zmq_port):
    """
    Unknown action returns an error response.
    """
    msg = {"action": "foobar", "request_id": "DUMMY_ID"}
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    expected_reply = {
        "status": "error",
        "msg": "Invalid action request.",
        "payload": {
            "action": [
                "Must be one of: ping, get_available_queries, get_query_schemas, run_query, poll_query, get_query_kind, get_query_params, get_sql_for_query_result, get_geo_sql_for_query_result, get_geography, get_available_dates."
            ]
        },
    }
    assert expected_reply == reply

async def test_invalid_event_types(event_types, zmq_port, zmq_host):
    """
    Action 'get_available_dates' returns an error if invalid event types are passed.
    """
    msg = {
        "action": "get_available_dates",
        "request_id": "DUMMY_ID",
        "params": {"event_types": event_types},
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    assert reply["status"] == "error"
    assert reply["msg"] == "Invalid value for argument `event_types`."

def test_send_zmq_message_and_receive_reply(zmq_host, zmq_port):
    """
    Reply from the flowmachine server to the example message stored in
    `FM_EXAMPLE_MESSAGE` is as expected.
    """
    # Check that FM_EXAMPLE_MESSAGE contains the expected message
    msg_expected = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    assert msg_expected == FM_EXAMPLE_MESSAGE

    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )
    expected_query_id = q.query_id

    # Check that the flowmachine server sends the expected reply
    reply = send_zmq_message_and_receive_reply(
        FM_EXAMPLE_MESSAGE, host=zmq_host, port=zmq_port
    )
    assert expected_query_id == reply["payload"]["query_id"]
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"
    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)

async def test_run_query_with_wrong_parameters(
    params, expected_error_messages, zmq_port, zmq_host
):
    """
    Running a query with wrong parameters returns an error with the expected
    error messages.
    """
    msg_run_query = {
        "action": "run_query",
        "params": params,
        "request_id": "DUMMY_ID",
    }

    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host=zmq_host
    )
    # expected_reason = f"Error when constructing query of kind daily_location with parameters {params}: '{expected_error_msg}'"
    # expected_reason = "Message contains unexpected key(s): ['query_kind'], 'data': {}"
    assert "error" == reply["status"]
    assert expected_error_messages == reply["payload"]

def test_run_daily_location_query(zmq_host, zmq_port):
    """
    Can run daily location query and receive successful response including the query_id.
    """
    msg = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "most-common",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)

    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="most-common",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
                hours=None,
            )
        )
    )
    expected_query_id = q.query_id

    assert "success" == reply["status"]
    assert expected_query_id == reply["payload"]["query_id"]
    assert ["query_id", "progress"] == list(reply["payload"].keys())
    # FIXME: At the moment we have to explicitly wait for all running queries
    # to finish before finishing the test, otherwise unexpected behaviour may
    # occur when we reset the cache before the next test
    # (see https://github.com/Flowminder/FlowKit/issues/1245).
    poll_until_done(zmq_port, expected_query_id)

async def test_wrongly_formatted_zmq_message(zmq_port, zmq_host):
    """
    A wrongly formatted zmq message (missing the 'action' key) returns an error reply.
    """
    msg = {
        "foo": "bar",
        "params": {
            "query_kind": "daily_location",
            "date": "2016-01-01",
            "method": "last",
            "aggregation_unit": "admin3",
            "subscriber_subset": None,
        },
        "request_id": "DUMMY_ID",
    }

    reply = send_zmq_message_and_receive_reply(msg, port=zmq_port, host=zmq_host)
    # A malformed message should be rejected with an error reply.
    assert "error" == reply["status"]

def poll_until_done(port, query_id, max_tries=100):
    """
    Send zmq message to flowmachine on port `port` which polls the query with
    id `query_id` until the return status is "completed".
    """
    host = "localhost"
    msg = {
        "action": "poll_query",
        "params": {"query_id": query_id},
        "request_id": "DUMMY_ID",
    }

    for i in itertools.count():
        if i > max_tries:
            raise RuntimeError("Timeout reached but query is not done. Aborting.")
        print(f"[DDD] Polling query {query_id}...")
        reply = send_zmq_message_and_receive_reply(msg, port=port, host=host)
        if "completed" == reply["payload"]["query_state"]:
            break
        time.sleep(0.1)

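# The tests in this module exercise the flowmachine server via a
# `send_zmq_message_and_receive_reply` helper imported from the shared test
# utilities. For context only, a minimal sketch of what such a helper might
# look like is given below; it assumes the server speaks JSON over a ZeroMQ
# REQ/REP socket pair (an assumption for illustration, not the actual
# implementation) and is deliberately named differently so it does not shadow
# the real helper.
def send_zmq_message_and_receive_reply_sketch(msg, *, port, host="localhost"):
    import zmq  # pyzmq

    context = zmq.Context.instance()
    socket = context.socket(zmq.REQ)
    try:
        socket.connect(f"tcp://{host}:{port}")
        socket.send_json(msg)  # serialise the request dict to JSON and send it
        return socket.recv_json()  # block until the server's JSON reply arrives
    finally:
        socket.close()
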
def test_cache_content(
    start_flowmachine_server_with_or_without_dependency_caching, fm_conn, redis
):
    """
    Run a query with dependency caching turned on, and check that its dependencies
    are cached. Run a query with dependency caching turned off, and check that only
    the query itself is cached.
    """
    # Can't use the zmq_port fixture here as we're running against a different
    # FlowMachine server
    zmq_port = os.getenv("FLOWMACHINE_PORT")

    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )

    # Get list of tables that should be cached
    expected_cache_tables = [q.table_name]
    if "false" == os.getenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING"):
        dependencies = unstored_dependencies_graph(q)
        for node, query_obj in dependencies.nodes(data="query_object"):
            try:
                schema, table_name = query_obj.fully_qualified_table_name.split(".")
                if schema == "cache":
                    expected_cache_tables.append(table_name)
            except NotImplementedError:
                # Some queries cannot be cached, and don't have table names
                pass

    # Check that we are starting with an empty cache.
    assert cache_schema_is_empty(get_db(), check_internal_tables_are_empty=False)

    # Send message to run the daily_location query, and check it was accepted
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host="localhost"
    )
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    # Wait until the query has finished.
    poll_until_done(zmq_port, query_id)

    # Check that the cache contains the correct tables.
    assert sorted(expected_cache_tables) == get_cache_tables(get_db())

async def test_run_query(zmq_port, zmq_host, fm_conn, redis):
    """
    Run daily_location query and check the resulting table contains the expected rows.
    """
    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = SpatialAggregate(
        locations=daily_location(
            date="2016-01-01",
            method="last",
            spatial_unit=make_spatial_unit("admin", level=3),
            subscriber_subset=None,
        )
    )
    expected_query_id = q.md5

    #
    # Check that we are starting with a clean slate (no cache tables, empty redis).
    #
    reset_cache(fm_conn, redis, protect_table_objects=False)
    assert cache_schema_is_empty(fm_conn)
    assert not redis.exists(expected_query_id)

    #
    # Send message to run the daily_location query, check it was accepted
    # and a redis lookup was created for the query id.
    #
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host=zmq_host
    )
    # assert reply["status"] in ("executing", "queued", "completed")
    assert reply["status"] == "success"
    assert expected_query_id == reply["payload"]["query_id"]
    # assert redis.exists(expected_query_id)

    #
    # Wait until the query has finished.
    #
    poll_until_done(zmq_port, expected_query_id)

    #
    # Check that a cache table for the query result was created
    # and that it contains the expected number of rows.
    #
    output_cache_table = f"x{expected_query_id}"
    assert [output_cache_table] == get_cache_tables(fm_conn)
    num_rows = fm_conn.engine.execute(
        f"SELECT COUNT(*) FROM cache.{output_cache_table}"
    ).fetchone()[0]
    assert num_rows == 25

    #
    # In addition, check first few rows of the result are as expected.
    #
    first_few_rows_expected = [
        ("524 1 01 04", 13),
        ("524 1 02 09", 26),
        ("524 1 03 13", 20),
    ]
    first_few_rows = fm_conn.engine.execute(
        f"SELECT * FROM cache.{output_cache_table} ORDER BY pcod LIMIT 3"
    ).fetchall()
    assert first_few_rows_expected == first_few_rows