Example #1
def test_unstored_dependencies_graph():
    """
    Test that unstored_dependencies_graph returns the correct graph in an example case.
    """
    # Create dummy queries with dependency structure
    #
    #           5:unstored
    #            /       \
    #       3:stored    4:unstored
    #      /       \     /
    # 1:unstored   2:unstored
    #
    # Note: we add a string parameter to each query so that they have different query IDs
    dummy1 = DummyQuery(dummy_param=["dummy1"])
    dummy2 = DummyQuery(dummy_param=["dummy2"])
    dummy3 = DummyQuery(dummy_param=["dummy3", dummy1, dummy2])
    dummy4 = DummyQuery(dummy_param=["dummy4", dummy2])
    dummy5 = DummyQuery(dummy_param=["dummy5", dummy3, dummy4])
    dummy3.store()

    expected_query_nodes = [dummy2, dummy4]
    graph = unstored_dependencies_graph(dummy5)
    # None of the nodes in the graph should be flagged as stored
    assert not any(dict(graph.nodes(data="stored")).values())
    assert len(graph) == len(expected_query_nodes)
    for query in expected_query_nodes:
        assert f"x{query.query_id}" in graph.nodes()
        assert (
            graph.nodes[f"x{query.query_id}"]["query_object"].query_id
            == query.query_id
        )
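The DummyQuery class used above is a test fixture whose definition isn't shown here. A minimal sketch of what such a stand-in might look like, assuming a query ID derived from the parameters and dependencies picked out of dummy_param (all names and details are illustrative, not FlowMachine's actual fixture):

from hashlib import md5


class DummyQuery:
    """Illustrative stand-in for a flowmachine Query."""

    def __init__(self, dummy_param):
        self.dummy_param = dummy_param
        self._stored = False

    @property
    def query_id(self):
        # Derive a deterministic ID from string params and dependency IDs,
        # so queries with different parameters get different query IDs
        parts = [p if isinstance(p, str) else p.query_id for p in self.dummy_param]
        return md5(str(parts).encode()).hexdigest()

    @property
    def dependencies(self):
        # Treat any nested DummyQuery in dummy_param as a dependency
        return [p for p in self.dummy_param if isinstance(p, DummyQuery)]

    @property
    def is_stored(self):
        return self._stored

    def store(self):
        self._stored = True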
Example #2
def test_unstored_dependencies_graph_for_stored_query():
    """
    Test that the unstored dependencies graph for a stored query is empty.
    """
    dummy1 = DummyQuery(dummy_param=["dummy1"])
    dummy2 = DummyQuery(dummy_param=["dummy2"])
    dummy3 = DummyQuery(dummy_param=["dummy3", dummy1, dummy2])
    dummy3.store()

    graph = unstored_dependencies_graph(dummy3)
    assert len(graph) == 0
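To make both tests easier to follow, here is a simplified sketch of what unstored_dependencies_graph might do, assuming each query exposes a dependencies collection and an is_stored flag (an illustration of the idea, not FlowMachine's implementation):

import networkx as nx


def unstored_dependencies_graph_sketch(query):
    """Graph of the unstored dependencies of query (illustrative only)."""
    graph = nx.DiGraph()
    if query.is_stored:
        return graph  # Nothing to recompute, as Example #2 asserts

    def walk(q, parent_node=None):
        for dep in q.dependencies:
            if dep.is_stored:
                continue  # A stored dependency, and its whole subtree, needs no work
            node = f"x{dep.query_id}"
            is_new = node not in graph
            if is_new:
                graph.add_node(node, query_object=dep, stored=False)
            if parent_node is not None:
                graph.add_edge(parent_node, node)
            if is_new:
                walk(dep, node)

    walk(query)  # The query itself is never added to the graph
    return graph

The edges let a helper such as store_queries_in_order (used in Example #3) store dependencies before the queries that depend on them.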
Example #3
    def to_sql(
        self,
        name: str,
        schema: Union[str, None] = None,
        store_dependencies: bool = False,
    ) -> Future:
        """
        Store the result of the calculation back into the database.

        Parameters
        ----------
        name : str
            name of the table
        schema : str, default None
            Name of an existing schema. If None, the postgres default schema
            will be used; see the postgres docs for more info.
        store_dependencies : bool, default False
            If True, store the dependencies of this query.

        Returns
        -------
        Future
            Future object, containing this query and any result information.

        Notes
        -----
        This method will return a Future immediately.
        """
        if len(name) > MAX_POSTGRES_NAME_LENGTH:
            err_msg = (
                f"The table name {name} is too long ({len(name)} chars). "
                f"Postgres allows only table names of length {MAX_POSTGRES_NAME_LENGTH}."
            )
            raise NameTooLongError(err_msg)

        def write_query(query_ddl_ops: List[str], connection: Engine) -> float:
            plan_time = 0
            ddl_op_results = []
            for ddl_op in query_ddl_ops:
                try:
                    ddl_op_result = connection.execute(ddl_op)
                except Exception as e:
                    logger.error(f"Error executing SQL: '{ddl_op}'. Error was {e}")
                    raise e
                try:
                    ddl_op_results.append(ddl_op_result.fetchall())
                except ResourceClosedError:
                    pass  # Nothing to do here
            # Iterate over the results once, after all ops have run, so that
            # each EXPLAIN result's execution time is counted exactly once
            for ddl_op_result in ddl_op_results:
                try:
                    plan = ddl_op_result[0][0][0]  # Should be a query plan
                    plan_time += plan["Execution Time"]
                except (IndexError, KeyError):
                    pass  # Not an explain result
            logger.debug("Executed queries.")
            return plan_time

        if store_dependencies:
            # Need to ensure we're behind our deps in the queue
            store_queries_in_order(unstored_dependencies_graph(self))

        ddl_ops_func = self._make_sql

        current_state, changed_to_queue = QueryStateMachine(
            get_redis(), self.query_id, get_db().conn_id
        ).enqueue()
        logger.debug(
            f"Attempted to enqueue query '{self.query_id}', query state is now {current_state} and change happened {'here and now' if changed_to_queue else 'elsewhere'}."
        )
        # Submit the cache write to the executor; the returned Future wraps
        # this query and any result information
        store_future = submit_to_executor(
            write_query_to_cache,
            name=name,
            schema=schema,
            query=self,
            connection=get_db(),
            redis=get_redis(),
            ddl_ops_func=ddl_ops_func,
            write_func=write_query,
        )
        return store_future
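Because to_sql only enqueues the write, callers that need the table to exist must wait on the returned Future. A brief usage sketch (my_query stands for any flowmachine query object; the table name is arbitrary):

# Store the query and its dependencies, then block until the write completes
future = my_query.to_sql(name="my_table", store_dependencies=True)
future.result()  # Raises here if the write failed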
def test_cache_content(
    start_flowmachine_server_with_or_without_dependency_caching, fm_conn, redis
):
    """
    Run a query with dependency caching turned on, and check that its dependencies are cached.
    Run a query with dependency caching turned off, and check that only the query itself is cached.
    """
    # Can't use the zmq_port fixture here as we're running against a different FlowMachine server
    zmq_port = os.getenv("FLOWMACHINE_PORT")

    msg_run_query = {
        "action": "run_query",
        "params": {
            "query_kind": "spatial_aggregate",
            "locations": {
                "query_kind": "daily_location",
                "date": "2016-01-01",
                "method": "last",
                "aggregation_unit": "admin3",
                "subscriber_subset": None,
            },
        },
        "request_id": "DUMMY_ID",
    }
    q = RedactedSpatialAggregate(
        spatial_aggregate=SpatialAggregate(
            locations=daily_location(
                date="2016-01-01",
                method="last",
                spatial_unit=make_spatial_unit("admin", level=3),
                table=None,
                subscriber_subset=None,
            )
        )
    )

    # Get list of tables that should be cached
    expected_cache_tables = [q.table_name]
    if "false" == os.getenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING"):
        dependencies = unstored_dependencies_graph(q)
        for node, query_obj in dependencies.nodes(data="query_object"):
            try:
                schema, table_name = query_obj.fully_qualified_table_name.split(
                    ".")
                if schema == "cache":
                    expected_cache_tables.append(table_name)
            except NotImplementedError:
                # Some queries cannot be cached, and don't have table names
                pass

    # Check that we are starting with an empty cache.
    assert cache_schema_is_empty(get_db(), check_internal_tables_are_empty=False)

    # Send message to run the daily_location query, and check it was accepted
    reply = send_zmq_message_and_receive_reply(
        msg_run_query, port=zmq_port, host="localhost"
    )
    assert reply["status"] == "success"
    query_id = reply["payload"]["query_id"]

    # Wait until the query has finished.
    poll_until_done(zmq_port, query_id)

    # Check that the cache contains the correct tables.
    assert sorted(expected_cache_tables) == get_cache_tables(get_db())
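The send_zmq_message_and_receive_reply helper used above is not shown. A minimal sketch of the idea using pyzmq, assuming the server answers JSON requests over a REQ/REP-style socket (an assumption for illustration, not FlowMachine's documented wire protocol):

import zmq


def send_zmq_message_and_receive_reply_sketch(msg: dict, port, host="localhost") -> dict:
    # Connect, send the request as JSON, then block for the JSON reply
    context = zmq.Context.instance()
    socket = context.socket(zmq.REQ)
    socket.connect(f"tcp://{host}:{port}")
    try:
        socket.send_json(msg)
        return socket.recv_json()
    finally:
        socket.close()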