Ejemplo n.º 1
0
async def action_handler__get_sql(config: "FlowmachineServerConfig",
                                  query_id: str) -> ZMQReply:
    """
    Handler for the 'get_sql' action.

    Replies with the SQL string returned by the query object's `get_query()`
    for the query with the given `query_id`, provided that query is known and
    its state is `QueryState.COMPLETED`.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration (not used by this handler).
    query_id : str
        Identifier of the query whose SQL is requested.

    Returns
    -------
    ZMQReply
        A "success" reply whose payload holds `query_id`, `query_state` and
        `sql`, or an "error" reply if the query id is unknown or the query
        is not in the completed state.
    """
    redis = get_redis()

    # TODO: QueryStateMachine cannot currently tell us whether `query_id`
    # belongs to a valid query object, hence this separate lookup. A
    # QueryState.UNKNOWN would make this special case unnecessary.
    if not QueryInfoLookup(redis).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    query_state = QueryStateMachine(
        redis, query_id, get_db().conn_id
    ).current_query_state

    if query_state != QueryState.COMPLETED:
        return ZMQReply(
            status="error",
            msg=f"Query with id '{query_id}' {query_state.description}.",
            payload={"query_id": query_id, "query_state": query_state},
        )

    query_obj = get_query_object_by_id(get_db(), query_id)
    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_state": query_state,
            "sql": query_obj.get_query(),
        },
    )
Ejemplo n.º 2
0
async def action_handler__poll_query(config: "FlowmachineServerConfig",
                                     query_id: str) -> ZMQReply:
    """
    Handler for the 'poll_query' action.

    Reports the kind, current state and progress of the query with the
    given `query_id`.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration (not used by this handler).
    query_id : str
        Identifier of the query to poll.

    Returns
    -------
    ZMQReply
        A "success" reply with `query_id`, `query_kind`, `query_state` and
        `progress` in the payload, or an "error" reply if no query kind is
        registered for `query_id`.
    """
    query_kind = _get_query_kind_for_query_id(query_id)

    # TODO: we should probably be able to use the QueryStateMachine to
    # determine whether the query already exists.
    if query_kind is None:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    state_machine = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    current_state = state_machine.current_query_state

    # Rebuild the query object from its registered parameters so that its
    # progress can be computed.
    schema = FlowmachineQuerySchema()
    registered_params = QueryInfoLookup(get_redis()).get_query_params(query_id)
    query_obj = schema.load(registered_params)._flowmachine_query_obj

    return ZMQReply(
        status="success",
        payload={
            "query_id": query_id,
            "query_kind": query_kind,
            "query_state": current_state,
            "progress": query_progress(query_obj),
        },
    )
Ejemplo n.º 3
0
def real_connections(flowmachine_connect):
    """
    Generator which yields inside a `connections()` context and cleans up
    afterwards.

    After the consumer finishes (whether or not it raised), the cache is
    reset without protecting table objects, the database engine is disposed
    and the redis database is flushed.
    """
    with connections():
        try:
            yield
        finally:
            db = get_db()
            redis = get_redis()
            reset_cache(db, redis, protect_table_objects=False)
            db.engine.dispose()  # Close the connection
            redis.flushdb()  # Empty the redis
Ejemplo n.º 4
0
def reset_flowdb_and_redis(fm_conn):
    """
    Put flowdb and redis back into a pristine state.

    Resets the cache schema in flowdb and then deletes all keys from redis.
    Per the original documentation this fixture runs automatically before
    every test, so that each test starts from a clean database.
    """
    print("[DDD] Resetting flowdb and redis into a pristine state")
    redis = get_redis()
    reset_cache_schema(get_db(), redis_instance=redis)
    delete_all_redis_keys(redis_instance=redis)
Ejemplo n.º 5
0
def test_cache_reset(flowmachine_connect):
    """
    Resetting the cache should reset both the stored table and the redis state.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def current_state():
        # Build a fresh state machine each time so the state is re-read.
        return QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        ).current_query_state

    assert current_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    reset_cache(get_db(), get_redis())

    assert current_state() == QueryState.KNOWN
    assert not stored_query.is_stored
Ejemplo n.º 6
0
async def test_query_run_logged(json_log, server_config):
    """
    A 'run_query' request should produce an entry in the
    'flowmachine.query_run_log' logger carrying the request id and action.
    """
    run_log = getLogger("flowmachine.query_run_log")
    # Point the logger's first handler at the current sys.stdout so that the
    # output can be captured (original note: "Reset log stream for capsys").
    run_log.handlers[0].stream = sys.stdout

    request = {
        "action": "run_query",
        "request_id": "DUMMY_API_REQUEST_ID",
        "params": {
            "query_kind": "dummy_query",
            "dummy_param": "DUMMY"
        },
    }

    # Logging of query runs should be independent of other logs
    set_log_level("flowmachine.debug", "ERROR")
    # Mock enough redis to get to the log messages
    get_redis().get.return_value = b"known"

    reply = await get_reply_for_message(msg_str=json.dumps(request),
                                        config=server_config)

    captured = json_log()
    print(reply)
    first_entry = captured.out[0]
    action_request = first_entry["action_request"]
    assert action_request["request_id"] == "DUMMY_API_REQUEST_ID"
    assert action_request["action"] == "run_query"
    assert first_entry["logger"] == "flowmachine.query_run_log"
Ejemplo n.º 7
0
    def get_query(self):
        """
        Return a string representing an SQL query for this object.

        If this query has a fully qualified table name, wait for its state
        machine to report completion; when the query is completed and the
        table exists in the database, the returned SQL selects from that
        table. In every other case the SQL comes from `self._make_query()`.

        Returns
        -------
        str
            SQL query string.

        """
        try:
            cache_table = self.fully_qualified_table_name
            schema, name = cache_table.split(".")
            qsm = QueryStateMachine(get_redis(), self.query_id,
                                    get_db().conn_id)
            qsm.wait_until_complete()
            table_usable = qsm.is_completed and get_db().has_table(
                schema=schema, name=name)
            if table_usable:
                try:
                    touch_cache(get_db(), self.query_id)
                except ValueError:
                    # Cache record not written yet, which can happen for
                    # Models that call through to this method from their
                    # `_make_query` method while writing metadata. In that
                    # scenario the table _is_ written, but is not visible
                    # from the connection touch_cache uses because the cache
                    # metadata transaction isn't complete.
                    pass
                return f"SELECT * FROM {cache_table}"
        except NotImplementedError:
            # No usable table name for this query; fall through and build
            # the SQL directly.
            pass
        return self._make_query()
Ejemplo n.º 8
0
def start_flowmachine_server_with_or_without_dependency_caching(
    request, logging_config, monkeypatch
):
    """
    Starts a FlowMachine server in a separate process, with function scope
    (i.e. a server will be started and stopped for each test that uses this fixture).
    Tests using this fixture will run twice: once with dependency caching disabled,
    and again with dependency caching enabled.

    The server process is always terminated, and the freshly created
    connection always closed, even if setting up the connection or the
    context fails part-way through.
    """

    # Ensure this server runs on a different port from the session-scoped server
    main_zmq_port = os.getenv("FLOWMACHINE_PORT", "5555")
    monkeypatch.setenv("FLOWMACHINE_PORT", str(int(main_zmq_port) + 1))
    # Turn dependency caching on or off
    monkeypatch.setenv("FLOWMACHINE_SERVER_DISABLE_DEPENDENCY_CACHING", request.param)
    # Start the server
    fm_process = Process(target=flowmachine.core.server.server.main)
    fm_process.start()

    try:
        # Create a new flowmachine connection, because we can't use the old one after starting a new process.
        new_conn = make_flowmachine_connection_object()
        try:
            with flowmachine.core.context.context(
                new_conn, get_executor(), get_redis()
            ):
                yield
        finally:
            new_conn.close()
    finally:
        # Without this guard a failure above would leave the server process
        # running after the fixture has gone away.
        fm_process.terminate()
        sleep(2)  # Wait a moment to make sure coverage of subprocess finishes being written
Ejemplo n.º 9
0
def test_blocks_on_store_cascades():
    """
    If a store is running on a query that is used
    in another query, that other query should wait.
    """
    first_day = daily_location("2016-01-01",
                               spatial_unit=make_spatial_unit("cell"))
    second_day = daily_location("2016-01-02",
                                spatial_unit=make_spatial_unit("cell"))
    first_day.store().result()
    modal = ModalLocation(first_day, second_day)
    ticks = []

    def unlock(tick_log, redis_client, connection_id):
        # Put the dependency back into the queue, record 101 ticks, then
        # drive it through execute and finish.
        state_machine = QueryStateMachine(redis_client, first_day.query_id,
                                          connection_id)
        state_machine.enqueue()
        for tick in range(101):
            tick_log.append(tick)
        state_machine.execute()
        state_machine.finish()

    worker = Thread(target=unlock,
                    args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    modal.get_query()
    # All ticks must have been recorded by the time get_query returns.
    assert len(ticks) == 101
    worker.join()
Ejemplo n.º 10
0
async def test_rerun_query_after_cancelled(server_config, real_connections):
    """
    A query that was cancelled can be run again and ends up stored.
    """

    def query_spec():
        # A new dict on every call, so each consumer gets its own copy of
        # the same query parameters.
        return dict(
            query_kind="spatial_aggregate",
            locations=dict(
                query_kind="daily_location",
                date="2016-01-01",
                method="last",
                aggregation_unit="admin3",
            ),
        )

    query_obj = FlowmachineQuerySchema().load(
        query_spec())._flowmachine_query_obj
    query_id = query_obj.query_id

    # Enqueue and immediately cancel the query.
    qsm = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    qsm.enqueue()
    qsm.cancel()
    assert not query_obj.is_stored
    assert qsm.is_cancelled

    # Register the query's parameters under its id.
    QueryInfoLookup(get_redis()).register_query(query_id, query_spec())

    reply = await action_handler__run_query(config=server_config,
                                            **query_spec())
    assert reply["status"] == ZMQReplyStatus.SUCCESS
    qsm.wait_until_complete()
    assert query_obj.is_stored
Ejemplo n.º 11
0
async def test_get_query_bad_id(server_config):
    """
    The get_query_params handler should send back an error status for a
    nonexistent id.
    """
    # Make every redis `get` come back empty (presumably redis is a mock
    # here, since `return_value` is being set on it).
    get_redis().get.return_value = None
    reply = await action_handler__get_query_params(config=server_config,
                                                   query_id="DUMMY_ID")
    assert reply.status == ZMQReplyStatus.ERROR
Ejemplo n.º 12
0
def redis():
    """
    Provide the redis instance that tests should use.

    This simply forwards whatever `get_redis()` returns; going through this
    fixture keeps that choice out of the individual tests.
    """
    redis_instance = get_redis()
    return redis_instance
Ejemplo n.º 13
0
 def write_model_result(query_ddl_ops: List[str],
                        connection: Engine) -> float:
     """
     Write this object's dataframe to the database and mark the query finished.

     Relies on `self`, `store_dependencies`, `name` and `schema` from the
     enclosing scope. `query_ddl_ops` is accepted but not used here.

     Returns
     -------
     float
         The value of `self._runtime`.
     """
     if store_dependencies:
         store_all_unstored_dependencies(self)
     self._df.to_sql(name, connection, schema=schema, index=False)
     state_machine = QueryStateMachine(get_redis(), self.query_id,
                                       get_db().conn_id)
     state_machine.finish()
     return self._runtime
Ejemplo n.º 14
0
def test_drop_query_errors():
    """Test that invalidating a query's cache raises QueryResetFailedException when the query's state does not allow it."""
    dummy = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(get_redis(), dummy.query_id,
                                      get_db().conn_id)
    # Enqueue and start executing, but never finish
    state_machine.enqueue()
    state_machine.execute()
    with pytest.raises(QueryResetFailedException):
        dummy.invalidate_db_cache()
Ejemplo n.º 15
0
def test_cache_reset_protects_tables(flowmachine_connect):
    """
    Resetting the cache should preserve Table entries.
    """
    # Regression test for https://github.com/Flowminder/FlowKit/issues/832
    dl_query = daily_location(date="2016-01-03", method="last")
    reset_cache(get_db(), get_redis())
    # Look up the stored query ids once, rather than once per dependency.
    stored_ids = {stored.query_id for stored in Query.get_stored()}
    for dep in dl_query._get_stored_dependencies():
        assert dep.query_id in stored_ids
    dl_query.store().result()  # Original bug caused this to error
Ejemplo n.º 16
0
def test_store_exceptions(fail_event, expected_exception):
    """Test that the expected exception surfaces when watching a store op that was started elsewhere and then fails."""
    dummy = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(get_redis(), dummy.query_id,
                                      get_db().conn_id)
    # Mark the query as having begun executing elsewhere
    state_machine.enqueue()
    state_machine.execute()
    store_future = dummy.store()
    # Fire the failure event while the store is being watched
    state_machine.trigger_event(fail_event)
    with pytest.raises(expected_exception):
        raise store_future.exception()
Ejemplo n.º 17
0
def test_redis_resync_runtimeerror(flowmachine_connect, dummy_redis):
    """
    Resyncing should raise a RuntimeError when the redis passed in does not
    allow flushing (standing in for redis being updated from multiple places).
    """
    stored_query = daily_location("2016-01-01").store().result()
    state_machine = QueryStateMachine(get_redis(), stored_query.query_id,
                                      get_db().conn_id)
    assert state_machine.current_query_state == QueryState.COMPLETED

    dummy_redis.allow_flush = False
    with pytest.raises(RuntimeError):
        resync_redis_with_cache(get_db(), dummy_redis)
Ejemplo n.º 18
0
    def query_state(self) -> "QueryState":
        """
        Look up the state this query is currently in.

        A QueryStateMachine is created for this query's id on each call and
        its current state is read.

        Returns
        -------
        flowmachine.core.query_state.QueryState
            The current query state
        """
        return QueryStateMachine(
            get_redis(), self.query_id, get_db().conn_id
        ).current_query_state
Ejemplo n.º 19
0
async def action_handler__get_geo_sql(config: "FlowmachineServerConfig",
                                      query_id: str) -> ZMQReply:
    """
    Handler for the 'get_geo_sql' action.

    Replies with the SQL string returned by the query object's
    `geojson_query()` for the query with the given `query_id`, together with
    the canonical name of the query's spatial unit.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration (not used by this handler).
    query_id : str
        Identifier of the query whose geojson SQL is requested.

    Returns
    -------
    ZMQReply
        A "success" reply with `query_id`, `query_state`, `sql` and
        `aggregation_unit` in the payload. An "error" reply is returned if
        the query id is unknown, the query is not completed, or an
        AttributeError is raised while building the successful reply.
    """
    redis = get_redis()

    # TODO: QueryStateMachine cannot currently tell us whether `query_id`
    # belongs to a valid query object, hence this separate lookup. A
    # QueryState.UNKNOWN would make this special case unnecessary.
    if not QueryInfoLookup(redis).query_is_known(query_id):
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )

    query_state = QueryStateMachine(
        redis, query_id, get_db().conn_id
    ).current_query_state

    if query_state != QueryState.COMPLETED:
        return ZMQReply(
            status="error",
            msg=f"Query with id '{query_id}' {query_state.description}.",
            payload={"query_id": query_id, "query_state": query_state},
        )

    query_obj = get_query_object_by_id(get_db(), query_id)
    try:
        geo_sql = query_obj.geojson_query()
        return ZMQReply(
            status="success",
            payload={
                "query_id": query_id,
                "query_state": query_state,
                "sql": geo_sql,
                "aggregation_unit": query_obj.spatial_unit.canonical_name,
            },
        )
    except AttributeError:
        # TODO: This codepath is untested because all queries right now have geography
        return ZMQReply(
            status="error",
            msg=f"Query with id '{query_id}' has no geojson compatible representation.",
            payload={"query_id": query_id, "query_state": "errored"},
        )
Ejemplo n.º 20
0
def test_redis_resync(flowmachine_connect):
    """
    Flushing redis drops the query state back to KNOWN while the table stays
    stored; resyncing redis with the flowdb cache restores COMPLETED.
    """
    stored_query = daily_location("2016-01-01").store().result()

    def current_state():
        # Build a fresh state machine each time so the state is re-read.
        return QueryStateMachine(
            get_redis(), stored_query.query_id, get_db().conn_id
        ).current_query_state

    assert current_state() == QueryState.COMPLETED
    assert stored_query.is_stored

    get_redis().flushdb()
    assert stored_query.is_stored
    assert current_state() == QueryState.KNOWN

    resync_redis_with_cache(get_db(), get_redis())
    assert current_state() == QueryState.COMPLETED
Ejemplo n.º 21
0
def unstored_dependencies_graph(query_obj: "Query") -> nx.DiGraph:
    """
    Build a dependency graph of the unstored queries that this query depends on.

    Parameters
    ----------
    query_obj : Query
        Query object to produce a dependency graph for.

    Returns
    -------
    networkx.DiGraph

    Notes
    -----
    If store() or invalidate_db_cache() is called on any query while this
    function is executing, the resulting graph may not be correct.
    The queries listed as dependencies are not _guaranteed_ to be
    used in the actual running of a query, only to be referenced by it.
    """
    edges = []

    if not query_obj.is_stored:
        # Work list of (dependent, dependency) pairs still to be examined.
        pending = [(query_obj, dep) for dep in query_obj.dependencies]

        while pending:
            parent, child = pending.pop()
            # We don't want to include this query in the graph, only its
            # dependencies, so the root is replaced by None.
            parent = None if parent is query_obj else parent
            # Wait for query to complete before checking whether it's stored.
            QueryStateMachine(get_redis(), child.query_id,
                              get_db().conn_id).wait_until_complete()
            if child.is_stored:
                continue
            edges.append((parent, child))
            pending.extend(
                (child, grandchild) for grandchild in child.dependencies)

    def get_node_attrs(q):
        class_name = q.__class__.__name__
        return {
            "query_object": q,
            "name": class_name,
            "stored": False,
            "shape": "rect",
            "label": f"{class_name}.",
        }

    return _assemble_dependency_graph(dependencies=edges,
                                      attrs_func=get_node_attrs)
Ejemplo n.º 22
0
def test_drop_query_blocks(monkeypatch):
    """Test that resetting a query's cache will block if that's already happening."""
    # Replace the sleep used by flowmachine.core.query with a mock that
    # raises, so that any attempt to wait shows up as a BlockingIOError.
    raising_sleep = Mock(side_effect=BlockingIOError)
    monkeypatch.setattr(flowmachine.core.query, "_sleep", raising_sleep)

    dummy = DummyQuery(dummy_id=1, sleep_time=5)
    state_machine = QueryStateMachine(get_redis(), dummy.query_id,
                                      get_db().conn_id)
    # Mark the query as in the process of resetting
    state_machine.enqueue()
    state_machine.execute()
    state_machine.finish()
    state_machine.reset()
    with pytest.raises(BlockingIOError):
        dummy.invalidate_db_cache()
Ejemplo n.º 23
0
    def __call__(self, value) -> Union[None, str]:
        """
        Validate that `value` is either absent or a usable query id.

        `None` and `missing` pass through untouched. Otherwise the query
        parameters registered for `value` are loaded through
        FlowmachineQuerySchema; if the id is unknown, the value is still
        accepted as long as a cache table exists for it.

        Returns
        -------
        str or None
            The value that was passed in.

        Raises
        ------
        ValidationError
            If the id is unknown and no cache table exists for it.
        """
        from flowmachine.core.server.query_schemas import FlowmachineQuerySchema

        if value is None or value is missing:
            return value

        try:
            schema = FlowmachineQuerySchema()
            registered_params = QueryInfoLookup(
                get_redis()).get_query_params(value)
            # Only the side effect of loading matters; the result is discarded.
            schema.load(registered_params)._flowmachine_query_obj
        except UnkownQueryIdError:
            if not cache_table_exists(get_db(), value):
                raise ValidationError("Must be None or a valid query id.")

        return value
Ejemplo n.º 24
0
async def action_handler__get_query_params(config: "FlowmachineServerConfig",
                                           query_id: str) -> ZMQReply:
    """
    Handler for the 'get_query_params' action.

    Parameters
    ----------
    config : FlowmachineServerConfig
        Server configuration (not used by this handler).
    query_id : str
        Identifier of the query whose parameters are requested.

    Returns
    -------
    ZMQReply
        A "success" reply with `query_id` and `query_params` in the payload,
        or an "error" reply if the query id is unknown.
    """
    lookup = QueryInfoLookup(get_redis())
    try:
        query_params = lookup.get_query_params(query_id)
    except UnkownQueryIdError:
        return ZMQReply(
            status="error",
            msg=f"Unknown query id: '{query_id}'",
            payload={"query_id": query_id, "query_state": "awol"},
        )
    else:
        return ZMQReply(
            status="success",
            payload={"query_id": query_id, "query_params": query_params},
        )
Ejemplo n.º 25
0
    def deserialize(
        self,
        value: typing.Any,
        attr: str = None,
        data: typing.Mapping[str, typing.Any] = None,
        **kwargs,
    ) -> Union[None, Table]:
        """
        Turn a query id into the query object it refers to.

        The parent class deserialises `value` first; if that yields `missing`
        or `None` it is returned as is. Otherwise the query object is rebuilt
        from the parameters registered for the id, falling back to
        `get_query_object_by_id` when the id is unknown to the lookup.
        """
        from flowmachine.core.server.query_schemas import FlowmachineQuerySchema

        table_name = super().deserialize(value, attr, data, **kwargs)
        if table_name is missing or table_name is None:
            return table_name

        # NOTE(review): the lookups below use the raw `value`, not the
        # deserialised `table_name` - confirm this is intentional.
        try:
            schema = FlowmachineQuerySchema()
            registered_params = QueryInfoLookup(
                get_redis()).get_query_params(value)
            return schema.load(registered_params)._flowmachine_query_obj
        except UnkownQueryIdError:
            return get_query_object_by_id(get_db(), value)
Ejemplo n.º 26
0
def test_cache_metadata_write_error(flowmachine_connect, dummy_redis,
                                    monkeypatch):
    """
    An error while writing cache metadata should leave the query unstored
    and its state machine in the errored state.
    """
    # Regression test for https://github.com/Flowminder/FlowKit/issues/833

    failing_writer = Mock(side_effect=TestException)
    dl_query = daily_location(date="2016-01-03", method="last")
    assert not dl_query.is_stored
    monkeypatch.setattr("flowmachine.core.cache.write_cache_metadata",
                        failing_writer)

    store_future = dl_query.store()
    with pytest.raises(TestException):
        store_future.result()
    assert not dl_query.is_stored

    state_machine = QueryStateMachine(get_redis(), dl_query.query_id,
                                      get_db().conn_id)
    assert state_machine.current_query_state == QueryState.ERRORED
Ejemplo n.º 27
0
def _get_query_kind_for_query_id(query_id: str) -> Union[None, str]:
    """
    Look up which kind of query a query id belongs to.

    Parameters
    ----------
    query_id : str
        Identifier of the query.

    Returns
    -------
    str or None
        The query kind registered for this query_id, or None if the
        lookup reports the id as unknown.
    """
    lookup = QueryInfoLookup(get_redis())
    try:
        query_kind = lookup.get_query_kind(query_id)
    except UnkownQueryIdError:
        query_kind = None
    return query_kind
Ejemplo n.º 28
0
def test_get_query_blocks_on_store():
    """
    While a store is in progress, get_query should block.
    """
    location_query = daily_location("2016-01-01",
                                    spatial_unit=make_spatial_unit("cell"))
    location_query.store().result()
    ticks = []

    def unlock(tick_log, redis_client, connection_id):
        # Put the query back into the queue, record 101 ticks, then drive it
        # through execute and finish.
        state_machine = QueryStateMachine(redis_client,
                                          location_query.query_id,
                                          connection_id)
        state_machine.enqueue()
        for tick in range(101):
            tick_log.append(tick)
        state_machine.execute()
        state_machine.finish()

    worker = Thread(target=unlock,
                    args=(ticks, get_redis(), get_db().conn_id))
    worker.start()
    location_query.get_query()
    # All ticks must have been recorded by the time get_query returns.
    assert len(ticks) == 101
    worker.join()
Ejemplo n.º 29
0
def mocked_connections(monkeypatch):
    """
    Fixture which replaces the logging setup, database connection, redis
    client and thread pool initialisation with mocks, and yields those mocks.

    The context that was bound before the fixture ran is bound again
    afterwards.

    Parameters
    ----------
    monkeypatch

    Yields
    ------
    tuple of mocks
        Mocks patched in for `flowmachine.core.init.set_log_level`,
        `flowmachine.core.init.Connection`, `redis.StrictRedis` and
        `ThreadPoolExecutor.__init__`, in that order.

    """

    logging_mock = Mock()

    connection_mock = Mock()
    # Let `engine.begin()` on the mocked connection be used as a context manager.
    begin_result = connection_mock.return_value.engine.begin.return_value
    begin_result.__enter__ = Mock()
    begin_result.__exit__ = Mock()
    connection_mock.return_value.fetch.return_value = MagicMock(
        return_value=[])

    redis_mock = Mock(name="mocked_connections_redis")
    tp_mock = Mock(return_value=None)

    monkeypatch.setattr(flowmachine.core.init, "set_log_level", logging_mock)
    monkeypatch.setattr(flowmachine.core.init, "Connection", connection_mock)
    monkeypatch.setattr("redis.StrictRedis", redis_mock)
    monkeypatch.setattr(concurrent.futures.thread.ThreadPoolExecutor,
                        "__init__", tp_mock)

    # Remember any existing context so it can be put back afterwards
    saved_db = get_db()
    saved_redis = get_redis()
    saved_executor = get_executor()

    yield logging_mock, connection_mock, redis_mock, tp_mock

    # Reset context
    bind_context(saved_db, saved_executor, saved_redis)
Ejemplo n.º 30
0
async def test_rerun_query_after_removed_from_cache(dummy_redis, server_config,
                                                    real_connections):
    """
    A query whose cache entry was invalidated can be run again and ends up
    stored again.
    """

    def query_spec():
        # A new dict on every call, so both runs get their own copy of the
        # same query parameters.
        return dict(
            query_kind="spatial_aggregate",
            locations=dict(
                query_kind="daily_location",
                date="2016-01-01",
                method="last",
                aggregation_unit="admin3",
            ),
        )

    # First run: wait for completion and check the result is stored.
    first_reply = await action_handler__run_query(config=server_config,
                                                  **query_spec())
    query_id = first_reply["payload"]["query_id"]
    qsm = QueryStateMachine(get_redis(), query_id, get_db().conn_id)
    qsm.wait_until_complete()
    query_obj = get_query_object_by_id(get_db(), query_id)
    assert query_obj.is_stored

    # Remove it from the cache again.
    query_obj.invalidate_db_cache()
    assert not query_obj.is_stored
    assert qsm.is_known

    # Second run of the same query.
    second_reply = await action_handler__run_query(config=server_config,
                                                   **query_spec())
    assert second_reply["status"] == ZMQReplyStatus.SUCCESS
    qsm.wait_until_complete()
    assert query_obj.is_stored