    def test_entrypoint_requests_share_same_group(self):
        planner = WorkPlanner()
        ep1 = entrypoint()
        ep2 = entrypoint()

        ep1 = planner._assign_group(ep1)
        ep2 = planner._assign_group(ep2)
        self.assertEqual([0, 0], [ep1.group, ep2.group])

    def test_entrypoint_not_paralleled_with_broadcast(self):
        planner = WorkPlanner()
        bcast = broadcast()
        ep1 = entrypoint()

        bcast = planner._assign_group(bcast)
        ep1 = planner._assign_group(ep1)
        self.assertEqual([0, 1], [bcast.group, ep1.group])

    def test_no_hang_when_group_id_cycled(self):

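        # Start near the group-id rollover point so that the test exercises
        # the wrap-around: group ids continue 99999, 10**5 and then cycle
        # back to 0 (see the assertion at the end of the test).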
        def my_state():
            return State(next_group_id=99999,
                         active_commands=LinkedList(),
                         current_group_id=99999,
                         next_group_commands=set(),
                         is_shutdown=False)

        planner = WorkPlanner(init_state_factory=my_state)

        tracker = GroupTracker()
        thread_count = 1
        for i in range(10):
            planner.add_command(process_event())

        for j in range(thread_count):
            planner.add_command(Die())

        exc = None

        def fn(planner: WorkPlanner):
            nonlocal exc
            try:
                while True:
                    LOG.log(TRACE, "Requesting for a work")
                    # import pudb.remote
                    # pudb.remote.set_trace(term_size=(120, 40), port=9998)
                    cmd = planner.get_next_command()
                    LOG.log(TRACE, "The command is received %s [group=%s]",
                            type(cmd), cmd.group)

                    if isinstance(cmd, Die):
                        LOG.log(TRACE,
                                "Poison pill is received - exiting. Bye!")
                        planner.notify_finished(cmd)
                        break
                    tracker.log(cmd)
                    LOG.log(TRACE, "The job is done, notifying the planner")
                    planner.notify_finished(cmd)
                    LOG.log(TRACE, "Notified. ")

            except Exception as e:
                LOG.exception('*** ERROR ***')
                exc = e

        workers = [
            Thread(target=fn, args=(planner, )) for t in range(thread_count)
        ]
        for t in workers:
            t.start()

        for t in workers:
            t.join()
        if exc:
            raise exc
        groups_processed = tracker.get_tracks()
        self.assertEqual([99999, 10**5, 0, 1, 2, 3, 4,
                          5, 6, 7], groups_processed)

    def test_ha_nvec_get_shares_group_always(self):
        planner = WorkPlanner()

        assign = planner._assign_group

        msgs_after_bc = [
            assign(broadcast()),
            assign(nvec_get()),
            assign(broadcast()),
            assign(entrypoint())
        ]
        msgs_after_ep = [
            assign(entrypoint()),
            assign(nvec_get()),
            assign(broadcast()),
            assign(entrypoint())
        ]
        msgs_after_nvec = [
            assign(entrypoint()),
            assign(nvec_get()),
            assign(nvec_get()),
            assign(entrypoint())
        ]
        self.assertEqual([0, 0, 1, 2], [m.group for m in msgs_after_bc])
        self.assertEqual([2, 2, 3, 4], [m.group for m in msgs_after_ep])
        self.assertEqual([4, 4, 4, 4], [m.group for m in msgs_after_nvec])

Example #5

def main():
    # Note: no logging must happen before this call.
    # Otherwise the log configuration will not apply.
    setup_logging()

    # [KN] The elements in the queue will appear if
    # 1. A callback is invoked from ha_link (this will happen in a motr
    #    thread which must be free ASAP)
    # 2. A new HA notification has come from Consul via HTTP
    # [KN] The messages are consumed by the Python threads created by
    # the _run_qconsumer_thread function.
    #
    # [KN] Note: The server is launched in the main thread.
    planner = WorkPlanner()

    util: ConsulUtil = ConsulUtil()
    _remove_stale_session(util)
    cfg: HL_Fids = _get_motr_fids(util)

    LOG.info('Welcome to HaX')
    LOG.info(f'Setting up ha_link interface with the options as follows: '
             f'hax fid = {cfg.hax_fid}, hax endpoint = {cfg.hax_ep}, '
             f'HA fid = {cfg.ha_fid}')

    ffi = HaxFFI()
    herald = DeliveryHerald()
    motr = Motr(planner=planner, ffi=ffi, herald=herald, consul_util=util)

    # Note that the consumer threads must be started before we invoke
    # motr.start(..). Reason: the hax process will send an entrypoint
    # request and somebody needs to reply to it.

    # TODO make the number of threads configurable
    consumer_threads = [
        _run_qconsumer_thread(planner, motr, herald, util, i) for i in range(4)
    ]

    try:
        # [KN] We use just the first profile for Spiel API for now.
        motr.start(cfg.hax_ep,
                   process=cfg.hax_fid,
                   ha_service=cfg.ha_fid,
                   profile=cfg.profiles[0])
        LOG.info('Motr API has been started')
        rconfc_starter = _run_rconfc_starter_thread(motr, consul_util=util)

        stats_updater = _run_stats_updater_thread(motr, consul_util=util)
        event_poller = _run_thread(create_ha_thread(planner, util))
        # [KN] This is a blocking call. It will work until the program is
        # terminated by a signal.

        server = ServerRunner(planner, herald, consul_util=util)
        server.run(threads_to_wait=[
            *consumer_threads, stats_updater, rconfc_starter, event_poller
        ])
    except Exception:
        LOG.exception('Exiting due to an exception')
    finally:
        motr.fini()

    def test_entrypoint_executed_asap(self):
        planner = WorkPlanner()

        a = planner._assign_group
        self.assertEqual(
            [0, 1, 0],
            [a(i)[0].group
             for i in [broadcast(), broadcast(),
                       entrypoint()]])

    def test_broadcast_starts_new_group(self):
        planner = WorkPlanner()

        assign = planner._assign_group

        msgs = [
            assign(broadcast()),
            assign(broadcast()),
            assign(broadcast()),
            assign(entrypoint())
        ]
        self.assertEqual([0, 1, 2, 3], [m.group for m in msgs])

    def test_broadcast_does_not_start_new_group(self):
        planner = WorkPlanner()

        assign = planner._assign_group

        msgs = [
            assign(broadcast()),
            assign(broadcast()),
            assign(broadcast()),
            assign(nvec_get())
        ]
        self.assertEqual([0, 1, 2, 0], [m.group for (m, _) in msgs])

    def test_group_id_cycled(self):
        def my_state():
            return State(next_group_id=99999,
                         active_commands=LinkedList(),
                         current_group_id=99999,
                         next_group_commands=set(),
                         is_shutdown=False)

        planner = WorkPlanner(init_state_factory=my_state)
        assign = planner._assign_group

        msgs = [
            assign(broadcast()),
            assign(broadcast()),
            assign(broadcast()),
            assign(broadcast()),
        ]
        self.assertEqual([99999, 10**5, 0, 1], [m.group for (m, _) in msgs])

    def test_parallelism_is_possible(self):
        planner = WorkPlanner()
        for i in range(40):
            planner.add_command(entrypoint())

        for j in range(4):
            planner.add_command(Die())

        exc = None
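        # Any exception raised inside a worker thread is captured here and
        # re-raised in the main thread after the workers have joined.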

        def fn(planner: WorkPlanner):
            nonlocal exc
            try:
                while True:
                    LOG.log(TRACE, "Requesting for a work")
                    cmd = planner.get_next_command()
                    LOG.log(TRACE, "The command is received")
                    if isinstance(cmd, Die):
                        LOG.log(TRACE,
                                "Poison pill is received - exiting. Bye!")
                        break

                    sleep(0.5)
                    LOG.log(TRACE, "The job is done, notifying the planner")
                    planner.notify_finished(cmd)
                    LOG.log(TRACE, "Notified. ")

            except Exception as e:
                LOG.exception('*** ERROR ***')
                exc = e

        workers = [Thread(target=fn, args=(planner, )) for t in range(4)]
        time_1 = time.time()
        for t in workers:
            t.start()

        for t in workers:
            t.join()
        time_2 = time.time()
        logging.info('Processing time %s', time_2 - time_1)
        if exc:
            raise exc
        self.assertTrue(planner.is_empty(), 'Not all commands were read out')
        # Every thread sleeps for 500 ms per command. 40 commands * 0.5 s
        # gives 20 seconds if the commands were executed sequentially.
        self.assertLess(time_2 - time_1, 19, 'Suspiciously slow')
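
Taken together, the worker loops in these tests share one contract: every
command obtained via get_next_command() is handed back via notify_finished(),
and a Die message is the poison pill that stops a worker. Below is a minimal
consumer sketch distilled from the tests above; it assumes the same imports
as the surrounding examples, and the logging and error capture of the real
workers are omitted.

def consume(planner: WorkPlanner) -> None:
    while True:
        cmd = planner.get_next_command()  # blocks until work is available
        try:
            if isinstance(cmd, Die):
                break  # poison pill: this worker exits
            ...  # actual command processing goes here
        finally:
            # Hand the command back so the planner can make progress; most
            # of the loops above call notify_finished() even for Die.
            planner.notify_finished(cmd)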

Example #14

    def update_process_failure(self, planner: WorkPlanner,
                               ha_states: List[HAState]) -> List[HAState]:
        new_ha_states: List[HAState] = []
        for state in ha_states:
            # We are only concerned with process statuses.
            if state.fid.container == ObjT.PROCESS.value:
                current_status = self.consul.get_process_current_status(
                    state.status, state.fid)
                if current_status == ServiceHealth.OK:
                    if (self.consul.get_process_local_status(
                            state.fid) == 'M0_CONF_HA_PROCESS_STARTED'):
                        continue
                if current_status in (ServiceHealth.FAILED,
                                      ServiceHealth.STOPPED):
                    if (self.consul.get_process_local_status(
                            state.fid) == 'M0_CONF_HA_PROCESS_STOPPED'):
                        # Consul may report failure of a process multiple
                        # times, so we don't want to send duplicate failure
                        # notifications; that may delay cleanup activities.
                        continue
                if current_status == ServiceHealth.UNKNOWN:
                    # We got the service status as UNKNOWN: hax was notified
                    # about a process failure but could not confirm whether
                    # the process is in a failed state or has failed and
                    # restarted. So, in order not to lose the event, we try
                    # again to confirm the real-time process status by
                    # enqueuing a broadcast event specific to this process.
                    # It is expected that the process status eventually gets
                    # confirmed as either failed or passing (OK).
                    # This situation typically arises due to a delay in
                    # receiving the failure notification, during which the
                    # corresponding process might be restarting or have
                    # already restarted. Thus it is important to confirm
                    # the real-time status of the process before
                    # broadcasting the failure.
                    current_status = ServiceHealth.UNKNOWN
                    planner.add_command(
                        BroadcastHAStates(
                            states=[HAState(fid=state.fid,
                                            status=ServiceHealth.FAILED)],
                            reply_to=None))
                if current_status not in (ServiceHealth.UNKNOWN,
                                          ServiceHealth.OFFLINE):
                    # We also need to account for and report the failure of
                    # remote Motr processes to this node's hax and motr
                    # processes. When Consul reports a remote process
                    # failure, hax confirms its current status from the
                    # Consul KV, updates the list of failed services and
                    # also adds it to the broadcast list.
                    if current_status != ServiceHealth.OK:
                        event = m0HaProcessEvent.M0_CONF_HA_PROCESS_STOPPED
                    else:
                        event = m0HaProcessEvent.M0_CONF_HA_PROCESS_STARTED
                    self.consul.update_process_status(
                        ConfHaProcess(
                            chp_event=event,
                            chp_type=int(
                                m0HaProcessType.M0_CONF_HA_PROCESS_M0D),
                            chp_pid=0,
                            fid=state.fid))
                new_ha_states.append(
                    HAState(fid=state.fid, status=current_status))
            else:
                new_ha_states.append(state)
        return new_ha_states

Example #15

    def _do_work(self, planner: WorkPlanner, motr: Motr):
        LOG.info('Handler thread has started')

        try:
            while True:
                try:
                    LOG.debug('Waiting for the next message')

                    item = planner.get_next_command()

                    LOG.debug('Got %s message from planner', item)
                    if isinstance(item, FirstEntrypointRequest):
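                        # A first-time request is re-wrapped into a regular
                        # EntrypointRequest and replied to the same way.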
                        motr.send_entrypoint_request_reply(
                            EntrypointRequest(
                                reply_context=item.reply_context,
                                req_id=item.req_id,
                                remote_rpc_endpoint=item.remote_rpc_endpoint,
                                process_fid=item.process_fid,
                                git_rev=item.git_rev,
                                pid=item.pid,
                                is_first_request=item.is_first_request))
                    elif isinstance(item, EntrypointRequest):
                        # While replying, any exception is caught. In such a
                        # case, the motr process will receive EAGAIN and
                        # hence will need to make a new attempt by itself.
                        motr.send_entrypoint_request_reply(item)
                    elif isinstance(item, ProcessEvent):
                        self._update_process_status(planner, motr, item.evt)
                    elif isinstance(item, HaNvecGetEvent):
                        fn = motr.ha_nvec_get_reply
                        # If a consul-related exception appears, it will
                        # be processed by repeat_if_fails.
                        #
                        # This thread will become blocked until that
                        # intermittent error gets resolved.
                        decorated = (repeat_if_fails(wait_seconds=5))(fn)
                        decorated(item)
                    elif isinstance(item, HaNvecSetEvent):
                        fn = motr.ha_nvec_set_process
                        # If a consul-related exception appears, it will
                        # be processed by repeat_if_fails.
                        #
                        # This thread will become blocked until that
                        # intermittent error gets resolved.
                        decorated = (repeat_if_fails(wait_seconds=5))(fn)
                        decorated(item)
                    elif isinstance(item, BroadcastHAStates):
                        LOG.info('HA states: %s', item.states)
                        ha_states = self.update_process_failure(
                            planner, item.states)
                        result: List[MessageId] = motr.broadcast_ha_states(
                            ha_states)
                        if item.reply_to:
                            item.reply_to.put(result)
                    elif isinstance(item, StobIoqError):
                        LOG.info('Stob IOQ: %s', item.fid)
                        payload = dump_json(item)
                        LOG.debug('Stob IOQ JSON: %s', payload)
                        offset = self.eq_publisher.publish('stob-ioq', payload)
                        LOG.debug('Written to epoch: %s', offset)
                    elif isinstance(item, SnsRepairStatus):
                        LOG.info('Requesting SNS repair status')
                        status = motr.get_repair_status(item.fid)
                        LOG.info('SNS repair status is received: %s', status)
                        item.reply_to.put(status)
                    elif isinstance(item, SnsRebalanceStatus):
                        LOG.info('Requesting SNS rebalance status')
                        status = motr.get_rebalance_status(item.fid)
                        LOG.info('SNS rebalance status is received: %s',
                                 status)
                        item.reply_to.put(status)
                    elif isinstance(item, SnsRebalanceStart):
                        LOG.info('Requesting SNS rebalance start')
                        motr.start_rebalance(item.fid)
                    elif isinstance(item, SnsRebalanceStop):
                        LOG.info('Requesting SNS rebalance stop')
                        motr.stop_rebalance(item.fid)
                    elif isinstance(item, SnsRebalancePause):
                        LOG.info('Requesting SNS rebalance pause')
                        motr.pause_rebalance(item.fid)
                    elif isinstance(item, SnsRebalanceResume):
                        LOG.info('Requesting SNS rebalance resume')
                        motr.resume_rebalance(item.fid)
                    elif isinstance(item, SnsRepairStart):
                        LOG.info('Requesting SNS repair start')
                        motr.start_repair(item.fid)
                    elif isinstance(item, SnsRepairStop):
                        LOG.info('Requesting SNS repair stop')
                        motr.stop_repair(item.fid)
                    elif isinstance(item, SnsRepairPause):
                        LOG.info('Requesting SNS repair pause')
                        motr.pause_repair(item.fid)
                    elif isinstance(item, SnsRepairResume):
                        LOG.info('Requesting SNS repair resume')
                        motr.resume_repair(item.fid)
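                    # Die is the poison pill: StopIteration below unwinds
                    # the loop so that this consumer thread can exit (the
                    # first consumer also stops Motr, see the handler below).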
                    elif isinstance(item, Die):
                        raise StopIteration()
                    else:
                        LOG.warning('Unsupported event type received: %s',
                                    item)
                except StopIteration:
                    raise
                except Exception:
                    # no op, swallow the exception
                    LOG.exception('**ERROR**')
                finally:
                    planner.notify_finished(item)
        except StopIteration:
            LOG.info('Consumer Stopped')
            if self.idx == 0:
                motr.stop()
        finally:
            LOG.info('Handler thread has exited')

Example #16

def main():
    # Note: no logging must happen before this call.
    # Otherwise the log configuration will not apply.
    setup_logging()
    set_locale()
    inject.configure(di_configuration)

    state = inject.instance(HaxGlobalState)

    # [KN] The elements in the work planner will appear if
    # 1. A callback is invoked from ha_link (this will happen in a motr
    #    thread which must be free ASAP)
    # 2. A new HA notification has come from Consul via HTTP
    # [KN] The messages are consumed by the Python threads created by
    # the _run_qconsumer_thread function.
    #
    # [KN] Note: The server is launched in the main thread.
    planner = WorkPlanner()

    def handle_signal(sig, frame):
        state.set_stopping()
        planner.shutdown()

    # This is necessary to allow hax to exit early if Consul is not available
    # (otherwise _get_motr_fids() may retry forever even if the hax
    # process needs to shut down).
    signal.signal(signal.SIGINT, handle_signal)

    util: ConsulUtil = ConsulUtil()
    # Avoid removing the session on hax start, as this will happen on every
    # node; leader election would then keep re-triggering until the final
    # hax node starts, delaying further bootstrapping operations.
    _remove_stale_session(util)
    cfg: HL_Fids = _get_motr_fids(util)
    hax_http_port = util.get_hax_http_port()
    util.init_motr_processes_status()

    LOG.info('Welcome to HaX')
    LOG.info(f'Setting up ha_link interface with the options as follows: '
             f'hax fid = {cfg.hax_fid}, hax endpoint = {cfg.hax_ep}, '
             f'HA fid = {cfg.ha_fid}')

    ffi = HaxFFI()
    herald = DeliveryHerald()
    motr = Motr(planner=planner, ffi=ffi, herald=herald, consul_util=util)

    # Note that the consumer threads must be started before we invoke
    # motr.start(..). Reason: the hax process will send an entrypoint
    # request and somebody needs to reply to it.

    # TODO make the number of threads configurable
    consumer_threads = [
        _run_qconsumer_thread(planner, motr, herald, util, i)
        for i in range(32)
    ]

    try:
        # [KN] We use just the first profile for Spiel API for now.
        motr.start(cfg.hax_ep,
                   process=cfg.hax_fid,
                   ha_service=cfg.ha_fid,
                   profile=cfg.profiles[0])
        LOG.info('Motr API has been started')
        rconfc_starter = _run_rconfc_starter_thread(motr, consul_util=util)

        stats_updater = _run_stats_updater_thread(motr, consul_util=util)
        bc_updater = _run_bc_updater_thread(motr, consul_util=util)
        event_poller = _run_thread(create_ha_thread(planner, util))
        # [KN] This is a blocking call. It will work until the program is
        # terminated by a signal.

        server = ServerRunner(planner,
                              herald,
                              consul_util=util,
                              hax_state=state)
        server.run(threads_to_wait=[
            *consumer_threads, stats_updater, bc_updater, rconfc_starter,
            event_poller
        ],
                   port=hax_http_port)
    except Exception:
        LOG.exception('Exiting due to an exception')
    finally:
        motr.fini()

    def test_groups_processed_sequentially_4_threads(self):
        planner = WorkPlanner()
        group_idx = 0

        def ret_values(cmd: BaseMessage) -> bool:
            nonlocal group_idx
            # We don't care about the group distribution logic
            # in this test. Instead, we concentrate on how different group
            # numbers are processed by the workers and the order
            # in which they are allowed to process the messages.
            #
            # _assign_group is invoked under a lock, so this
            # increment is thread-safe.
            values = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
            ret = bool(values[group_idx])
            group_idx += 1
            return ret

        setattr(planner, '_should_increase_group',
                Mock(side_effect=ret_values))
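        # Patching _should_increase_group makes the group layout
        # deterministic: with the alternating values above, the ten
        # commands land in groups [0, 1, 1, 2, 2, 3, 3, 4, 4, 5].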
        tracker = GroupTracker()
        thread_count = 4
        for i in range(10):
            planner.add_command(entrypoint())

        for j in range(thread_count):
            planner.add_command(Die())

        exc = None

        def fn(planner: WorkPlanner):
            nonlocal exc
            try:
                while True:
                    LOG.log(TRACE, "Requesting for a work")
                    cmd = planner.get_next_command()
                    LOG.log(TRACE, "The command is received %s [group=%s]",
                            type(cmd), cmd.group)

                    if isinstance(cmd, Die):
                        LOG.log(TRACE,
                                "Poison pill is received - exiting. Bye!")
                        planner.notify_finished(cmd)
                        break
                    tracker.log(cmd)
                    LOG.log(TRACE, "The job is done, notifying the planner")
                    planner.notify_finished(cmd)
                    LOG.log(TRACE, "Notified. ")

            except Exception as e:
                LOG.exception('*** ERROR ***')
                exc = e

        workers = [
            Thread(target=fn, args=(planner, )) for t in range(thread_count)
        ]
        for t in workers:
            t.start()

        for t in workers:
            t.join()
        if exc:
            raise exc
        groups_processed = tracker.get_tracks()
        self.assertEqual([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], groups_processed)

    def test_entrypoint_request_processed_asap(self):
        planner = WorkPlanner()
        group_idx = 0

        def ret_values(cmd: BaseMessage) -> bool:
            nonlocal group_idx
            # We don't care about the group distribution logic
            # in this test. Instead, we concentrate on how different group
            # numbers are processed by the workers and the order
            # in which they are allowed to process the messages.
            #
            # _assign_group is invoked under a lock, so this
            # increment is thread-safe.
            values = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
            ret = bool(values[group_idx])
            group_idx += 1
            return ret

        tracker = TimeTracker()
        thread_count = 4
        planner.add_command(broadcast())
        planner.add_command(broadcast())
        planner.add_command(broadcast())
        planner.add_command(entrypoint())

        for j in range(thread_count):
            planner.add_command(Die())

        exc = None

        def fn(planner: WorkPlanner):
            nonlocal exc
            try:
                while True:
                    LOG.log(TRACE, "Requesting for a work")
                    cmd = planner.get_next_command()
                    LOG.log(TRACE, "The command is received %s [group=%s]",
                            type(cmd), cmd.group)
                    if isinstance(cmd, BroadcastHAStates):
                        time.sleep(1.5)

                    if isinstance(cmd, Die):
                        LOG.log(TRACE,
                                "Poison pill is received - exiting. Bye!")
                        planner.notify_finished(cmd)
                        break
                    tracker.log(cmd)
                    LOG.log(TRACE, "The job is done, notifying the planner")
                    planner.notify_finished(cmd)
                    LOG.log(TRACE, "Notified. ")

            except Exception as e:
                LOG.exception('*** ERROR ***')
                exc = e

        workers = [
            Thread(target=fn, args=(planner, )) for t in range(thread_count)
        ]
        for t in workers:
            t.start()

        for t in workers:
            t.join()
        if exc:
            raise exc
        tracks = tracker.get_tracks()
        (cmd, ts) = tracks[0]
        self.assertTrue(isinstance(cmd, EntrypointRequest))

    def test_workers_not_blocked_by_future_work(self):
        planner = WorkPlanner()
        group_idx = 0

        tracker = TimeTracker()
        thread_count = 2
        # We add way more commands than we have workers now
        for i in range(8):
            planner.add_command(broadcast())

        planner.add_command(entrypoint())
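        # The entrypoint request is queued behind eight slow broadcasts; the
        # assertions at the end check that it is still processed promptly.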

        for j in range(thread_count):
            planner.add_command(Die())

        exc = None

        def fn(planner: WorkPlanner):
            nonlocal exc
            try:
                while True:
                    LOG.log(TRACE, "Requesting for a work")
                    cmd = planner.get_next_command()
                    LOG.log(TRACE, "The command is received %s [group=%s]",
                            type(cmd), cmd.group)

                    if isinstance(cmd, BroadcastHAStates):
                        time.sleep(1)

                    if isinstance(cmd, EntrypointRequest):
                        planner.shutdown()

                    if isinstance(cmd, Die):
                        LOG.log(TRACE,
                                "Poison pill is received - exiting. Bye!")
                        planner.notify_finished(cmd)
                        break
                    tracker.log(cmd)
                    LOG.log(TRACE, "The job is done, notifying the planner")
                    planner.notify_finished(cmd)
                    LOG.log(TRACE, "Notified. ")

            except Exception as e:
                LOG.exception('*** ERROR ***')
                exc = e

        workers = [
            Thread(target=fn, args=(planner, )) for t in range(thread_count)
        ]

        t0 = time.time()
        for t in workers:
            t.start()

        for t in workers:
            t.join()
        if exc:
            raise exc
        tracks = tracker.get_tracks()

        idx, (cmd, ts) = self.find(
            tracks,
            lambda a: isinstance(a[0], EntrypointRequest),
            'EntrypointRequest not processed')
        self.assertTrue(ts - t0 < 3)
        self.assertTrue(len(tracks) < 4)