Exemple #1
0
    def wait_for_all(self,
                     promise: HaLinkMessagePromise,
                     timeout_sec: float = 30.0):
        """
        Suspend the calling thread until every message id carried by
        the promise has been confirmed as delivered by Motr.

        The promise is registered in self.waiting_clients together with
        a private Condition object. If the promise is already present
        in self.recently_delivered right after self.groom_unsorted()
        has run, the very first iteration does not block at all.

        Note that timeout_sec is applied to each individual wait, not
        to the call as a whole.

        Raises NotDelivered exception when timeout_sec exceeds.
        """

        wakeup = Condition()

        with self.lock:
            self.groom_unsorted(promise)
            self.waiting_clients[promise] = wakeup
            # Remember whether a confirmation is already available so
            # that the first pass of the loop can skip the blocking wait.
            confirmed_in_advance = promise in self.recently_delivered

        while not promise.is_empty():
            if not confirmed_in_advance:
                with wakeup:
                    LOG.log(TRACE, 'Blocking until %s is confirmed', promise)
                    wakeup.wait(timeout=timeout_sec)
            else:
                LOG.log(
                    TRACE, 'Promise %s has been confirmed before, '
                    'no need to block', promise)
                # The shortcut is valid for the first pass only.
                confirmed_in_advance = False
            with self.lock:
                self._verify_delivered(promise, timeout_sec)
                if not promise.is_empty():
                    # Some ids are still pending: register again and
                    # wait for the next confirmation.
                    self.waiting_clients[promise] = wakeup
Exemple #2
0
    def wait_for_all(self,
                     promise: HaLinkMessagePromise,
                     timeout_sec: float = 30.0):
        """
        Blocks the current thread until all of the messages in
        the promise are reported by Motr as delivered.

        The promise is registered in self.waiting_clients together with
        a private Condition. After every wake-up (notification or
        timeout) the confirmed ids found in self.recently_delivered are
        excluded from the promise; the loop ends once the promise is
        empty.

        Note that timeout_sec is applied to each individual wait, not
        to the call as a whole.

        Raises NotDelivered if a wait finishes and no confirmation for
        the promise is present in self.recently_delivered.
        """

        condition = Condition()
        with self.lock:
            self.waiting_clients[promise] = condition
            LOG.log(TRACE, 'waiting clients %s', self.waiting_clients)

        while not promise.is_empty():
            with condition:
                LOG.log(TRACE, 'Blocking until %s is confirmed', promise)
                condition.wait(timeout=timeout_sec)
            with self.lock:
                # Deregister before the check so that the promise does not
                # stay in waiting_clients when NotDelivered is raised.
                del self.waiting_clients[promise]
                if promise not in self.recently_delivered:
                    raise NotDelivered('None of message tags =' +
                                       str(promise) +
                                       '  were delivered to Motr')
                confirmed_msgs = self.recently_delivered.pop(promise)
                LOG.log(TRACE, 'Thread unblocked - %s just received',
                        confirmed_msgs)
                promise.exclude_ids(confirmed_msgs)
                if not promise.is_empty():
                    # Some ids are still pending: register again and
                    # wait for the next confirmation.
                    self.waiting_clients[promise] = condition
Exemple #3
0
 def check_if_delivered_locked(
         self, promise: HaLinkMessagePromise) -> HaLinkMessagePromise:
     """
     Exclude already confirmed message ids from the given promise.

     Must be called while self.lock is held; RuntimeError is raised if
     self.lock.locked() reports that the lock is not acquired.

     If the promise is present in self.recently_delivered, the
     confirmed ids are popped from there, the promise is removed from
     self.waiting_clients and the ids are excluded from the promise.
     The (possibly updated) promise object itself is returned.
     """
     if not self.lock.locked():
         raise RuntimeError('DeliveryHerald.lock not acquired')

     # Nothing has been confirmed for this promise yet - return it as is.
     if promise not in self.recently_delivered:
         return promise

     delivered = self.recently_delivered.pop(promise)
     LOG.debug('Thread unblocked - %s just received', delivered)
     del self.waiting_clients[promise]
     promise.exclude_ids(delivered)
     return promise
Exemple #4
0
    def _verify_delivered(self, promise: HaLinkMessagePromise,
                          timeout_sec: float):
        """
        Check whether some of the messages in the promise have been
        reported by Motr as delivered and exclude them from the promise.

        The calling function must hold self.lock.

        The promise is always removed from self.waiting_clients, even
        when NotDelivered is raised. timeout_sec is used for the error
        message only.

        Raises NotDelivered if self.recently_delivered has no entry for
        the promise.
        """

        # Deregister first so that the entry is gone even if we raise.
        del self.waiting_clients[promise]

        delivered = self.recently_delivered
        if promise in delivered:
            confirmed = delivered.pop(promise)
            LOG.log(TRACE, 'Thread unblocked - %s just received', confirmed)
            promise.exclude_ids(confirmed)
            return

        raise NotDelivered(''.join((
            'None of message tags =', str(promise),
            '  were delivered to Motr within ',
            str(timeout_sec), ' seconds timeout')))
Exemple #5
0
 def notify_hax_stop(self):
     """
     Send the hax stop notification and block until it is confirmed.

     The hax fid and endpoint are taken from self.consul_util and passed
     to self._ffi.hax_stop(); the message ids it returns are wrapped
     into a HaLinkMessagePromise that is awaited by means of
     self.herald.wait_for_all() with its default timeout.
     """
     LOG.debug('Notifying hax stop')
     fid = self.consul_util.get_hax_fid()
     endpoint = self.consul_util.get_hax_endpoint()
     message_ids = self._ffi.hax_stop(self._ha_ctx,
                                      fid.to_c(),
                                      make_c_str(endpoint))
     stop_promise = HaLinkMessagePromise(message_ids)
     self.herald.wait_for_all(stop_promise)
    def test_exception_raised_by_timeout(self):
        """
        wait_for_any() must raise NotDelivered when the only delivery
        reported by the notifier thread (halink_ctx=43) does not match
        any message of the awaited promise (halink_ctx=42).
        """
        herald = DeliveryHerald()
        notified_ok = True

        def fn():
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                sleep(1.5)
                herald.notify_delivered(MessageId(halink_ctx=43, tag=3))
            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        t = Thread(target=fn)
        t.start()

        m = MessageId
        try:
            with self.assertRaises(NotDelivered):
                herald.wait_for_any(HaLinkMessagePromise(
                    [m(42, 1), m(42, 3), m(42, 4)]),
                                    timeout_sec=5)
        finally:
            # Always join so that the notifier thread does not outlive
            # the test and its outcome is visible to the assertion below.
            t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
    def test_works_under_load(self):
        """
        wait_for_all() must return when 31 notifier threads report
        deliveries concurrently and four of the reported messages are
        the ones being awaited.
        """
        herald = DeliveryHerald()
        notified_ok = True

        def fn(msg: MessageId):
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                sleep(1.5)
                herald.notify_delivered(msg)
            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        threads = [
            Thread(target=fn, args=(MessageId(100, i), ))
            for i in range(1, 32)
        ]
        for t in threads:
            t.start()

        def m(x):
            return MessageId(halink_ctx=100, tag=x)

        try:
            herald.wait_for_all(HaLinkMessagePromise(
                [m(5), m(25), m(28), m(31)]),
                                timeout_sec=5)
        finally:
            # Always join so that no notifier thread outlives the test.
            for t in threads:
                t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
    def test_works_if_all_messages_confirmed(self):
        """
        wait_for_all() must return once both awaited messages have been
        reported as delivered by the notifier thread.
        """
        herald = DeliveryHerald()
        notified_ok = True

        def fn():
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                sleep(1.5)
                herald.notify_delivered(MessageId(halink_ctx=42, tag=3))
                herald.notify_delivered(MessageId(halink_ctx=42, tag=1))
            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        t = Thread(target=fn)
        t.start()

        m = MessageId
        try:
            herald.wait_for_all(HaLinkMessagePromise([m(42, 1),
                                                      m(42, 3)]),
                                timeout_sec=5)
        finally:
            # Always join so that the notifier thread does not outlive
            # the test.
            t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
Exemple #9
0
    def handle_ioq_stob_error(self, payload: Dict[str, Any]) -> None:
        """
        Broadcast the FAILED state for the device named in a stob IOQ
        error payload and wait until the broadcast is confirmed.

        The fid is parsed from payload['conf_sdev']; a null fid is
        logged and ignored. Otherwise a BroadcastHAStates command is
        handed over to self.planner, the resulting message ids are
        read from a one-slot reply queue and the call blocks in
        self.herald.wait_for_any() on them.
        """
        sdev_fid = Fid.parse(payload['conf_sdev'])
        if sdev_fid.is_null():
            LOG.debug('Fid is 0:0. Skipping the message.')
            return

        # One-slot queue through which the command consumer reports the
        # ids of the messages it has sent.
        reply_queue: Queue = Queue(1)
        failed_state = HAState(sdev_fid, status=ObjHealth.FAILED)
        command = BroadcastHAStates(states=[failed_state],
                                    reply_to=reply_queue)
        self.planner.add_command(command)

        message_ids: List[MessageId] = reply_queue.get()
        self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
Exemple #10
0
    def handle_ioq_stob_error(self, payload: Dict[str, Any]) -> None:
        """
        Broadcast the 'offline' state for the device named in a stob IOQ
        error payload and wait until the broadcast is confirmed.

        The fid is parsed from payload['conf_sdev']; a null fid is
        logged and ignored. Otherwise a BroadcastHAStates request is
        put to self.queue, the resulting message ids are read from a
        one-slot reply queue and the call blocks in
        self.herald.wait_for_any() on them.
        """
        sdev_fid = Fid.parse(payload['conf_sdev'])
        if sdev_fid.is_null():
            logging.debug('Fid is 0:0. Skipping the message.')
            return

        # One-slot queue through which the consumer of self.queue reports
        # the ids of the messages it has sent.
        reply_queue: Queue = Queue(1)
        offline_state = HAState(sdev_fid, status='offline')
        request = BroadcastHAStates(states=[offline_state],
                                    reply_to=reply_queue)
        self.queue.put(request)

        message_ids: List[MessageId] = reply_queue.get()
        self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
Exemple #11
0
    def wait_for_all(self,
                     promise: HaLinkMessagePromise,
                     timeout_sec: float = 30.0):
        """
        Suspend the calling thread until every message id carried by
        the promise has been confirmed as delivered by Motr.

        The promise is registered in self.waiting_clients together with
        a private Condition object. After each wake-up (notification or
        timeout) self._verify_delivered() is invoked under self.lock.

        Note that timeout_sec is applied to each individual wait, not
        to the call as a whole.

        Raises NotDelivered exception when timeout_sec exceeds.
        """

        wakeup = Condition()
        with self.lock:
            self.waiting_clients[promise] = wakeup
            LOG.log(TRACE, 'waiting clients %s', self.waiting_clients)

        while True:
            if promise.is_empty():
                # Everything has been confirmed, nothing left to wait for.
                break
            with wakeup:
                LOG.log(TRACE, 'Blocking until %s is confirmed', promise)
                wakeup.wait(timeout=timeout_sec)
            with self.lock:
                self._verify_delivered(promise, timeout_sec)
                if not promise.is_empty():
                    # Some ids are still pending: register again and
                    # wait for the next confirmation.
                    self.waiting_clients[promise] = wakeup
Exemple #12
0
    def handle_device_state_set(self, payload: Dict[str, Any]) -> None:
        """
        Broadcast the HA state derived from a device state payload and
        wait until the broadcast is confirmed.

        The payload is converted by self.to_ha_state(); when that yields
        nothing, the event is logged and ignored. Otherwise a
        BroadcastHAStates request is put to self.queue, the resulting
        message ids are read from a one-slot reply queue and the call
        blocks in self.herald.wait_for_any() on them.
        """
        # To add check for multiple object entries in a payload.
        # for objinfo in payload:
        state: Optional[HAState] = self.to_ha_state(payload)
        if not state:
            LOG.debug('No ha states to broadcast.')
            return

        # One-slot queue through which the consumer of self.queue reports
        # the ids of the messages it has sent.
        reply_queue: Queue = Queue(1)
        LOG.debug('HA broadcast, node: %s device: %s state: %s',
                  payload['node'], payload['device'], payload['state'])
        request = BroadcastHAStates(states=[state], reply_to=reply_queue)
        self.queue.put(request)

        message_ids: List[MessageId] = reply_queue.get()
        self.herald.wait_for_any(HaLinkMessagePromise(message_ids))
    def test_if_delivered_earlier_than_awaited_notified_immediately(self):
        """
        A delivery reported before wait_for_all() is called must
        unblock the awaiting thread right away rather than by timeout.
        """
        herald = DeliveryHerald()
        notified_ok = True
        thread_count = 1
        timeout_sec = 2
        latch = CountDownLatch(thread_count)

        def fn(msg: MessageId):
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                LOG.debug('Thread started')
                herald.notify_delivered(msg)
                LOG.debug('Notified delivery %s', msg)
                latch.count_down()
                LOG.debug('Main thread unblocked')

            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        threads = [
            Thread(target=fn, args=(MessageId(100, i + 1), ))
            for i in range(thread_count)
        ]

        for t in threads:
            t.start()
        # Block until all the threads come to latch.count_down() and thus
        # the message is notified for sure.
        # 'await' is a reserved keyword since Python 3.7, so the method
        # has to be looked up by name to keep this file parseable.
        getattr(latch, 'await')()

        def m(x):
            return MessageId(halink_ctx=100, tag=x)

        try:
            started = time()
            herald.wait_for_all(HaLinkMessagePromise([m(1)]),
                                timeout_sec=timeout_sec)
            finished = time()
        finally:
            for t in threads:
                t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
        # The bound must be the wait timeout itself: a larger bound would
        # also accept a thread that was unblocked by the timeout.
        self.assertLess(
            finished - started, timeout_sec,
            'Awaiting thread was unblocked only by a timeout. It means '
            'that unsorted_deliveries was analyzed too late.'
        )
    def test_if_delivered_earlier_than_awaited_wait_many(self):
        """
        Six deliveries are reported before wait_for_all() is called for
        two of them; the call must succeed and the four deliveries that
        nobody awaited must remain in herald.unsorted_deliveries.
        """
        herald = DeliveryHerald()
        notified_ok = True
        thread_count = 6
        latch = CountDownLatch(thread_count)

        def fn(msg: MessageId):
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                LOG.debug('Thread started')
                herald.notify_delivered(msg)
                LOG.debug('Notified delivery %s', msg)
                latch.count_down()
                LOG.debug('Main thread unblocked')

            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        threads = [
            Thread(target=fn, args=(MessageId(100, i + 1), ))
            for i in range(thread_count)
        ]

        for t in threads:
            t.start()
        # Block until all the threads come to latch.count_down() and thus
        # the message is notified for sure.
        # 'await' is a reserved keyword since Python 3.7, so the method
        # has to be looked up by name to keep this file parseable.
        getattr(latch, 'await')()

        def m(x):
            return MessageId(halink_ctx=100, tag=x)

        try:
            herald.wait_for_all(HaLinkMessagePromise([m(1), m(5)]),
                                timeout_sec=2)
        finally:
            for t in threads:
                t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
        self.assertEqual(4, len(herald.unsorted_deliveries.keys()))
    def test_it_works(self):
        """
        wait_for_any() must return once one of the three awaited
        messages has been reported as delivered by the notifier thread.
        """
        herald = DeliveryHerald()
        notified_ok = True

        def fn():
            # Without 'nonlocal' the assignment below would create a new
            # local variable and the final assertTrue could never fail.
            nonlocal notified_ok
            try:
                sleep(1.5)
                herald.notify_delivered(MessageId(halink_ctx=100, tag=1))
            except Exception:
                logging.exception('*** ERROR ***')
                notified_ok = False

        t = Thread(target=fn)
        t.start()

        m = MessageId
        try:
            herald.wait_for_any(HaLinkMessagePromise(
                [m(100, 1), m(100, 3), m(100, 4)]),
                                timeout_sec=10)
        finally:
            # Join even if the wait fails so that the notifier thread
            # does not outlive the test.
            t.join()
        self.assertTrue(notified_ok,
                        'Unexpected exception appeared in notifier thread')
Exemple #16
0
    def _do_work(self, q: Queue, motr: Motr):
        """
        Main loop of the handler thread: takes messages from q one by
        one and dispatches them according to their type.

        The queue is polled without blocking; while it is empty the
        thread sleeps for 0.2 seconds between attempts and checks
        self.is_stopped, which terminates the loop by means of
        StopIteration.

        Any other exception raised while a message is processed is
        logged and swallowed, so a single faulty message does not stop
        the thread.

        ffi.adopt_motr_thread() is invoked on entry, and
        ffi.shun_motr_thread() is invoked when the loop is left because
        of StopIteration.
        """
        ffi = motr._ffi
        LOG.info('Handler thread has started')
        ffi.adopt_motr_thread()

        def pull_msg():
            # Non-blocking read: None means that the queue is empty now.
            try:
                return q.get(block=False)
            except Empty:
                return None

        try:
            while True:
                try:
                    LOG.debug('Waiting for the next message')

                    # Poll the queue so that the stop flag is checked
                    # regularly even when no messages arrive.
                    item = pull_msg()
                    while item is None:
                        time.sleep(0.2)
                        if self.is_stopped:
                            raise StopIteration()
                        item = pull_msg()

                    LOG.debug('Got %s message from queue', item)
                    if isinstance(item, FirstEntrypointRequest):
                        LOG.debug('first entrypoint request, broadcast FAILED')
                        ids: List[MessageId] = motr.broadcast_ha_states([
                            HAState(fid=item.process_fid,
                                    status=ServiceHealth.FAILED)
                        ])
                        LOG.debug('waiting for broadcast of %s for ep: %s',
                                  ids, item.remote_rpc_endpoint)
                        # The reply is sent only after the broadcast of
                        # the FAILED state has been confirmed.
                        self.herald.wait_for_all(HaLinkMessagePromise(ids))
                        motr.send_entrypoint_request_reply(
                            EntrypointRequest(
                                reply_context=item.reply_context,
                                req_id=item.req_id,
                                remote_rpc_endpoint=item.remote_rpc_endpoint,
                                process_fid=item.process_fid,
                                git_rev=item.git_rev,
                                pid=item.pid,
                                is_first_request=item.is_first_request))
                    elif isinstance(item, EntrypointRequest):
                        # While replying any Exception is caught. In such a
                        # case, the motr process will receive EAGAIN and
                        # hence will need to make new attempt by itself
                        motr.send_entrypoint_request_reply(item)
                    elif isinstance(item, ProcessEvent):
                        self._update_process_status(q, item.evt)
                    elif isinstance(item, HaNvecGetEvent):
                        fn = motr.ha_nvec_get_reply
                        # If a consul-related exception appears, it will
                        # be processed by repeat_if_fails.
                        #
                        # This thread will become blocked until that
                        # intermittent error gets resolved.
                        decorated = (repeat_if_fails(wait_seconds=5))(fn)
                        decorated(item)
                    elif isinstance(item, BroadcastHAStates):
                        LOG.info('HA states: %s', item.states)
                        ha_states = self.update_process_failure(q, item.states)
                        result: List[MessageId] = motr.broadcast_ha_states(
                            ha_states)
                        # The ids of the sent messages are handed back only
                        # if the sender has supplied a reply queue.
                        if item.reply_to:
                            item.reply_to.put(result)
                    elif isinstance(item, StobIoqError):
                        LOG.info('Stob IOQ: %s', item.fid)
                        payload = dump_json(item)
                        LOG.debug('Stob IOQ JSON: %s', payload)
                        offset = self.eq_publisher.publish('stob-ioq', payload)
                        LOG.debug('Written to epoch: %s', offset)
                    elif isinstance(item, SnsRepairStatus):
                        LOG.info('Requesting SNS repair status')
                        status = motr.get_repair_status(item.fid)
                        LOG.info('SNS repair status is received: %s', status)
                        item.reply_to.put(status)
                    elif isinstance(item, SnsRebalanceStatus):
                        LOG.info('Requesting SNS rebalance status')
                        status = motr.get_rebalance_status(item.fid)
                        LOG.info('SNS rebalance status is received: %s',
                                 status)
                        item.reply_to.put(status)
                    elif isinstance(item, SnsRebalanceStart):
                        LOG.info('Requesting SNS rebalance start')
                        motr.start_rebalance(item.fid)
                    elif isinstance(item, SnsRebalanceStop):
                        LOG.info('Requesting SNS rebalance stop')
                        motr.stop_rebalance(item.fid)
                    elif isinstance(item, SnsRebalancePause):
                        LOG.info('Requesting SNS rebalance pause')
                        motr.pause_rebalance(item.fid)
                    elif isinstance(item, SnsRebalanceResume):
                        LOG.info('Requesting SNS rebalance resume')
                        motr.resume_rebalance(item.fid)
                    elif isinstance(item, SnsRepairStart):
                        LOG.info('Requesting SNS repair start')
                        motr.start_repair(item.fid)
                    elif isinstance(item, SnsRepairStop):
                        LOG.info('Requesting SNS repair stop')
                        motr.stop_repair(item.fid)
                    elif isinstance(item, SnsRepairPause):
                        LOG.info('Requesting SNS repair pause')
                        motr.pause_repair(item.fid)
                    elif isinstance(item, SnsRepairResume):
                        LOG.info('Requesting SNS repair resume')
                        motr.resume_repair(item.fid)

                    else:
                        LOG.warning('Unsupported event type received: %s',
                                    item)
                except StopIteration:
                    # Stop request: let it reach the outer handler.
                    raise
                except Exception:
                    # no op, swallow the exception
                    LOG.exception('**ERROR**')
        except StopIteration:
            ffi.shun_motr_thread()
        finally:
            LOG.info('Handler thread has exited')