def _do_work(self, q: Queue, motr: Motr):
    """Poll ``q`` for messages and dispatch each of them to ``motr``.

    ``motr._ffi.adopt_motr_thread()`` is called once before the loop.
    The loop ends when ``self.is_stopped`` is seen to be true while the
    queue is empty; ``motr._ffi.shun_motr_thread()`` is called at that
    point. Any other exception raised while fetching or handling a
    message is logged and swallowed, so the loop keeps running.

    Args:
        q: Queue the messages are taken from.
        motr: Object whose methods are invoked to handle the messages.
    """
    ffi = motr._ffi
    LOG.info('Handler thread has started')
    ffi.adopt_motr_thread()

    # Commands that only log a line and call one motr method with the
    # fid: (message type, log line, motr method name). They are tested
    # in exactly this order.
    fid_commands = (
        (SnsRebalanceStart, 'Requesting SNS rebalance start',
         'start_rebalance'),
        (SnsRebalanceStop, 'Requesting SNS rebalance stop',
         'stop_rebalance'),
        (SnsRebalancePause, 'Requesting SNS rebalance pause',
         'pause_rebalance'),
        (SnsRebalanceResume, 'Requesting SNS rebalance resume',
         'resume_rebalance'),
        (SnsRepairStart, 'Requesting SNS repair start',
         'start_repair'),
        (SnsRepairStop, 'Requesting SNS repair stop',
         'stop_repair'),
        (SnsRepairPause, 'Requesting SNS repair pause',
         'pause_repair'),
        (SnsRepairResume, 'Requesting SNS repair resume',
         'resume_repair'),
    )

    def next_message():
        """Return the next non-None message, polling every 0.2 s.

        Raises StopIteration when the queue yields nothing and
        ``self.is_stopped`` is set.
        """
        while True:
            try:
                candidate = q.get_nowait()
            except Empty:
                candidate = None
            if candidate is not None:
                return candidate
            time.sleep(0.2)
            if self.is_stopped:
                raise StopIteration()

    def dispatch(msg):
        """Handle one message according to its type."""
        if isinstance(msg, FirstEntrypointRequest):
            LOG.debug('first entrypoint request, broadcast FAILED')
            message_ids: List[MessageId] = motr.broadcast_ha_states([
                HAState(fid=msg.process_fid, status=ServiceHealth.FAILED)
            ])
            LOG.debug('waiting for broadcast of %s for ep: %s',
                      message_ids, msg.remote_rpc_endpoint)
            # The reply is sent only after the broadcast is waited for.
            self.herald.wait_for_all(HaLinkMessagePromise(message_ids))
            reply = EntrypointRequest(
                reply_context=msg.reply_context,
                req_id=msg.req_id,
                remote_rpc_endpoint=msg.remote_rpc_endpoint,
                process_fid=msg.process_fid,
                git_rev=msg.git_rev,
                pid=msg.pid,
                is_first_request=msg.is_first_request)
            motr.send_entrypoint_request_reply(reply)
            return

        if isinstance(msg, EntrypointRequest):
            # While replying any Exception is caught. In such a
            # case, the motr process will receive EAGAIN and
            # hence will need to make new attempt by itself
            motr.send_entrypoint_request_reply(msg)
            return

        if isinstance(msg, ProcessEvent):
            self._update_process_status(q, msg.evt)
            return

        if isinstance(msg, HaNvecGetEvent):
            nvec_reply = motr.ha_nvec_get_reply
            # If a consul-related exception appears, it will
            # be processed by repeat_if_fails.
            #
            # This thread will become blocked until that
            # intermittent error gets resolved.
            with_retries = repeat_if_fails(wait_seconds=5)
            with_retries(nvec_reply)(msg)
            return

        if isinstance(msg, BroadcastHAStates):
            LOG.info('HA states: %s', msg.states)
            states = self.update_process_failure(q, msg.states)
            outcome: List[MessageId] = motr.broadcast_ha_states(states)
            if msg.reply_to:
                msg.reply_to.put(outcome)
            return

        if isinstance(msg, StobIoqError):
            LOG.info('Stob IOQ: %s', msg.fid)
            body = dump_json(msg)
            LOG.debug('Stob IOQ JSON: %s', body)
            epoch = self.eq_publisher.publish('stob-ioq', body)
            LOG.debug('Written to epoch: %s', epoch)
            return

        if isinstance(msg, SnsRepairStatus):
            LOG.info('Requesting SNS repair status')
            repair_status = motr.get_repair_status(msg.fid)
            LOG.info('SNS repair status is received: %s', repair_status)
            msg.reply_to.put(repair_status)
            return

        if isinstance(msg, SnsRebalanceStatus):
            LOG.info('Requesting SNS rebalance status')
            rebalance_status = motr.get_rebalance_status(msg.fid)
            LOG.info('SNS rebalance status is received: %s',
                     rebalance_status)
            msg.reply_to.put(rebalance_status)
            return

        for kind, announcement, method_name in fid_commands:
            if isinstance(msg, kind):
                LOG.info(announcement)
                getattr(motr, method_name)(msg.fid)
                return

        LOG.warning('Unsupported event type received: %s', msg)

    try:
        while True:
            try:
                LOG.debug('Waiting for the next message')
                item = next_message()
                LOG.debug('Got %s message from queue', item)
                dispatch(item)
            except StopIteration:
                # Must be re-raised explicitly: StopIteration is an
                # Exception subclass and would be swallowed below.
                raise
            except Exception:
                # no op, swallow the exception
                LOG.exception('**ERROR**')
    except StopIteration:
        ffi.shun_motr_thread()
    finally:
        LOG.info('Handler thread has exited')
def _do_work(self, planner: WorkPlanner, motr: Motr):
    """Take commands from ``planner`` and dispatch them to ``motr``.

    Every command obtained from ``planner.get_next_command()`` is
    reported back with ``planner.notify_finished()`` once its handling
    is over, whether it succeeded or failed. The loop ends when a
    ``Die`` command is received (or a handler raises StopIteration);
    the handler whose ``self.idx`` is 0 then calls ``motr.stop()``.
    Any other exception is logged and swallowed, so the loop keeps
    running.

    Args:
        planner: Source of the commands; it is notified about every
            command that has been processed.
        motr: Object whose methods are invoked to handle the commands.
    """
    LOG.info('Handler thread has started')

    try:
        while True:
            # Tracks whether a command was actually obtained in THIS
            # iteration. Without it, a failure in get_next_command()
            # would make the `finally` below touch an unbound `item`
            # (first iteration: UnboundLocalError that kills the
            # thread) or re-notify the previous, stale command.
            fetched = False
            try:
                LOG.debug('Waiting for the next message')

                item = planner.get_next_command()
                fetched = True

                LOG.debug('Got %s message from planner', item)
                if isinstance(item, FirstEntrypointRequest):
                    motr.send_entrypoint_request_reply(
                        EntrypointRequest(
                            reply_context=item.reply_context,
                            req_id=item.req_id,
                            remote_rpc_endpoint=item.remote_rpc_endpoint,
                            process_fid=item.process_fid,
                            git_rev=item.git_rev,
                            pid=item.pid,
                            is_first_request=item.is_first_request))
                elif isinstance(item, EntrypointRequest):
                    # While replying any Exception is caught. In such a
                    # case, the motr process will receive EAGAIN and
                    # hence will need to make new attempt by itself
                    motr.send_entrypoint_request_reply(item)
                elif isinstance(item, ProcessEvent):
                    self._update_process_status(planner, motr, item.evt)
                elif isinstance(item, HaNvecGetEvent):
                    fn = motr.ha_nvec_get_reply
                    # If a consul-related exception appears, it will
                    # be processed by repeat_if_fails.
                    #
                    # This thread will become blocked until that
                    # intermittent error gets resolved.
                    decorated = (repeat_if_fails(wait_seconds=5))(fn)
                    decorated(item)
                elif isinstance(item, HaNvecSetEvent):
                    fn = motr.ha_nvec_set_process
                    # If a consul-related exception appears, it will
                    # be processed by repeat_if_fails.
                    #
                    # This thread will become blocked until that
                    # intermittent error gets resolved.
                    decorated = (repeat_if_fails(wait_seconds=5))(fn)
                    decorated(item)
                elif isinstance(item, BroadcastHAStates):
                    LOG.info('HA states: %s', item.states)
                    ha_states = self.update_process_failure(
                        planner, item.states)
                    result: List[MessageId] = motr.broadcast_ha_states(
                        ha_states)
                    if item.reply_to:
                        item.reply_to.put(result)
                elif isinstance(item, StobIoqError):
                    LOG.info('Stob IOQ: %s', item.fid)
                    payload = dump_json(item)
                    LOG.debug('Stob IOQ JSON: %s', payload)
                    offset = self.eq_publisher.publish('stob-ioq', payload)
                    LOG.debug('Written to epoch: %s', offset)
                elif isinstance(item, SnsRepairStatus):
                    LOG.info('Requesting SNS repair status')
                    status = motr.get_repair_status(item.fid)
                    LOG.info('SNS repair status is received: %s', status)
                    item.reply_to.put(status)
                elif isinstance(item, SnsRebalanceStatus):
                    LOG.info('Requesting SNS rebalance status')
                    status = motr.get_rebalance_status(item.fid)
                    LOG.info('SNS rebalance status is received: %s',
                             status)
                    item.reply_to.put(status)
                elif isinstance(item, SnsRebalanceStart):
                    LOG.info('Requesting SNS rebalance start')
                    motr.start_rebalance(item.fid)
                elif isinstance(item, SnsRebalanceStop):
                    LOG.info('Requesting SNS rebalance stop')
                    motr.stop_rebalance(item.fid)
                elif isinstance(item, SnsRebalancePause):
                    LOG.info('Requesting SNS rebalance pause')
                    motr.pause_rebalance(item.fid)
                elif isinstance(item, SnsRebalanceResume):
                    LOG.info('Requesting SNS rebalance resume')
                    motr.resume_rebalance(item.fid)
                elif isinstance(item, SnsRepairStart):
                    LOG.info('Requesting SNS repair start')
                    motr.start_repair(item.fid)
                elif isinstance(item, SnsRepairStop):
                    LOG.info('Requesting SNS repair stop')
                    motr.stop_repair(item.fid)
                elif isinstance(item, SnsRepairPause):
                    LOG.info('Requesting SNS repair pause')
                    motr.pause_repair(item.fid)
                elif isinstance(item, SnsRepairResume):
                    LOG.info('Requesting SNS repair resume')
                    motr.resume_repair(item.fid)
                elif isinstance(item, Die):
                    raise StopIteration()
                else:
                    LOG.warning('Unsupported event type received: %s',
                                item)
            except StopIteration:
                # Must be re-raised explicitly: StopIteration is an
                # Exception subclass and would be swallowed below.
                raise
            except Exception:
                # no op, swallow the exception
                LOG.exception('**ERROR**')
            finally:
                # Runs for Die as well, so the planner is told about
                # the final command before the loop is left.
                if fetched:
                    planner.notify_finished(item)
    except StopIteration:
        LOG.info('Consumer Stopped')
        if self.idx == 0:
            motr.stop()
    finally:
        LOG.info('Handler thread has exited')