def _ha_broadcast(self, notes: List[HaNoteStruct],
                  broadcast_hax_only: bool) -> List[MessageId]:
    message_ids: List[MessageId] = []
    nr_notes_to_be_sent = len(notes)
    notes_sent = 0
    LOG.debug('Broadcasting %d notes', nr_notes_to_be_sent)
    while notes:
        notes_to_send = notes[0:MAX_MOTR_NVEC_UPDATE_SZ]
        notes_to_send_len = len(notes_to_send)
        notes_sent += notes_to_send_len
        if broadcast_hax_only:
            hax_endpoint = self.consul_util.get_hax_endpoint()
            # Accumulate the ids from every chunk; a plain assignment
            # here would discard all but the last chunk's message ids.
            message_ids += self._ffi.ha_broadcast_hax_only(
                self._ha_ctx, make_array(HaNoteStruct, notes_to_send),
                notes_to_send_len, make_c_str(hax_endpoint))
        else:
            message_ids += self._ffi.ha_broadcast(
                self._ha_ctx, make_array(HaNoteStruct, notes_to_send),
                notes_to_send_len)
        LOG.debug('Broadcast HA state complete, message_ids = %s',
                  message_ids)
        notes = notes[MAX_MOTR_NVEC_UPDATE_SZ:]
    assert notes_sent == nr_notes_to_be_sent
    return message_ids
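# A minimal, self-contained sketch of the chunking pattern used by
# _ha_broadcast() above. The `chunked` helper and the chunk size are
# illustrative, not part of the hax API; _ha_broadcast() walks `notes`
# in MAX_MOTR_NVEC_UPDATE_SZ-sized slices the same way.
from typing import Iterator, List, TypeVar

T = TypeVar('T')


def chunked(xs: List[T], size: int) -> Iterator[List[T]]:
    # Yield successive fixed-size slices until the list is exhausted.
    while xs:
        yield xs[:size]
        xs = xs[size:]


assert list(chunked([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]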
def broadcast_ha_states(self, ha_states: List[HAState],
                        notify_devices=True) -> List[MessageId]:
    LOG.debug('Broadcasting HA states %s over ha_link', ha_states)

    def ha_obj_state(st):
        return HaNoteStruct.M0_NC_ONLINE if st.status == ServiceHealth.OK \
            else HaNoteStruct.M0_NC_FAILED

    notes = []
    for st in ha_states:
        if st.status in (ServiceHealth.UNKNOWN, ServiceHealth.OFFLINE):
            continue
        note = HaNoteStruct(st.fid.to_c(), ha_obj_state(st))
        notes.append(note)
        if (st.fid.container == ObjT.PROCESS.value
                and st.status == ServiceHealth.STOPPED):
            notify_devices = False
        notes += self._generate_sub_services(note, self.consul_util,
                                             notify_devices)
        # For process failure, we report failure for the corresponding
        # node (enclosure) and CVGs.
        if (st.fid.container == ObjT.PROCESS.value
                and st.status in (ServiceHealth.FAILED, ServiceHealth.OK)):
            notes += self.notify_node_status(note)
        if st.fid.container == ObjT.DRIVE.value:
            self.consul_util.update_drive_state([st.fid], st.status)
    if not notes:
        return []
    message_ids: List[MessageId] = self._ffi.ha_broadcast(
        self._ha_ctx, make_array(HaNoteStruct, notes), len(notes))
    LOG.debug(
        'Broadcast HA state complete with the following message_ids = %s',
        message_ids)
    return message_ids
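# Hedged usage sketch for broadcast_ha_states(): report one Motr process
# as FAILED and let the method fan the failure out to its sub-services.
# Assumes HAState, Fid, ObjT and ServiceHealth from hax's types module
# (as used above) and a `motr` instance of this class; the helper name
# and the fid key are illustrative.
def report_process_failure(motr, key: int) -> None:
    proc_fid = Fid(ObjT.PROCESS.value, key)  # container type + key
    state = HAState(fid=proc_fid, status=ServiceHealth.FAILED)
    motr.broadcast_ha_states([state])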
def send_entrypoint_request_reply(self, message: EntrypointRequest):
    reply_context = message.reply_context
    req_id = message.req_id
    remote_rpc_endpoint = message.remote_rpc_endpoint
    process_fid = message.process_fid
    LOG.debug('Processing entrypoint request from remote endpoint'
              " '{}', process fid {}".format(remote_rpc_endpoint,
                                             str(process_fid)))
    sess = principal_rm = confds = None
    try:
        util = self.consul_util
        sess = util.get_leader_session_no_wait()
        principal_rm = util.get_session_node(sess)
        confds = util.get_confd_list()
        rm_fid = util.get_rm_fid()
    except Exception:
        LOG.exception('Failed to get the data from Consul.'
                      ' Replying with EAGAIN error code.')
        self._ffi.entrypoint_reply(reply_context, req_id.to_c(), EAGAIN,
                                   0, make_array(FidStruct, []),
                                   make_array(c.c_char_p, []), 0,
                                   Fid(0, 0).to_c(), None)
        LOG.debug('Reply sent')
        return
    rc_quorum = int(len(confds) / 2 + 1)
    rm_eps = None
    for svc in confds:
        if svc.node == principal_rm:
            rm_eps = svc.address
            break
    if not rm_eps:
        raise RuntimeError('No RM node found in Consul')
    confd_fids = [x.fid.to_c() for x in confds]
    confd_eps = [make_c_str(x.address) for x in confds]
    LOG.debug('Passing the entrypoint reply to hax.c layer')
    self._ffi.entrypoint_reply(reply_context, req_id.to_c(), 0,
                               len(confds),
                               make_array(FidStruct, confd_fids),
                               make_array(c.c_char_p, confd_eps),
                               rc_quorum, rm_fid.to_c(),
                               make_c_str(rm_eps))
    LOG.debug('Entrypoint request has been replied to')
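# The rc_quorum computed above is a simple majority of the confd list.
# A pure-Python restatement (no hax dependencies); note that
# int(len(confds) / 2 + 1) is equivalent to floor division:
def confd_quorum(nr_confds: int) -> int:
    return nr_confds // 2 + 1


assert [confd_quorum(n) for n in (1, 2, 3, 4, 5)] == [1, 2, 2, 3, 3]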
def broadcast_ha_states(self, ha_states: List[HAState],
                        notify_devices=True) -> List[MessageId]:
    LOG.debug('Broadcasting HA states %s over ha_link', ha_states)

    def ha_obj_state(st):
        return HaNoteStruct.M0_NC_ONLINE if st.status == ServiceHealth.OK \
            else HaNoteStruct.M0_NC_FAILED

    notes = []
    for st in ha_states:
        if st.status in (ServiceHealth.UNKNOWN, ServiceHealth.OFFLINE):
            continue
        note = HaNoteStruct(st.fid.to_c(), ha_obj_state(st))
        notes.append(note)
        if st.fid.container == ObjT.PROCESS.value:
            self.consul_util.set_process_state(st.fid, st.status)
        notes += self._generate_sub_services(note, self.consul_util,
                                             notify_devices)
        # For a process failure, report failure for the corresponding
        # node (enclosure) and CVGs if all IO services have failed.
        if (st.fid.container == ObjT.PROCESS.value
                and st.status in (ServiceHealth.FAILED, ServiceHealth.OK)):
            # Decide whether to mark the whole node or just its
            # controller:
            # - on process failure, mark the node as failed only if all
            #   IO services on it have failed;
            # - on process recovery (OK), mark the node as OK only if
            #   the node is not in the failed state;
            # - otherwise, update just the controller status.
            is_node_failed = self.is_node_failed(note)
            if st.status == ServiceHealth.FAILED and is_node_failed:
                notes += self.notify_node_status_by_process(note)
            elif st.status == ServiceHealth.OK and not is_node_failed:
                notes += self.notify_node_status_by_process(note)
            else:
                ctrl_note = self.get_ctrl_status(note)
                if ctrl_note is not None:
                    notes.append(ctrl_note)
        if st.fid.container == ObjT.DRIVE.value:
            self.consul_util.update_drive_state([st.fid], st.status)
        elif st.fid.container == ObjT.NODE.value:
            self.consul_util.set_node_state(st.fid, st.status)
            notes += self.add_enclosing_devices_by_node(st.fid, st.status)
    if not notes:
        return []
    message_ids: List[MessageId] = self._ffi.ha_broadcast(
        self._ha_ctx, make_array(HaNoteStruct, notes), len(notes))
    LOG.debug(
        'Broadcast HA state complete with the following message_ids = %s',
        message_ids)
    return message_ids
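# The node-vs-controller escalation above reduces to a small pure
# function; a sketch with ServiceHealth-like statuses passed as strings
# and is_node_failed() treated as an opaque predicate:
def pick_notification_target(status: str, node_failed: bool) -> str:
    # Escalate to the node only when the event and the aggregated node
    # state agree (FAILED + node failed, or OK + node healthy);
    # otherwise only the controller status is updated.
    if status == 'FAILED' and node_failed:
        return 'node'
    if status == 'OK' and not node_failed:
        return 'node'
    return 'controller'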
def ha_nvec_get_reply(self, event: HaNvecGetEvent, kv_cache=None) -> None:
    LOG.debug('Preparing the reply for HaNvecGetEvent (nvec size = %s)',
              len(event.nvec))
    notes: List[HaNoteStruct] = []
    for n in event.nvec:
        n.note.no_state = self.consul_util.get_conf_obj_status(
            ObjT[n.obj_t], n.note.no_id.f_key, kv_cache=kv_cache)
        notes.append(n.note)
    LOG.debug('Replying ha nvec of length %s', len(event.nvec))
    self._ffi.ha_nvec_reply(event.hax_msg, make_array(HaNoteStruct, notes),
                            len(notes))
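# Sketch of the read-through pattern the kv_cache parameter enables,
# presumably to avoid repeated Consul KV reads within one nvec request.
# The helper below is hypothetical; hax's actual cache lives inside
# consul_util and is simply threaded through get_conf_obj_status().
def get_cached(cache: dict, key: str, fetch) -> object:
    # Consult the per-request cache first; fall back to the fetch
    # callable (e.g. a Consul KV read) and memoize the result.
    if key not in cache:
        cache[key] = fetch(key)
    return cache[key]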
def ha_nvec_get_reply(self, event: HaNvecGetEvent) -> None:
    LOG.debug('Preparing the reply for HaNvecGetEvent (nvec size = %s)',
              len(event.nvec))
    notes: List[HaNoteStruct] = []
    for n in event.nvec:
        n.note.no_state = HaNoteStruct.M0_NC_ONLINE
        if (n.obj_t in (ObjT.PROCESS.name, ObjT.SERVICE.name)
                and self.consul_util.get_conf_obj_status(
                    ObjT[n.obj_t], n.note.no_id.f_key) != 'passing'):
            n.note.no_state = HaNoteStruct.M0_NC_FAILED
        notes.append(n.note)
    LOG.debug('Replying ha nvec of length %s', len(event.nvec))
    self._ffi.ha_nvec_reply(event.hax_msg, make_array(HaNoteStruct, notes),
                            len(notes))
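# A compact restatement of the mapping rule in this older version of
# ha_nvec_get_reply(). 'passing' is Consul's healthy check status; the
# numeric constants mirror HaNoteStruct.M0_NC_* (values illustrative):
M0_NC_ONLINE = 1
M0_NC_FAILED = 2


def nvec_state(obj_t: str, check_status: str) -> int:
    # Only PROCESS and SERVICE objects inherit Consul's health verdict;
    # every other object type is reported ONLINE.
    if obj_t in ('PROCESS', 'SERVICE') and check_status != 'passing':
        return M0_NC_FAILED
    return M0_NC_ONLINE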
def broadcast_ha_states(self, ha_states: List[HAState]) -> List[MessageId]:
    LOG.debug('Broadcasting HA states %s over ha_link', ha_states)

    def ha_obj_state(st):
        return HaNoteStruct.M0_NC_ONLINE if st.status == ServiceHealth.OK \
            else HaNoteStruct.M0_NC_FAILED

    notes = []
    for st in ha_states:
        note = HaNoteStruct(st.fid.to_c(), ha_obj_state(st))
        notes.append(note)
        notes += self._generate_sub_services(note, self.consul_util)
    message_ids: List[MessageId] = self._ffi.ha_broadcast(
        self._ha_ctx, make_array(HaNoteStruct, notes), len(notes))
    LOG.debug(
        'Broadcast HA state complete with the following message_ids = %s',
        message_ids)
    return message_ids
def send_entrypoint_request_reply(self, message: EntrypointRequest):
    reply_context = message.reply_context
    req_id = message.req_id
    remote_rpc_endpoint = message.remote_rpc_endpoint
    process_fid = message.process_fid
    e_rc = EAGAIN
    LOG.debug('Processing entrypoint request from remote endpoint'
              " '{}', process fid {}".format(remote_rpc_endpoint,
                                             str(process_fid)))
    sess = principal_rm = confds = None
    try:
        util = self.consul_util
        # When stopping, hax may still receive an entrypoint request
        # from motr land. In order to unblock motr land, reply with an
        # entrypoint reply that carries no confds and no RM endpoint,
        # as the processes might have already stopped.
        rc_quorum = 0
        rm_fid = Fid(0, 0)
        if self.is_stopping:
            confds = []
        else:
            sess = util.get_leader_session()
            principal_rm = util.get_session_node(sess)
            confds = util.get_confd_list()
        # Hax may receive entrypoint requests multiple times during its
        # lifetime. Hax starts motr rconfc to invoke spiel commands.
        # Motr rconfc establishes a connection with the principal RM;
        # in case of principal RM failure, rconfc invalidates its confc
        # and requests the entrypoint again, in the hope that another
        # confd and principal RM have been elected, so that rconfc can
        # resume its functionality. During shutdown, when each motr
        # process stops, including confds, hax broadcasts an
        # M0_NC_FAILED event for every STOPPED or FAILED motr process.
        # Motr rconfc, on receiving the failed events for confds,
        # re-requests entrypoint information, and this goes on in a
        # loop. To break this loop, the entrypoint reply must only
        # report alive confds and RM endpoints. We also need to handle
        # the bootstrapping case: we wait until bootstrapping is done
        # (i.e. all the motr services are up), then check the confd
        # status and exclude the corresponding confd from the
        # entrypoint reply.
        # EOS-25726: It seems that the confds were reported as started
        # and failed later. This could be due to a Motr issue,
        # EOS-25695. In such a case, when processes start out of order,
        # a wrong quorum value is reported, which leads to further
        # issues in Motr process startup. Thus commenting this out for
        # now. Need to verify whether this affects hax shutdown.
        # active_confds = []
        # if self.spiel_ready:
        #     for confd in confds:
        #         if not util.is_confd_failed(confd.fid):
        #             active_confds.append(confd)
        #     confds = active_confds
        if confds:
            rm_fid = util.get_rm_fid()
            rc_quorum = int(len(confds) / 2 + 1)
        rm_eps = None
        for svc in confds:
            if svc.node == principal_rm:
                rm_eps = svc.address
                break
        if confds and (not self.is_stopping) and (not rm_eps):
            if util.m0ds_stopping():
                e_rc = 0
            raise RuntimeError('No RM node found in Consul')
    except Exception:
        LOG.exception('Failed to get the data from Consul.'
                      ' Replying with EAGAIN error code, with a 1'
                      ' second delay.')
        # When hax replies EAGAIN, motr immediately sends a subsequent
        # entrypoint request; several such requests per second have
        # been observed, flooding Hare. As an intermediate solution,
        # Hare dropped the requests whenever preparing a reply failed,
        # but then motr did not send any subsequent entrypoint request
        # after a timeout, as expected. It was therefore agreed to keep
        # this 1-second delay as a temporary fix on the Hare side;
        # https://jts.seagate.com/browse/EOS-27068 is the motr ticket
        # created to track the issue.
        sleep(1)
        self._ffi.entrypoint_reply(reply_context, req_id.to_c(), e_rc, 0,
                                   make_array(FidStruct, []),
                                   make_array(c.c_char_p, []), 0,
                                   Fid(0, 0).to_c(), None)
        LOG.debug('Reply sent')
        return
    confd_fids = [x.fid.to_c() for x in confds]
    confd_eps = [make_c_str(x.address) for x in confds]
    LOG.debug('Passing the entrypoint reply to hax.c layer')
    self._ffi.entrypoint_reply(reply_context, req_id.to_c(), 0,
                               len(confds),
                               make_array(FidStruct, confd_fids),
                               make_array(c.c_char_p, confd_eps),
                               rc_quorum, rm_fid.to_c(),
                               make_c_str(rm_eps))
    LOG.debug('Entrypoint request has been replied to')
def send_entrypoint_request_reply(self, message: EntrypointRequest):
    reply_context = message.reply_context
    req_id = message.req_id
    remote_rpc_endpoint = message.remote_rpc_endpoint
    process_fid = message.process_fid
    e_rc = EAGAIN
    LOG.debug('Processing entrypoint request from remote endpoint'
              " '{}', process fid {}".format(remote_rpc_endpoint,
                                             str(process_fid)))
    sess = principal_rm = confds = None
    try:
        util = self.consul_util
        # When stopping, hax may still receive an entrypoint request
        # from motr land. In order to unblock motr land, reply with an
        # entrypoint reply that carries no confds and no RM endpoint,
        # as the processes might have already stopped.
        rc_quorum = 0
        rm_fid = Fid(0, 0)
        if self.is_stopping:
            confds = []
        else:
            sess = util.get_leader_session_no_wait()
            principal_rm = util.get_session_node(sess)
            confds = util.get_confd_list()
        # Hax may receive entrypoint requests multiple times during its
        # lifetime. Hax starts motr rconfc to invoke spiel commands.
        # Motr rconfc establishes a connection with the principal RM;
        # in case of principal RM failure, rconfc invalidates its confc
        # and requests the entrypoint again, in the hope that another
        # confd and principal RM have been elected, so that rconfc can
        # resume its functionality. During shutdown, when each motr
        # process stops, including confds, hax broadcasts an
        # M0_NC_FAILED event for every STOPPED or FAILED motr process.
        # Motr rconfc, on receiving the failed events for confds,
        # re-requests entrypoint information, and this goes on in a
        # loop. To break this loop, the entrypoint reply must only
        # report alive confds and RM endpoints. We also need to handle
        # the bootstrapping case: we wait until bootstrapping is done
        # (i.e. all the motr services are up), then check the confd
        # status and exclude the corresponding confd from the
        # entrypoint reply.
        active_confds = []
        if self.spiel_ready:
            for confd in confds:
                if not util.is_confd_failed(confd.fid):
                    active_confds.append(confd)
            confds = active_confds
        if confds:
            rm_fid = util.get_rm_fid()
            rc_quorum = int(len(confds) / 2 + 1)
        rm_eps = None
        for svc in confds:
            if svc.node == principal_rm:
                rm_eps = svc.address
                break
        if confds and (not self.is_stopping) and (not rm_eps):
            if util.m0ds_stopping():
                e_rc = 0
            raise RuntimeError('No RM node found in Consul')
    except Exception:
        LOG.exception('Failed to get the data from Consul.'
                      ' Replying with EAGAIN error code.')
        self._ffi.entrypoint_reply(reply_context, req_id.to_c(), e_rc, 0,
                                   make_array(FidStruct, []),
                                   make_array(c.c_char_p, []), 0,
                                   Fid(0, 0).to_c(), None)
        LOG.debug('Reply sent')
        return
    confd_fids = [x.fid.to_c() for x in confds]
    confd_eps = [make_c_str(x.address) for x in confds]
    LOG.debug('Passing the entrypoint reply to hax.c layer')
    self._ffi.entrypoint_reply(reply_context, req_id.to_c(), 0,
                               len(confds),
                               make_array(FidStruct, confd_fids),
                               make_array(c.c_char_p, confd_eps),
                               rc_quorum, rm_fid.to_c(),
                               make_c_str(rm_eps))
    LOG.debug('Entrypoint request has been replied to')