Example #1
0
    def _generate_sub_disks(self,
                            note: HaNoteStruct,
                            services: List[FidWithType],
                            cns: ConsulUtil,
                            kv_cache=None) -> List[HaNoteStruct]:
        """Build HA notes for the drives enclosed by the given process.

        Disks are gathered per service with
        cns.get_disks_by_parent_process; when any are found they are passed
        to cns.update_drive_state with OK (note state is M0_NC_ONLINE) or
        OFFLINE (any other state). The returned notes carry the drive state
        read back through cns.get_sdev_state.

        If the process is MKFS and the derived state is not OK, no disks
        are gathered or updated and an empty list is returned
        (see EOS-24124).

        kv_cache is accepted but not used by this method.
        """
        proc_fid = Fid.from_struct(note.no_id)
        if note.no_state == HaNoteStruct.M0_NC_ONLINE:
            health = ObjHealth.OK
        else:
            health = ObjHealth.OFFLINE

        # We don't mark the devices as failed if the process is MKFS
        # and if its effective status is STOPPED (see EOS-24124).
        skip_devices = self._is_mkfs(proc_fid) and health != ObjHealth.OK

        disks = []
        if not skip_devices:
            for service in services:
                disks.extend(
                    cns.get_disks_by_parent_process(proc_fid, service.fid))
            if disks:
                # XXX: Need to check the current state of the device,
                # transition to ONLINE only in case of an explicit request
                # or iff the prior state of the device is UNKNOWN/OFFLINE.
                cns.update_drive_state(disks, health, device_event=False)

        LOG.debug('proc fid=%s encloses %d disks as follows: %s', proc_fid,
                  len(disks), disks)

        result: List[HaNoteStruct] = []
        for drive in disks:
            # Get the drive state from Consul KV.
            current = cns.get_sdev_state(ObjT.DRIVE, drive.key)
            result.append(HaNoteStruct(no_id=drive.to_c(), no_state=current))
        return result
Example #2
0
    def is_node_failed(self, proc_note: HaNoteStruct, kv_cache=None):
        """Return consul_util's verdict on whether the node's IO services failed.

        The note must refer to a process (asserted). The node is resolved
        with consul_util.get_process_node and the result of
        consul_util.all_io_services_failed for that node is returned.
        """
        fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == fid.container

        cns = self.consul_util
        hosting_node = cns.get_process_node(fid, kv_cache=kv_cache)
        return cns.all_io_services_failed(hosting_node, kv_cache=kv_cache)
Example #3
0
    def ha_nvec_set_process(self, event: HaNvecSetEvent) -> None:
        """Handle HaNvecSetEvent and broadcast the resulting HA states.

        Notes in REPAIRED or ONLINE state are broadcast with their own
        health. A note in REPAIR state is broadcast as FAILED and a note in
        REBALANCE state as REPAIRED. Notes in any other state only show up
        in the debug log. Nothing is broadcast when no note qualifies.
        """
        LOG.debug('Processing HaNvecSetEvent (nvec size = %s)',
                  len(event.nvec))
        self.consul_util.get_all_nodes()

        as_is = {HaNoteStruct.M0_NC_REPAIRED, HaNoteStruct.M0_NC_ONLINE}
        rollback_to = {
            # In case of failed repair, roll back to failed state.
            HaNoteStruct.M0_NC_REPAIR: HaNoteStruct.M0_NC_FAILED,
            # In case of failed rebalance, roll back to repaired state.
            HaNoteStruct.M0_NC_REBALANCE: HaNoteStruct.M0_NC_REPAIRED,
        }

        received: List[HAState] = []
        to_broadcast: List[HAState] = []
        for item in event.nvec:
            note = item.note
            fid = Fid.from_struct(note.no_id)
            health = ObjHealth.from_ha_note_state(note.no_state)
            received.append(HAState(fid, health))

            if note.no_state in as_is:
                to_broadcast.append(HAState(fid, health))
            elif note.no_state in rollback_to:
                fallback = ObjHealth.from_ha_note_state(
                    rollback_to[note.no_state])
                to_broadcast.append(HAState(fid, fallback))

        LOG.debug('got ha_states %s', received)
        if to_broadcast:
            self.broadcast_ha_states(to_broadcast)
Example #4
0
    def notify_node_status_by_process(
            self, proc_note: HaNoteStruct) -> List[HaNoteStruct]:
        """Produce node-level HA notes derived from a process note.

        The note must refer to a process (asserted). When the process state
        maps to ServiceHealth.OK, the state of its IO-service controller
        (if one is found) is set as well. The result combines the notes
        from add_node_state_by_fid and add_enclosing_devices_by_node.
        """
        # proc_note.no_state is of int type
        health = ServiceHealth.from_ha_note_state(proc_note.no_state)
        proc_fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == proc_fid.container
        LOG.debug('Notifying node status for process_fid=%s state=%s',
                  proc_fid, health)

        cns = self.consul_util
        node = cns.get_process_node(proc_fid)

        if health == ServiceHealth.OK:
            # Node can have multiple controllers. Node can be online, with
            # a single controller running online.
            # If we receive process 'OK', only the process state is
            # updated. So, we need to update the corresponding
            # controller state.
            controller = cns.get_ioservice_ctrl_fid(proc_fid)
            if controller:
                cns.set_ctrl_state(controller, health)

        node_fid = cns.get_node_fid(node)
        result = self.add_node_state_by_fid(node_fid, health)
        result += self.add_enclosing_devices_by_node(node_fid,
                                                     health,
                                                     node=node)
        return result
Example #5
0
 def _generate_sub_services(self, note: HaNoteStruct,
                            cns: ConsulUtil) -> List[HaNoteStruct]:
     """Build one HA note per service enclosed by the given process.

     Services come from cns.get_services_by_parent_process; each
     resulting note carries the state of the incoming note.
     """
     state = note.no_state
     proc_fid = Fid.from_struct(note.no_id)
     children = cns.get_services_by_parent_process(proc_fid)
     LOG.debug('Process fid=%s encloses %s services as follows: %s',
               proc_fid, len(children), children)

     result = []
     for service in children:
         result.append(
             HaNoteStruct(no_id=service.fid.to_c(), no_state=state))
     return result
Example #6
0
 def _generate_sub_disks(self, note: HaNoteStruct, services: List,
                         cns: ConsulUtil) -> List[HaNoteStruct]:
     """Build one HA note per disk enclosed by the given process.

     Disks are gathered per service with
     cns.get_disks_by_parent_process; each resulting note carries the
     state of the incoming note.
     """
     state = note.no_state
     proc_fid = Fid.from_struct(note.no_id)

     disks = []
     for service in services:
         disks.extend(cns.get_disks_by_parent_process(proc_fid, service.fid))
     LOG.debug('proc fid=%s encloses %d disks as follows: %s', proc_fid,
               len(disks), disks)

     result = []
     for disk in disks:
         result.append(HaNoteStruct(no_id=disk.to_c(), no_state=state))
     return result
Example #7
0
    def ha_nvec_get_reply(self, event: HaNvecGetEvent, kv_cache=None) -> None:
        """Reply to HaNvecGetEvent with the current status of each object.

        Every note in event.nvec has its no_state overwritten in place with
        the value returned by consul_util.get_conf_obj_status, and the
        notes are then handed to ha_nvec_reply together with
        event.hax_msg.

        A fid whose container has no entry in FidTypeToObjT is not handled
        here: the lookup is a plain subscript.
        """
        LOG.debug('Preparing the reply for HaNvecGetEvent (nvec size = %s)',
                  len(event.nvec))
        self.consul_util.get_all_nodes()
        notes: List[HaNoteStruct] = []
        for n in event.nvec:
            fid = Fid.from_struct(n.note.no_id)
            n.note.no_state = self.consul_util.get_conf_obj_status(
                FidTypeToObjT[fid.container], fid.key, kv_cache=kv_cache)
            notes.append(n.note)

        # Lazy %-style arguments: the message is only formatted when the
        # debug level is enabled, as in the other log calls.
        LOG.debug('Replying ha nvec of length %s', len(event.nvec))
        self._ffi.ha_nvec_reply(event.hax_msg, make_array(HaNoteStruct, notes),
                                len(notes))
Example #8
0
    def get_ctrl_status(self,
                        proc_note: HaNoteStruct) -> Optional[HaNoteStruct]:
        """Return a controller HA note matching the given process note.

        The note must refer to a process (asserted). If an IO-service
        controller fid is found for the process, its state is set through
        consul_util.set_ctrl_state and a note with the controller fid and
        the process state is returned; otherwise None is returned.
        """
        state = proc_note.no_state
        proc_fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == proc_fid.container
        LOG.debug('Notifying ctrl status for process_fid=%s state=%s',
                  proc_fid, state)

        controller = self.consul_util.get_ioservice_ctrl_fid(proc_fid)
        if not controller:
            return None

        # Update controller state in consul kv.
        health = ServiceHealth.from_ha_note_state(state)
        self.consul_util.set_ctrl_state(controller, health)
        return HaNoteStruct(no_id=controller.to_c(), no_state=state)
Example #9
0
 def _generate_sub_services(self,
                            note: HaNoteStruct,
                            cns: ConsulUtil,
                            notify_devices=True,
                            kv_cache=None) -> List[HaNoteStruct]:
     """Build HA notes for the services enclosed by the given process.

     Each service returned by cns.get_services_by_parent_process gets a
     note carrying the state of the incoming note. When notify_devices
     is true, the notes from _generate_sub_disks are appended after the
     service notes.
     """
     state = note.no_state
     proc_fid = Fid.from_struct(note.no_id)
     children = cns.get_services_by_parent_process(proc_fid,
                                                   kv_cache=kv_cache)
     LOG.debug('Process fid=%s encloses %s services as follows: %s',
               proc_fid, len(children), children)

     result = []
     for service in children:
         result.append(
             HaNoteStruct(no_id=service.fid.to_c(), no_state=state))

     if not notify_devices:
         return result
     result.extend(self._generate_sub_disks(note, children, cns))
     return result
Example #10
0
    def get_ctrl_status(
            self,
            proc_note: HaNoteStruct,
            kv_cache=None) -> Optional[Tuple[HaNoteStruct, List[PutKV]]]:
        """Return a controller HA note plus the KV updates for its state.

        The note must refer to a process (asserted). If an IO-service
        controller fid is found for the process, the result is a tuple of
        a note (controller fid, process state) and whatever
        consul_util.get_ctrl_state_updates returned; otherwise None.
        """
        state = proc_note.no_state
        proc_fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == proc_fid.container
        LOG.debug('Notifying ctrl status for process_fid=%s state=%s',
                  proc_fid, state)

        cns = self.consul_util
        controller = cns.get_ioservice_ctrl_fid(proc_fid, kv_cache=kv_cache)
        if not controller:
            return None

        # Update controller state in consul kv.
        health = ObjHealth.from_ha_note_state(state)
        pending = cns.get_ctrl_state_updates(controller,
                                             health,
                                             kv_cache=kv_cache)
        ctrl_note = HaNoteStruct(no_id=controller.to_c(), no_state=state)
        return (ctrl_note, pending)
 def _generate_sub_disks(self, note: HaNoteStruct, services: List,
                         cns: ConsulUtil) -> List[HaNoteStruct]:
     """Build HA notes for the drives enclosed by the given process.

     Disks are gathered per service with
     cns.get_disks_by_parent_process; when any are found they are passed
     to cns.update_drive_state with OK (note state is M0_NC_ONLINE) or
     OFFLINE (any other state). The returned notes carry the drive state
     read back through cns.get_sdev_state.
     """
     proc_fid = Fid.from_struct(note.no_id)

     disks = []
     for service in services:
         disks.extend(cns.get_disks_by_parent_process(proc_fid, service.fid))

     if disks:
         if note.no_state == HaNoteStruct.M0_NC_ONLINE:
             health = ServiceHealth.OK
         else:
             health = ServiceHealth.OFFLINE
         # XXX: Need to check the current state of the device, transition
         # to ONLINE only in case of an explicit request or iff the prior
         # state of the device is UNKNOWN/OFFLINE.
         cns.update_drive_state(disks, health, device_event=False)

     LOG.debug('proc fid=%s encloses %d disks as follows: %s', proc_fid,
               len(disks), disks)

     result: List[HaNoteStruct] = []
     for drive in disks:
         # Get the drive state from Consul KV.
         current = cns.get_sdev_state(ObjT.DRIVE, drive.key)
         result.append(HaNoteStruct(no_id=drive.to_c(), no_state=current))
     return result
Example #12
0
    def notify_node_status_by_process(self,
                                      proc_note: HaNoteStruct,
                                      kv_cache=None) -> List[HaNoteStruct]:
        """Produce node-level HA notes derived from a process note.

        The note must refer to a process (asserted). When the process state
        maps to ObjHealth.OK, controller state updates are collected (if a
        controller fid is found) and written with _write_updates only
        after the node and enclosing-device notes have been built. The
        result combines the notes from add_node_state_by_fid and
        add_enclosing_devices_by_node.
        """
        # proc_note.no_state is of int type
        health = ObjHealth.from_ha_note_state(proc_note.no_state)
        proc_fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == proc_fid.container
        LOG.debug('Notifying node status for process_fid=%s state=%s',
                  proc_fid, health)

        cns = self.consul_util
        node = cns.get_process_node(proc_fid, kv_cache=kv_cache)

        pending: List[PutKV] = []
        if health == ObjHealth.OK:
            # Node can have multiple controllers. Node can be online, with
            # a single controller running online.
            # If we receive process 'OK', only the process state is
            # updated. So, we need to update the corresponding
            # controller state.
            controller = cns.get_ioservice_ctrl_fid(proc_fid,
                                                    kv_cache=kv_cache)
            if controller:
                pending = cns.get_ctrl_state_updates(controller,
                                                     health,
                                                     kv_cache=kv_cache)

        node_fid = cns.get_node_fid(node, kv_cache=kv_cache)
        # FIXME make these two functions to return List[PutKV] so that the
        # write operations can be delayed to reuse the cache as long as
        # possible
        result = self.add_node_state_by_fid(node_fid, health)
        result += self.add_enclosing_devices_by_node(node_fid,
                                                     health,
                                                     node=node,
                                                     kv_cache=kv_cache)
        self._write_updates(pending, kv_cache)
        return result
    def notify_node_status(self,
                           proc_note: HaNoteStruct) -> List[HaNoteStruct]:
        """Build HA notes for the node, enclosure and controller of a process.

        The note must refer to a process (asserted). The three fids are
        looked up through consul_util for the process's node. If any of
        them is falsy an empty list is returned; otherwise one note per
        fid is returned (node, enclosure, controller, in that order), each
        carrying the state of the incoming note.
        """
        state = proc_note.no_state
        proc_fid = Fid.from_struct(proc_note.no_id)
        assert ObjT.PROCESS.value == proc_fid.container
        LOG.debug('Notifying node status for process_fid=%s state=%s',
                  proc_fid, state)

        cns = self.consul_util
        node = cns.get_process_node(proc_fid)

        node_fid = cns.get_node_fid(node)
        encl_fid = cns.get_node_encl_fid(node)
        ctrl_fid = cns.get_node_ctrl_fid(node)
        LOG.debug('node_fid: %s encl_fid: %s ctrl_fid: %s with state: %s',
                  node_fid, encl_fid, ctrl_fid, state)

        if not (node_fid and encl_fid and ctrl_fid):
            return []

        result = []
        for fid in (node_fid, encl_fid, ctrl_fid):
            result.append(HaNoteStruct(no_id=fid.to_c(), no_state=state))
        return result
Example #14
0
 def is_node_failed(self, proc_note: HaNoteStruct, kv_cache=None):
     proc_fid = Fid.from_struct(proc_note.no_id)