Beispiel #1
0
 def scan_sg_devices(self, req):
     """Rescan the SCSI bus for FC devices and report the discovered LUNs.

     Steps: trigger a host-wide rescan, collect the FC LUNs (de-duplicated
     by device wwid and storage wwn), re-apply the no-path failure policy,
     and return the serialized response.
     """
     response = FcSanScanRsp()
     # Bounded rescan so a hung HBA cannot block the agent indefinitely.
     bash.bash_roe("timeout 120 /usr/bin/rescan-scsi-bus.sh -r")
     response.fiberChannelLunStructs = self.get_fc_luns()
     linux.set_fail_if_no_path()
     return jsonobject.dumps(response)
Beispiel #2
0
    def enable_multipath(self, req):
        """Enable multipathd and comment out user-defined device aliases.

        Alias entries in /etc/multipath.conf make device names
        unpredictable, so when any are present they are disabled and
        multipathd is reloaded to pick up the change.
        """
        rsp = AgentRsp()
        lvm.enable_multipath()

        has_alias = bash.bash_r(
            "grep '^[[:space:]]*alias' /etc/multipath.conf") == 0
        if has_alias:
            bash.bash_roe(
                "sed -i 's/^[[:space:]]*alias/#alias/g' /etc/multipath.conf")
            bash.bash_roe("systemctl reload multipathd")

        linux.set_fail_if_no_path()
        return jsonobject.dumps(rsp)
    def check_disks(self, req):
        """Check reachability of the shared-block disks named in the request.

        Optionally re-applies the no-path policy and rescans each disk's
        SCSI device; when the requested VG exists, its total/available
        capacity is reported back in the response.
        """
        cmd = jsonobject.loads(req[http.REQUEST_BODY])
        rsp = AgentRsp()

        if cmd.failIfNoPath:
            linux.set_fail_if_no_path()

        for disk_uuid in cmd.sharedBlockUuids:
            checker = CheckDisk(disk_uuid)
            device_path = checker.get_path()
            if cmd.rescan:
                # rescan() expects the bare device name, not the full path
                checker.rescan(device_path.split("/")[-1])

        if cmd.vgUuid is not None and lvm.vg_exists(cmd.vgUuid):
            rsp.totalCapacity, rsp.availableCapacity = lvm.get_vg_size(cmd.vgUuid, False)

        return jsonobject.dumps(rsp)
Beispiel #4
0
    def enable_multipath(self, req):
        """Enable multipathd, disabling aliases and rotating its state files.

        The current bindings/wwids files are kept as timestamped backups;
        backups with identical md5 sums are pruned so repeated calls do not
        accumulate copies.  Finally stale maps are flushed and the daemon is
        restarted with the fresh configuration.
        """
        rsp = AgentRsp()
        lvm.enable_multipath()

        # User-defined aliases make device naming unpredictable.
        bash.bash_roe(
            "sed -i 's/^[[:space:]]*alias/#alias/g' /etc/multipath.conf")

        stamp = time.time()
        backup_cmds = (
            "mv /etc/multipath/bindings /etc/multipath/bindings.%s " % stamp +
            "&& md5sum /etc/multipath/bindings.*  | awk 'p[$1]++ { printf \"rm %s\\n\",$2;}' | bash",
            "mv /etc/multipath/wwids /etc/multipath/wwids.%s " % stamp +
            "&& md5sum /etc/multipath/wwids.*  | awk 'p[$1]++ { printf \"rm %s\\n\",$2;}' | bash",
        )
        for shell_cmd in backup_cmds:
            bash.bash_roe(shell_cmd)

        bash.bash_roe("multipath -F; systemctl restart multipathd.service")
        linux.set_fail_if_no_path()
        return jsonobject.dumps(rsp)
Beispiel #5
0
    def iscsi_login(self, req):
        """Log in to iSCSI targets on cmd.iscsiServerIp:cmd.iscsiServerPort.

        Targets come from cmd.iscsiTargets, or are discovered via
        sendtargets when that list is empty.  Optional CHAP credentials
        (cmd.iscsiChapUserName / cmd.iscsiChapUserPassword) are applied per
        target before login.  Returns a serialized IscsiLoginRsp whose
        iscsiTargetStructList describes every target and its visible LUNs.
        """
        cmd = jsonobject.loads(req[http.REQUEST_BODY])
        rsp = IscsiLoginRsp()

        @linux.retry(times=5, sleep_time=1)
        def discovery_iscsi(iscsiServerIp, iscsiServerPort):
            # Discover target IQNs; the 10s timeout keeps a dead portal from
            # hanging the agent, and the retry decorator re-runs on failure.
            r, o, e = bash.bash_roe(
                "timeout 10 iscsiadm -m discovery --type sendtargets --portal %s:%s"
                % (iscsiServerIp, iscsiServerPort))
            if r != 0:
                raise RetryException(
                    "can not discovery iscsi portal %s:%s, cause %s" %
                    (iscsiServerIp, iscsiServerPort, e))

            # Keep only lines for this portal; the IQN is the last field.
            iqns = []
            for i in o.splitlines():
                if i.startswith("%s:%s," % (iscsiServerIp, iscsiServerPort)):
                    iqns.append(i.strip().split(" ")[-1])
            return iqns

        @linux.retry(times=20, sleep_time=1)
        def wait_iscsi_mknode(iscsiServerIp,
                              iscsiServerPort,
                              iscsiIqn,
                              e=None):
            # Device nodes appear asynchronously after login: retry until
            # /dev/disk/by-path lists at least as many disks as the session
            # reports, minus LUNs that are deliberately unmapped ("-").
            disks_by_dev = bash.bash_o(
                "ls /dev/disk/by-path | grep %s:%s | grep %s" %
                (iscsiServerIp, iscsiServerPort,
                 iscsiIqn)).strip().splitlines()
            sid = bash.bash_o(
                "iscsiadm -m session | grep %s:%s | grep %s | awk '{print $2}'"
                % (iscsiServerIp, iscsiServerPort, iscsiIqn)).strip("[]\n ")
            if sid == "" or sid is None:
                err = "sid not found, this may because chap authentication failed"
                if e != None and e != "":
                    err += " ,error: %s" % e
                # BUG FIX: raise the composed message `err`; the original
                # raised the raw login stderr `e`, silently discarding err
                # (and producing an empty exception when e was empty).
                raise RetryException(err)
            bash.bash_o("iscsiadm -m session -r %s --rescan" % sid)
            # Get the host number of the iqn; it is matched against the H of
            # the HCTL tuple reported by lsscsi below.
            host_Number = bash.bash_o(
                "iscsiadm -m session -P 3 --sid=%s | grep 'Host Number:' | awk '{print $3}'"
                % sid).strip()
            # Count LUNs that exist in the session but have no mapped device
            # (device column is "-" in `lsscsi --transport`).
            disks_by_no_mapping_lun = bash.bash_o(
                "lsscsi --transport | grep -w %s | awk '{print $1,$NF}' | grep -E '\<%s\>:[[:digit:]]*:[[:digit:]]*:[[:digit:]]*' | awk '{print $NF}' | grep -x '-'"
                % (iscsiIqn, host_Number)).strip().splitlines()
            disks_by_iscsi = bash.bash_o(
                "iscsiadm -m session -P 3 --sid=%s | grep Lun" %
                sid).strip().splitlines()
            if len(disks_by_dev) < (len(disks_by_iscsi) -
                                    len(disks_by_no_mapping_lun)):
                # MESSAGE FIX: "loged" -> "logged" and a separator between
                # the two implicitly-concatenated literals (previously the
                # message read "...--transportit may recover...").
                raise RetryException(
                    "iscsiadm says there are [%s] disks but only found [%s] disks on /dev/disk[%s], so not all disks logged in, and you can check the iscsi mounted disk by lsscsi --transport, "
                    "it may recover after a while so check and login again" %
                    ((len(disks_by_iscsi) - len(disks_by_no_mapping_lun)),
                     len(disks_by_dev), disks_by_dev))

        def check_iscsi_conf():
            # Make iscsid start through systemd socket activation.
            # BUG FIX: "iscsiuio.soccket" -> "iscsiuio.socket"; the typo
            # wrote a unit name systemd does not know into iscsid.conf.
            shell.call(
                "sed -i 's/.*iscsid.startup.*=.*/iscsid.startup = \/bin\/systemctl start iscsid.socket iscsiuio.socket/' /etc/iscsi/iscsid.conf",
                exception=False)

        check_iscsi_conf()
        # Drop cached node configuration for this portal so stale entries
        # cannot interfere with the fresh login.
        path = "/var/lib/iscsi/nodes"
        self.clean_iscsi_cache_configuration(path, cmd.iscsiServerIp,
                                             cmd.iscsiServerPort)
        iqns = cmd.iscsiTargets
        if iqns is None or len(iqns) == 0:
            try:
                iqns = discovery_iscsi(cmd.iscsiServerIp, cmd.iscsiServerPort)
            except Exception as e:
                current_hostname = shell.call('hostname')
                current_hostname = current_hostname.strip(' \t\n\r')
                rsp.error = "login iscsi server %s:%s on host %s failed, because %s" % \
                            (cmd.iscsiServerIp, cmd.iscsiServerPort, current_hostname, e.message)
                rsp.success = False
                return jsonobject.dumps(rsp)

        if iqns is None or len(iqns) == 0:
            # Portal exports nothing for us; report an empty target list.
            rsp.iscsiTargetStructList = []
            return jsonobject.dumps(rsp)

        for iqn in iqns:
            t = IscsiTargetStruct()
            t.iqn = iqn
            try:
                if cmd.iscsiChapUserName and cmd.iscsiChapUserPassword:
                    # Configure CHAP on the node record before logging in.
                    bash.bash_o(
                        'iscsiadm --mode node --targetname "%s" -p %s:%s --op=update --name node.session.auth.authmethod --value=CHAP'
                        % (iqn, cmd.iscsiServerIp, cmd.iscsiServerPort))
                    bash.bash_o(
                        'iscsiadm --mode node --targetname "%s" -p %s:%s --op=update --name node.session.auth.username --value=%s'
                        % (iqn, cmd.iscsiServerIp, cmd.iscsiServerPort,
                           cmd.iscsiChapUserName))
                    bash.bash_o(
                        'iscsiadm --mode node --targetname "%s" -p %s:%s --op=update --name node.session.auth.password --value=%s'
                        % (iqn, cmd.iscsiServerIp, cmd.iscsiServerPort,
                           linux.shellquote(cmd.iscsiChapUserPassword)))
                r, o, e = bash.bash_roe(
                    'iscsiadm --mode node --targetname "%s" -p %s:%s --login' %
                    (iqn, cmd.iscsiServerIp, cmd.iscsiServerPort))
                wait_iscsi_mknode(cmd.iscsiServerIp, cmd.iscsiServerPort, iqn,
                                  e)
            finally:
                # Always report the target -- even when no device node
                # showed up -- and attach LUN details when present.
                if bash.bash_r(
                        "ls /dev/disk/by-path | grep %s:%s | grep %s" %
                    (cmd.iscsiServerIp, cmd.iscsiServerPort, iqn)) != 0:
                    rsp.iscsiTargetStructList.append(t)
                else:
                    disks = bash.bash_o(
                        "ls /dev/disk/by-path | grep %s:%s | grep %s" %
                        (cmd.iscsiServerIp, cmd.iscsiServerPort,
                         iqn)).strip().splitlines()
                    for d in disks:
                        lun_struct = self.get_disk_info_by_path(d.strip())
                        if lun_struct is not None:
                            t.iscsiLunStructList.append(lun_struct)
                    rsp.iscsiTargetStructList.append(t)

        linux.set_fail_if_no_path()
        return jsonobject.dumps(rsp)
Beispiel #6
0
        def heartbeat_on_sharedblock():
            """Self-fencer loop for one sharedblock VG.

            Polls VG health every cmd.interval seconds while
            self.run_fencer(cmd.vgUuid, created_time) stays true.  After
            cmd.maxAttempts consecutive failed checks it reports the storage
            as disconnected, kills VMs whose root volumes sit on the broken
            PVs, deactivates their idle volumes, and drops the VG lock when
            the VG is still unhealthy.
            """
            failure = 0

            while self.run_fencer(cmd.vgUuid, created_time):
                try:
                    time.sleep(cmd.interval)
                    global last_multipath_run
                    # Throttle: re-apply the multipath no-path policy at
                    # most once every ~4 seconds across fencer iterations.
                    if cmd.fail_if_no_path and time.time(
                    ) - last_multipath_run > 4:
                        last_multipath_run = time.time()
                        linux.set_fail_if_no_path()

                    # health[0] is the boolean verdict, health[1] the detail.
                    health = lvm.check_vg_status(cmd.vgUuid,
                                                 cmd.storageCheckerTimeout,
                                                 check_pv=False)
                    logger.debug(
                        "sharedblock group primary storage %s fencer run result: %s"
                        % (cmd.vgUuid, health))
                    # Any success resets the consecutive-failure counter.
                    if health[0] is True:
                        failure = 0
                        continue

                    failure += 1
                    if failure < cmd.maxAttempts:
                        continue

                    # Reached cmd.maxAttempts consecutive failures: fence.
                    try:
                        logger.warn("shared block storage %s fencer fired!" %
                                    cmd.vgUuid)
                        self.report_storage_status([cmd.vgUuid],
                                                   'Disconnected', health[1])

                        # we will check one qcow2 per pv to determine volumes on pv should be kill
                        invalid_pv_uuids = lvm.get_invalid_pv_uuids(
                            cmd.vgUuid, cmd.checkIo)
                        vms = lvm.get_running_vm_root_volume_on_pv(
                            cmd.vgUuid, invalid_pv_uuids, True)
                        killed_vm_uuids = []
                        for vm in vms:
                            kill = shell.ShellCmd('kill -9 %s' % vm.pid)
                            kill(False)
                            if kill.return_code == 0:
                                logger.warn(
                                    'kill the vm[uuid:%s, pid:%s] because we lost connection to the storage.'
                                    'failed to run health check %s times' %
                                    (vm.uuid, vm.pid, cmd.maxAttempts))
                                killed_vm_uuids.append(vm.uuid)
                            else:
                                logger.warn(
                                    'failed to kill the vm[uuid:%s, pid:%s] %s'
                                    % (vm.uuid, vm.pid, kill.stderr))

                            # Deactivate the VM's volumes only when no
                            # process still holds them open.
                            for volume in vm.volumes:
                                used_process = linux.linux_lsof(volume)
                                if len(used_process) == 0:
                                    try:
                                        lvm.deactive_lv(volume, False)
                                    except Exception as e:
                                        logger.debug(
                                            "deactivate volume %s for vm %s failed, %s"
                                            % (volume, vm.uuid, e.message))
                                        content = traceback.format_exc()
                                        logger.warn("traceback: %s" % content)
                                else:
                                    logger.debug(
                                        "volume %s still used: %s, skip to deactivate"
                                        % (volume, used_process))

                        if len(killed_vm_uuids) != 0:
                            self.report_self_fencer_triggered(
                                [cmd.vgUuid], ','.join(killed_vm_uuids))
                        lvm.remove_partial_lv_dm(cmd.vgUuid)

                        # Re-check including PVs; if still broken, release
                        # the lock so other hosts can take over the VG.
                        if lvm.check_vg_status(cmd.vgUuid,
                                               cmd.storageCheckerTimeout,
                                               True)[0] is False:
                            lvm.drop_vg_lock(cmd.vgUuid)
                            lvm.remove_device_map_for_vg(cmd.vgUuid)

                        # reset the failure count
                        failure = 0
                    except Exception as e:
                        # Fencing itself failed; log and keep the loop alive.
                        logger.warn("kill vm failed, %s" % e.message)
                        content = traceback.format_exc()
                        logger.warn("traceback: %s" % content)

                except Exception as e:
                    # Never let one bad iteration kill the fencer thread.
                    logger.debug(
                        'self-fencer on sharedblock primary storage %s stopped abnormally, try again soon...'
                        % cmd.vgUuid)
                    content = traceback.format_exc()
                    logger.warn(content)

            # NOTE(review): the loop only exits when run_fencer is already
            # false, so the else branch looks unreachable unless the fencer
            # was re-registered meanwhile; the debug/warn split may be
            # inverted -- confirm intended semantics.
            if not self.run_fencer(cmd.vgUuid, created_time):
                logger.debug(
                    'stop self-fencer on sharedblock primary storage %s for judger failed'
                    % cmd.vgUuid)
            else:
                logger.warn(
                    'stop self-fencer on sharedblock primary storage %s' %
                    cmd.vgUuid)