def scan_sg_devices(self, req):
    """Rescan the SCSI bus, then return the LUNs from ``get_fc_luns`` as JSON.

    :param req: incoming request; this handler does not read it.
    :return: an ``FcSanScanRsp`` serialized by ``jsonobject.dumps``.
    """
    # 1. find fc devices
    # 2. distinct by device wwid and storage wwn
    response = FcSanScanRsp()
    # The rescan runs under ``timeout 120``; its exit status is not inspected.
    bash.bash_roe("timeout 120 /usr/bin/rescan-scsi-bus.sh")
    luns = self.get_fc_luns()
    response.fiberChannelLunStructs = luns
    return jsonobject.dumps(response)
def clear_stalled_qmp_socket():
    """Delete socket files under QMP_SOCKET_PATH that no process references.

    A file counts as in use when its name shows up in a ``-qmp unix:...``
    argument in the ``ps aux`` output. When no such argument is found at all,
    everything under QMP_SOCKET_PATH is removed.
    """
    def get_used_qmp_file():
        # Base names of the sockets referenced on running command lines.
        matches = bash.bash_o("ps aux | grep -Eo -- '-qmp unix:%s/\w*\.sock'" % QMP_SOCKET_PATH).splitlines()
        return [match.split("/")[-1] for match in matches]

    on_disk = set(bash.bash_o("ls %s" % QMP_SOCKET_PATH).splitlines())
    if not on_disk:
        return

    in_use = set(get_used_qmp_file())
    if not in_use:
        bash.bash_roe("/bin/rm %s/*" % QMP_SOCKET_PATH)
        return

    # An empty difference simply means the loop body never runs.
    for stale in on_disk.difference(in_use):
        bash.bash_roe("/bin/rm %s/%s" % (QMP_SOCKET_PATH, stale))
def remove_device_map_for_vg(vgUuid):
    """Run ``dmsetup remove`` for every ``dmsetup ls`` entry matching *vgUuid*.

    :param vgUuid: text used as the grep pattern against ``dmsetup ls``.
    """
    listing = bash.bash_o("dmsetup ls | grep %s | awk '{print $1}'" % vgUuid)
    for dm_name in listing.strip().splitlines():
        # Removal is best effort: the result of each call is ignored.
        bash.bash_roe("dmsetup remove %s" % dm_name.strip())
def check_gl_lock(raise_exception=False):
    """Make sure a sanlock global lock is enabled, enabling one if none is listed.

    Returns immediately when ``lvmlockctl -i`` already shows an ``LK GL``
    line. Otherwise ``--gl-enable`` is issued for every sanlock lockspace
    reported by lvmlockctl and then for the first sanlock VG reported by vgs.

    :param raise_exception: when True, raise if vgs reports no sanlock VG.
    :raises Exception: when a ``--gl-enable`` call fails, or when no sanlock
        VG exists and *raise_exception* is True.
    """
    if bash.bash_r("lvmlockctl -i | grep 'LK GL'") == 0:
        return
    logger.debug("can not find any gl lock")

    _rc, lockspaces = bash.bash_ro("lvmlockctl -i | grep 'lock_type=sanlock' | awk '{print $2}'")
    for vg_name in lockspaces.strip().splitlines():
        if vg_name == "":
            continue
        ret, out, err = bash.bash_roe("lvmlockctl --gl-enable %s" % vg_name)
        if ret != 0:
            raise Exception("failed to enable gl lock on vg: %s, %s, %s" % (vg_name, out, err))

    _rc, vgs_out = bash.bash_ro("vgs --nolocking --noheadings -Svg_lock_type=sanlock -oname")
    candidates = [name for name in vgs_out.strip().split("\n") if name != ""]
    if not candidates:
        if raise_exception is True:
            raise Exception("can not find any sanlock shared vg")
        return

    first_vg = candidates[0]
    ret, _out, _err = bash.bash_roe("lvmlockctl --gl-enable %s" % first_vg)
    if ret != 0:
        raise Exception("failed to enable gl lock on vg: %s" % first_vg)
def scan_sg_devices(self, req):
    """Rescan the SCSI bus, then return the LUNs from ``get_fc_luns`` as JSON.

    :param req: incoming request; this handler does not read it.
    :return: an ``FcSanScanRsp`` serialized by ``jsonobject.dumps``.
    """
    # 1. find fc devices
    # 2. distinct by device wwid and storage wwn
    response = FcSanScanRsp()
    # The exit status of the rescan script is not inspected.
    bash.bash_roe("/usr/bin/rescan-scsi-bus.sh")
    luns = self.get_fc_luns()
    response.fiberChannelLunStructs = luns
    return jsonobject.dumps(response)
def remove_partial_lv_dm(vgUuid):
    """Run ``dmsetup remove`` on each partial LV of *vgUuid* tagged COMMON_TAG.

    :param vgUuid: VG name handed to ``lvs``.
    """
    lvs_cmd = "lvs --noheading --nolocking %s -opath,tags -Slv_health_status=partial | grep %s" % (vgUuid, COMMON_TAG)
    for row in bash.bash_o(lvs_cmd).strip().splitlines():
        # The first space-separated field of each row is the LV path.
        lv_path = row.strip().split(" ")[0]
        bash.bash_roe("dmsetup remove %s" % lv_path)
def check_gl_lock(raise_exception=False):
    """Enable a sanlock global lock when ``lvmlockctl -i`` shows none.

    Nothing is done when an ``LK GL`` line is already present. Otherwise the
    global lock is enabled on every sanlock lockspace listed by lvmlockctl,
    and afterwards on the first sanlock VG listed by vgs.

    :param raise_exception: when True, raise if vgs lists no sanlock VG.
    :raises Exception: on a failed ``--gl-enable``, or on a missing sanlock VG
        when *raise_exception* is True.
    """
    gl_missing = bash.bash_r("lvmlockctl -i | grep 'LK GL'") != 0
    if not gl_missing:
        return
    logger.debug("can not find any gl lock")

    _status, listed = bash.bash_ro(
        "lvmlockctl -i | grep 'lock_type=sanlock' | awk '{print $2}'")
    for lockspace_vg in listed.strip().splitlines():
        if lockspace_vg == "":
            continue
        code, stdout, stderr = bash.bash_roe("lvmlockctl --gl-enable %s" % lockspace_vg)
        if code != 0:
            raise Exception("failed to enable gl lock on vg: %s, %s, %s" % (lockspace_vg, stdout, stderr))

    _status, names = bash.bash_ro(
        "vgs --nolocking --noheadings -Svg_lock_type=sanlock -oname")
    sanlock_vgs = []
    for name in names.strip().split("\n"):
        if name != "":
            sanlock_vgs.append(name)

    if len(sanlock_vgs) == 0:
        if raise_exception is True:
            raise Exception("can not find any sanlock shared vg")
        return

    code, _stdout, _stderr = bash.bash_roe("lvmlockctl --gl-enable %s" % sanlock_vgs[0])
    if code != 0:
        raise Exception("failed to enable gl lock on vg: %s" % sanlock_vgs[0])
def check_stuck_vglk():
    """Release VGLK leases that keep showing up in ``sanlock client status``.

    The status is polled through a retried helper. If a ``:VGLK:`` line is
    still present after the retries, each such line is released, except
    lines containing "ADD" or "REM".
    """
    status_cmd = "sanlock client status | grep ':VGLK:'"

    @linux.retry(3, 0.1)
    def is_stuck_vglk():
        ret, _out, _err = bash.bash_roe(status_cmd)
        if ret == 0:
            # grep matched, so a VGLK lease is currently listed.
            raise RetryException("found sanlock vglk lock stuck")

    try:
        is_stuck_vglk()
    except Exception:
        ret, out, _err = bash.bash_roe(status_cmd)
        if ret != 0:
            return
        for held in out.strip().splitlines():  # type: str
            if "ADD" in held or "REM" in held:
                continue
            # Prefix the line with "-" and turn " p " into " -p " to form the release arguments.
            release_cmd = "sanlock client release -%s" % held.replace(" p ", " -p ")
            code, stdout, stderr = bash.bash_roe(release_cmd)
            logger.warn("find stuck vglk and already released, detail: [return_code: %s, stdout: %s, stderr: %s]" %
                        (code, stdout, stderr))
def remove_partial_lv_dm(vgUuid):
    """Run ``dmsetup remove`` on each partial LV of *vgUuid* tagged COMMON_TAG.

    :param vgUuid: VG name handed to ``lvs`` (queried with ``--readonly``).
    """
    lvs_cmd = "lvs --noheading --nolocking --readonly %s -opath,tags -Slv_health_status=partial | grep %s" % (
        vgUuid, COMMON_TAG)
    partial_rows = bash.bash_o(lvs_cmd).strip().splitlines()
    for row in partial_rows:
        # The first space-separated field of each row is the LV path.
        bash.bash_roe("dmsetup remove %s" % row.strip().split(" ")[0])
def start_lock(vgUuid):
    """Start the lockspace of *vgUuid* with ``vgchange --lock-start``.

    On success ``vg_lock_exists`` is called. On failure the ``<vg>-lvmlock``
    device-mapper entry is removed first when the output mentions
    "Device or resource busy", and an exception is raised in either case.

    :raises Exception: when ``vgchange --lock-start`` returns non-zero.
    """
    ret, out, err = bash.bash_roe("vgchange --lock-start %s" % vgUuid)
    if ret == 0:
        vg_lock_exists(vgUuid)
        return

    if "Device or resource busy" in out + err:
        bash.bash_roe("dmsetup remove %s-lvmlock" % vgUuid)
    raise Exception("vgchange --lock-start failed: return code: %s, stdout: %s, stderr: %s" % (ret, out, err))
def scan_sg_devices(self, req):
    """Rescan the SCSI bus, then return the LUNs from ``get_fc_luns`` as JSON.

    :param req: request whose ``http.REQUEST_BODY`` entry is JSON with a
        ``rescan`` field; that field is forwarded to ``get_fc_luns``.
    :return: an ``FcSanScanRsp`` serialized by ``jsonobject.dumps``.
    """
    # 1. find fc devices
    # 2. distinct by device wwid and storage wwn
    body = jsonobject.loads(req[http.REQUEST_BODY])
    response = FcSanScanRsp()
    # The rescan runs under ``timeout 120``; its exit status is not inspected.
    bash.bash_roe("timeout 120 /usr/bin/rescan-scsi-bus.sh -a")
    luns = self.get_fc_luns(body.rescan)
    response.fiberChannelLunStructs = luns
    linux.set_fail_if_no_path()
    return jsonobject.dumps(response)
def backup_super_block(disk_path):
    """Copy the first 64KB block of *disk_path* into LVM_CONFIG_BACKUP_PATH.

    The backup is named ``<wwid>.<SUPER_BLOCK_BACKUP>.<timestamp>``. A missing
    wwid is only logged; the copy still happens with that value in the name.

    :return: path of the backup file; the result of ``dd`` is not checked.
    """
    wwid = get_wwid(disk_path)
    if wwid is None or wwid == "":
        logger.warn("can not get wwid of disk %s" % disk_path)

    stamp = time.time()
    backup_name = "%s.%s.%s" % (wwid, SUPER_BLOCK_BACKUP, stamp)
    disk_back_file = os.path.join(LVM_CONFIG_BACKUP_PATH, backup_name)
    dd_cmd = "dd if=%s of=%s bs=64KB count=1 conv=notrunc" % (disk_path, disk_back_file)
    bash.bash_roe(dd_cmd)
    return disk_back_file
def configure_ssh_key():
    """Regenerate the fencer key pair and copy the public key to the peer.

    ``peer_password``, ``peer_username`` and ``peer_addr`` are not defined
    here; they have to be supplied by an enclosing scope.
    """
    key_path = mini_fencer.MINI_FENCER_KEY
    bash.bash_roe("/bin/rm %s*" % key_path)
    bash.bash_roe("ssh-keygen -P \"\" -f %s" % key_path)
    # NOTE(review): the password is placed inside single quotes unescaped, so a
    # password containing a single quote would break this command - confirm.
    copy_cmd = "sshpass -p '%s' ssh-copy-id -i %s %s@%s" % (
        peer_password, key_path, peer_username, peer_addr)
    # The result of ssh-copy-id is not acted upon; None is returned either way.
    bash.bash_roe(copy_cmd)
def fix_global_lock():
    """Leave the sanlock global lock enabled on a single VG only.

    Does nothing unless ENABLE_DUP_GLOBAL_CHECK is set and lvmlockctl lists
    at least two sanlock lockspaces. The lock is then disabled on every VG
    but the first in sorted order and enabled on that first one.
    """
    if not ENABLE_DUP_GLOBAL_CHECK:
        return

    listing = bash.bash_o("lvmlockctl -i | grep lock_type=sanlock | awk '{print $2}'")
    ordered = sorted(listing.strip().splitlines())
    if len(ordered) < 2:
        return

    keeper, others = ordered[0], ordered[1:]
    for duplicate in others:
        bash.bash_roe("lvmlockctl --gl-disable %s" % duplicate)
    bash.bash_roe("lvmlockctl --gl-enable %s" % keeper)
def fix_global_lock():
    """Resolve duplicate sanlock global locks by keeping one VG as the holder.

    Skipped when ENABLE_DUP_GLOBAL_CHECK is falsy or when fewer than two
    sanlock lockspaces are listed by lvmlockctl. Otherwise the global lock is
    disabled on all VGs after the first (sorted by name) and enabled on the
    first.
    """
    if not ENABLE_DUP_GLOBAL_CHECK:
        return

    lockspace_cmd = "lvmlockctl -i | grep lock_type=sanlock | awk '{print $2}'"
    names = bash.bash_o(lockspace_cmd).strip().splitlines()  # type: list
    names.sort()
    if len(names) < 2:
        return

    for index, name in enumerate(names):
        if index == 0:
            continue
        bash.bash_roe("lvmlockctl --gl-disable %s" % name)
    bash.bash_roe("lvmlockctl --gl-enable %s" % names[0])
def backup_super_block(disk_path):
    """Save the first 64KB block of *disk_path* under LVM_CONFIG_BACKUP_PATH.

    The file name is built from the disk wwid, SUPER_BLOCK_BACKUP and the
    current time. When no wwid can be read, a warning is logged and the
    backup is still written.

    :return: path of the backup file; the result of ``dd`` is not checked.
    """
    wwid = get_wwid(disk_path)
    has_wwid = not (wwid is None or wwid == "")
    if not has_wwid:
        logger.warn("can not get wwid of disk %s" % disk_path)

    now = time.time()
    disk_back_file = os.path.join(LVM_CONFIG_BACKUP_PATH, "%s.%s.%s" % (wwid, SUPER_BLOCK_BACKUP, now))
    bash.bash_roe("dd if=%s of=%s bs=64KB count=1 conv=notrunc" % (disk_path, disk_back_file))
    return disk_back_file
def enable_multipath(self, req):
    """Enable multipath and comment out ``alias`` lines in multipath.conf.

    When /etc/multipath.conf has a line starting with ``alias``, those lines
    are commented out and multipathd is reloaded.

    :param req: incoming request; this handler does not read it.
    :return: an ``AgentRsp`` serialized by ``jsonobject.dumps``.
    """
    response = AgentRsp()
    lvm.enable_multipath()

    has_alias = bash.bash_r("grep '^[[:space:]]*alias' /etc/multipath.conf") == 0
    if has_alias:
        bash.bash_roe("sed -i 's/^[[:space:]]*alias/#alias/g' /etc/multipath.conf")
        bash.bash_roe("systemctl reload multipathd")

    linux.set_fail_if_no_path()
    return jsonobject.dumps(response)
def check_pv_status(vgUuid, timeout):
    """Check the disks and VG attributes of shared block group *vgUuid*.

    Runs ``pvs`` to look for unknown or missing disks, then ``vgck``, then
    compares the ``vgs -oattr`` string with the expected 'w', 'z', '-' and
    's' flags at positions 0, 1, 3 and 5.

    :param vgUuid: VG name passed to the LVM commands.
    :param timeout: seconds given to ``timeout -s SIGKILL``; the vgck and vgs
        calls use at least 10.
    :return: ``(True, "")`` when every check passes, otherwise
        ``(False, reason)``. A failed or empty ``pvs`` is only logged and
        reported as ``(True, "")``.
    """
    r, o, e = bash.bash_roe(
        "timeout -s SIGKILL %s pvs --noheading --nolocking -Svg_name=%s -oname,missing" % (timeout, vgUuid))
    if len(o) == 0 or r != 0:
        logger.warn(
            "can not find shared block in shared block group %s, detail: [return_code: %s, stdout: %s, stderr: %s]" %
            (vgUuid, r, o, e))
        return True, ""

    # Walk the output line by line. Iterating the string itself yields single
    # characters, which can never contain "unknown" or "missing".
    for pvs_out in o.strip().splitlines():
        if "unknown" in pvs_out:
            s = "disk in shared block group %s missing" % vgUuid
            logger.warn("%s, details: %s" % (s, o))
            return False, s
        if "missing" in pvs_out:
            s = "disk %s in shared block group %s exists but state is missing" % (
                pvs_out.strip().split(" ")[0], vgUuid)
            logger.warn("%s, details: %s" % (s, o))
            return False, s

    vg_timeout = 10 if timeout < 10 else timeout
    health, o, e = bash.bash_roe('timeout -s SIGKILL %s vgck %s' % (vg_timeout, vgUuid))
    if health != 0:
        s = "vgck %s failed, details: %s" % (vgUuid, e)
        logger.warn(s)
        return False, s

    health = bash.bash_o(
        'timeout -s SIGKILL %s vgs -oattr --nolocking --readonly --noheadings --shared %s ' %
        (vg_timeout, vgUuid)).strip()
    # Positions up to index 5 are read below, so a shorter string is treated
    # like an unreadable attr instead of raising IndexError.
    if health == "" or len(health) < 6:
        logger.warn("can not get proper attr of vg, return false")
        return False, "primary storage %s attr get error, expect 'wz--ns' got %s" % (vgUuid, health)

    # (position in the attr string, expected flag, wording used in the message)
    expectations = (
        (0, "w", "permission"),
        (1, "z", "resizeable"),
        (3, "-", "partial"),
        (5, "s", "shared mode"),
    )
    for index, expected, what in expectations:
        # ``health`` is a plain string here, so the flag is read from it directly.
        if health[index] != expected:
            return False, "primary storage %s %s error, expect '%s' but now is %s, deatils: %s" % (
                vgUuid, what, expected, health[index], health)

    return True, ""
def enable_multipath():
    """Load the multipath kernel modules and enable the multipathd service.

    :raises RetryException: when ``is_multipath_running()`` is still false
        after the setup commands have been issued.
    """
    setup_commands = (
        "modprobe dm-multipath",
        "modprobe dm-round-robin",
        "mpathconf --enable --with_multipathd y",
        "systemctl enable multipathd",
    )
    # The commands run in order; none of their results is checked.
    for command in setup_commands:
        bash.bash_roe(command)

    if not is_multipath_running():
        raise RetryException("multipath still not running")
def create_vg_if_not_found(vgUuid, diskPaths, hostUuid, forceWipe=False):
    """Create the shared VG *vgUuid* on *diskPaths* unless it can be found.

    :param vgUuid: name of the VG to look up or create.
    :param diskPaths: disks handed to ``vgcreate`` (and to ``lvm.wipe_fs``).
    :param hostUuid: written into the INIT_TAG tag of a newly created VG.
    :param forceWipe: when True, wipe the disks before creating the VG.
    :return: True when this call created the VG; False when the VG was found,
        either up front or after a failed ``vgcreate``.
    :raises RetryException: when the VG can be neither found nor created; the
        message then carries the stderr of ``vgcreate``.
    """
    @linux.retry(times=3, sleep_time=random.uniform(0.1, 3))
    def find_vg(vgUuid):
        cmd = shell.ShellCmd("vgs %s -otags | grep %s" % (vgUuid, INIT_TAG))
        cmd(is_exception=False)
        if cmd.return_code != 0:
            raise RetryException("can not find vg %s with tag %s" % (vgUuid, INIT_TAG))
        return True

    try:
        find_vg(vgUuid)
        return False
    except RetryException:
        pass

    if forceWipe is True:
        lvm.wipe_fs(diskPaths)

    r, o, e = bash.bash_roe("vgcreate --shared --addtag '%s::%s::%s' --metadatasize %s %s %s" %
                            (INIT_TAG, hostUuid, time.time(), DEFAULT_VG_METADATA_SIZE, vgUuid, " ".join(diskPaths)))
    if r == 0:
        return True

    # find_vg never returns False: it returns True or raises. The lookup after
    # a failed create is therefore caught here, so the error can report why
    # vgcreate failed. The exception type stays RetryException, which is what
    # previously propagated from this path.
    try:
        find_vg(vgUuid)
    except RetryException:
        raise RetryException("can not find vg %s with disks: %s and create failed for %s " %
                             (vgUuid, diskPaths, e))
    return False
def discovery_iscsi(iscsiServerIp, iscsiServerPort):
    """Run an iSCSI sendtargets discovery against the given portal.

    :return: list holding the last space-separated field of every output line.
    :raises RetryException: when ``iscsiadm`` returns non-zero.
    """
    discovery_cmd = "timeout 10 iscsiadm -m discovery --type sendtargets --portal %s:%s" % (
        iscsiServerIp, iscsiServerPort)
    ret, out, err = bash.bash_roe(discovery_cmd)
    if ret != 0:
        raise RetryException("can not discovery iscsi portal %s:%s, cause %s" %
                             (iscsiServerIp, iscsiServerPort, err))

    targets = []
    for record in out.splitlines():
        targets.append(record.strip().split(" ")[-1])
    return targets
def lvm_vgck(vgUuid, timeout):
    """Run ``vgck`` on *vgUuid* and decide whether its output is acceptable.

    stderr is merged into stdout (``2>&1``). Lines containing "WARNING",
    "Retrying" or "have changed sizes", and blank lines, are tolerated. A
    "Duplicate sanlock global lock" line triggers ``fix_global_lock``. Any
    other line is treated as a failure.

    :param vgUuid: VG name passed to ``vgck``.
    :param timeout: seconds given to ``timeout -s SIGKILL``, at least 60.
    :return: ``(True, "")`` on success, otherwise ``(False, reason)``.
    """
    health, o, e = bash.bash_roe('timeout -s SIGKILL %s vgck %s 2>&1' % (60 if timeout < 60 else timeout, vgUuid))
    check_stuck_vglk()

    if health != 0:
        s = "vgck %s failed, detail: [return_code: %s, stdout: %s, stderr: %s]" % (vgUuid, health, o, e)
        logger.warn(s)
        return False, s

    if o is not None and o != "":
        for es in o.strip().splitlines():
            if "WARNING" in es:
                continue
            if "Retrying" in es:
                continue
            if "Duplicate sanlock global lock" in es:
                fix_global_lock()
                continue
            if "have changed sizes" in es:
                # The size-change notice is logged here, next to the line it
                # describes, rather than for blank lines.
                logger.debug("found pv of vg %s size may changed, details: %s" % (vgUuid, es))
                continue
            if es.strip() == "":
                continue
            s = "vgck %s failed, details: [return_code: %s, stdout: %s, stderr: %s]" % (vgUuid, health, o, e)
            logger.warn(s)
            return False, s

    return True, ""
def do_promote():
    """Run ``drbdadm primary`` on the resource and verify the resulting role.

    ``self`` and ``force`` are not defined here; they have to be supplied by
    an enclosing scope.

    :raises RetryException: when the role is not ``DrbdRole.Primary`` after
        the command has run.
    """
    force_flag = " --force" if force else ""
    ret, out, err = bash.bash_roe("drbdadm primary %s %s" % (self.name, force_flag))
    # The role is re-read afterwards; the command's return code alone decides nothing.
    if self.get_role() != DrbdRole.Primary:
        raise RetryException("promote failed, return: %s, %s, %s. resource %s still not in role %s" %
                             (ret, out, err, self.name, DrbdRole.Primary))
def check_sanlock_status(lockspace):
    """Inspect the renewal counters of *lockspace* in ``sanlock client status -D``.

    :return: ``(True, "")`` when the status looks fine, ``(False, reason)``
        when the status cannot be read, or when the last renewal result is
        not 1 and the attempt/success counters are more than 100 apart.
    """
    ret, out, _err = bash.bash_roe("sanlock client status -D | grep %s -A 18" % lockspace)
    if ret != 0:
        return False, "sanlock can not get lockspace %s status" % lockspace

    # Each counter keeps 0 unless a matching line is found; the last match wins.
    counters = {
        "renewal_last_result": 0,
        "renewal_last_attempt": 0,
        "renewal_last_success": 0,
    }
    for line in out.strip().splitlines():
        for field in counters:
            if field in line:
                counters[field] = int(line.strip().split("=")[-1])

    last_result = counters["renewal_last_result"]
    last_attempt = counters["renewal_last_attempt"]
    last_success = counters["renewal_last_success"]

    if last_result == 1:
        return True, ""

    attempt_ahead = last_attempt > last_success and last_attempt - last_success > 100
    attempt_behind = 100 < last_attempt < last_success - 100 < last_success
    if attempt_ahead or attempt_behind:
        return False, "sanlock last renewal failed with %s and last attempt is %s, last success is %s" % (
            last_result, last_attempt, last_success)
    return True, ""
def is_defined(self):
    """Tell whether ``drbdadm role`` knows the resource named ``self.name``.

    :return: False only when the command fails and its output contains
        "not defined in your config"; True in every other case.
    """
    assert self.name is not None
    assert self.name.strip() != ""

    ret, out, err = bash.bash_roe("drbdadm role %s" % self.name)
    undefined = ret != 0 and "not defined in your config" in out + err
    return not undefined
def minor_allocated(self):
    """Tell whether ``drbdadm role`` reports a usable device minor.

    :return: False when the output contains "Device minor not allocated" or
        "not defined in your config"; True otherwise, including when stderr
        is None.
    """
    _ret, out, err = bash.bash_roe("drbdadm role %s" % self.name)
    if err is None:
        return True

    combined = out + err
    if "Device minor not allocated" in combined:
        logger.debug("Device %s minor not allocated!" % self.name)
        return False
    if "not defined in your config" in combined:
        return False
    return True
def set_sanlock_event(lockspace):
    """Issue ``sanlock client set_event`` for *lockspace*.

    The host id is the second ``:``-separated field of *lockspace*.

    :type lockspace: str
    :return: True when the command returns 0, False otherwise.
    """
    fields = lockspace.split(":")
    host_id = fields[1]
    event_cmd = "sanlock client set_event -s %s -i %s -e 1 -d 1" % (lockspace, host_id)
    ret, _out, _err = bash.bash_roe(event_cmd)
    return ret == 0
def examine_lockspace(lockspace):
    """Run ``sanlock client examine`` on *lockspace*, retrying on failure.

    The command is tried once directly, then through a retried helper, and
    once more if the helper gives up.

    :return: the return code of the last examine command that ran.
    """
    @linux.retry(times=3, sleep_time=0.5)
    def _do_examine_lockspace(lockspace):
        code, _, _ = bash.bash_roe("sanlock client examine -s %s" % lockspace, errorout=False)
        if code != 0:
            raise RetryException("can not examine lockspace")
        return code

    def examine_once():
        code, _, _ = bash.bash_roe("sanlock client examine -s %s" % lockspace, errorout=False)
        return code

    ret = examine_once()
    if ret == 0:
        return ret

    try:
        logger.debug("retrying examine lockspace for %s" % lockspace)
        ret = _do_examine_lockspace(lockspace)
    except Exception:
        # Retries exhausted (or another error): report one final attempt.
        ret = examine_once()
    return ret
def raid_scan(self, req):
    """Scan for megaraid devices and return their details as JSON.

    :param req: incoming request; this handler does not read it.
    :return: a ``RaidScanRsp`` serialized by ``jsonobject.dumps``. Its
        ``raidPhysicalDriveStructs`` is only set when the scan finds output.
    """
    # 1. find raid device
    # 2. get each device info
    response = RaidScanRsp()
    ret, out, _err = bash.bash_roe("smartctl --scan | grep megaraid")
    found = not (ret != 0 or out.strip() == "")
    if found:
        response.raidPhysicalDriveStructs = self.get_megaraid_devices(out)
    return jsonobject.dumps(response)
def resize_lv(path, size, force=False):
    """Resize the LV at *path* to ``calcLvReservedSize(size)`` bytes.

    A "matches existing size" message in the output counts as success.

    :param force: anything other than False adds ``--force`` to lvresize.
    :raises Exception: when lvresize fails for any other reason.
    """
    force_flag = " --force " if force is not False else ""
    ret, out, err = bash.bash_roe("lvresize %s --size %sb %s" % (force_flag, calcLvReservedSize(size), path))
    if ret == 0 or "matches existing size" in err or "matches existing size" in out:
        return
    raise Exception("resize lv %s to size %s failed, return code: %s, stdout: %s, stderr: %s" %
                    (path, size, ret, out, err))
def resize_lv(path, size):
    """Resize the LV at *path* to ``calcLvReservedSize(size)`` bytes.

    A "matches existing size" message in the output counts as success.

    :raises Exception: when lvresize fails for any other reason.
    """
    target_size = calcLvReservedSize(size)
    ret, out, err = bash.bash_roe("lvresize --size %sb %s" % (target_size, path))
    already_sized = "matches existing size"
    if ret == 0 or already_sized in err or already_sized in out:
        return
    raise Exception("resize lv %s to size %s failed, return code: %s, stdout: %s, stderr: %s" %
                    (path, size, ret, out, err))
def get_smart_data(busNumber, deviceNumber):
    # type: (int, int) -> list[SmartDataStruct]
    """Read the vendor-specific SMART attribute table of a megaraid drive.

    Rows are collected between the "Vendor Specific SMART Attributes with
    Thresholds" heading and the "SMART Error Log Version" line of
    ``smartctl --all``. The first ten whitespace-separated columns of each
    row become attributes of a ``SmartDataStruct``, with all-digit columns
    stored as int.

    :return: one struct per row, with ``state`` set to "error", "warning" or
        "health" from ``value`` and ``thresh``; an empty list when no rows
        are found.
    :raises Exception: when smartctl fails and printed no attribute table.
    """
    table_heading = "vendor specific smart attributes with thresholds"
    r, text, e = bash.bash_roe("smartctl --all /dev/bus/%s -d megaraid,%s" % (busNumber, deviceNumber))
    if r != 0 and table_heading not in text.lower():
        raise Exception("read smart info failed, return: %s, stdout: %s, stderr: %s" % (r, text, e))

    data = []
    in_data = False
    for l in text.splitlines():
        lowered = l.lower()
        if table_heading in lowered:
            in_data = True
            continue
        if "smart error log version" in lowered:
            break
        if not in_data:
            continue
        # Skip the column header row and blank lines inside the table.
        if "id" in lowered and "attribute_name" in lowered:
            continue
        if l.strip() == "":
            continue
        data.append(l)

    if len(data) == 0:
        logger.warn("can not find smart data!")
        return []

    attrs = ("id", "attributeName", "flag", "value", "worst", "thresh", "type", "updated", "whenFailed", "rawValue")
    result = []
    for d in data:
        logger.debug("processing smart data %s" % d)
        columns = d.split()
        if len(columns) < len(attrs):
            # A short row cannot fill every attribute; skip it instead of
            # failing the whole read with an IndexError.
            logger.warn("skip smart data line with unexpected column count: %s" % d)
            continue

        item = SmartDataStruct()
        # setattr stores the raw column text as is; building source for exec
        # would misread values that contain quotes or backslashes.
        for name, raw in zip(attrs, columns):
            setattr(item, name, int(raw) if raw.isdigit() else raw)

        if item.value < item.thresh:
            item.state = "error"
        elif item.value - item.thresh < int(item.thresh * 0.2):
            item.state = "warning"
        else:
            item.state = "health"
        result.append(item)

    return result
def set_sanlock_event(lockspace):
    """Issue ``sanlock client set_event`` for *lockspace*, retrying on failure.

    The host id is the second ``:``-separated field of *lockspace*. The
    command is tried once directly, then through a retried helper, and once
    more if the helper gives up.

    :return: the return code of the last set_event command that ran.
    """
    @linux.retry(times=3, sleep_time=0.5)
    def _set_sanlock_event(lockspace):
        inner_host_id = lockspace.split(":")[1]
        code, _, _ = bash.bash_roe(
            "sanlock client set_event -s %s -i %s -e 1 -d 1" % (lockspace, inner_host_id), errorout=False)
        if code != 0:
            raise RetryException("set sanlock event failed")
        return code

    host_id = lockspace.split(":")[1]
    event_cmd = "sanlock client set_event -s %s -i %s -e 1 -d 1" % (lockspace, host_id)

    ret, _, _ = bash.bash_roe(event_cmd, errorout=False)
    if ret == 0:
        return ret

    try:
        logger.debug("retrying set sanlock event for %s" % lockspace)
        ret = _set_sanlock_event(lockspace)
    except Exception:
        ret, _, _ = bash.bash_roe(event_cmd, errorout=False)
    finally:
        # NOTE(review): returning from ``finally`` also discards any exception
        # still in flight at this point - confirm that this is intended.
        return ret
def create_lv_from_absolute_path(path, size, tag="zs::sharedblock::volume"):
    """Create an inactive LV for *path* and pass it to ``dd_zero``.

    The VG and LV names are the third and fourth ``/``-separated fields of
    *path*.

    :param size: requested size; ``calcLvReservedSize(size)`` bytes are used.
    :param tag: tag added to the new LV.
    :raises Exception: when the LV does not exist after ``lvcreate``.
    """
    parts = path.split("/")
    vg_name = parts[2]
    lv_name = parts[3]

    create_cmd = "lvcreate -an --addtag %s --size %sb --name %s %s" % (
        tag, calcLvReservedSize(size), lv_name, vg_name)
    ret, out, err = bash.bash_roe(create_cmd)
    # Existence of the LV decides success, not the return code of lvcreate.
    if not lv_exists(path):
        raise Exception("can not find lv %s after create, lvcreate return: %s, %s, %s" % (path, ret, out, err))

    with OperateLv(path, shared=False):
        dd_zero(path)
def self_test_is_running(bus, device):
    """Check that a SMART self-test is in progress on a megaraid drive.

    The self-test log is checked first, then the full ``smartctl -a`` output.
    ``wwn`` is not defined here; it has to be supplied by an enclosing scope.

    :raises RetryException: when neither output reports a running self-test.
    """
    in_progress = "Self-test routine in progress"

    log_rc = bash.bash_r(
        "smartctl -l selftest -d megaraid,%s /dev/bus/%s | grep 'Self-test routine in progress'" % (device, bus))
    if log_rc == 0:
        return

    _ret, out, err = bash.bash_roe("smartctl -a /dev/bus/%s -d megaraid,%s" % (bus, device))
    if in_progress in out + err:
        return

    raise RetryException("can not find self test in progress on drive %s" % wwn)
def testName(self):
    """Exercise ``UpdateConfigration.updateHostIommu`` on a temporary grub file.

    With ``enableIommu`` False, grep must find neither an ``intel_iommu``
    on/off setting nor a ``modprobe.blacklist`` entry. With it True, the "on"
    setting and the blacklist must be found, the "off" setting must not.
    """
    temp = tempfile.NamedTemporaryFile(prefix='grub', suffix='', dir='/tmp', mode='w+b', delete=True)

    def apply_iommu(path, enabled):
        # Run one update with the requested flag and require success.
        updateConfigration = vm_plugin.UpdateConfigration()
        updateConfigration.path = path
        updateConfigration.enableIommu = enabled
        success, error = updateConfigration.updateHostIommu()
        self.assertTrue(success)

    def grep_return_codes(path):
        # grep return codes for "iommu on", "iommu off" and the blacklist, in that order.
        r_on, o_on, e_on = bash.bash_roe("grep -E 'intel_iommu(\ )*=(\ )*on' %s" % path)
        r_off, o_off, e_off = bash.bash_roe("grep -E 'intel_iommu(\ )*=(\ )*off' %s" % path)
        r_blacklist, o_blacklist, e_blacklist = bash.bash_roe("grep -E 'modprobe.blacklist(\ )*=' %s" % path)
        return r_on, r_off, r_blacklist

    try:
        temp.write('GRUB_CMDLINE_LINUX="crashkernel=auto rd.lvm.lv=zstack/root rd.lvm.lv=zstack/swap rhgb quiet d=d intel_iommu=on modprobe.blacklist=snd_hda_intel,amd76x_edac,vga16fb,nouveau,rivafb,nvidiafb,rivatv,amdgpu,radeon"\n')
        temp.seek(0)
        path = temp.name

        apply_iommu(path, False)
        r_on, r_off, r_modprobe_blacklist = grep_return_codes(path)
        self.assertNotEqual(r_on, 0)
        self.assertNotEqual(r_off, 0)
        self.assertNotEqual(r_modprobe_blacklist, 0)

        apply_iommu(path, True)
        r_on, r_off, r_modprobe_blacklist = grep_return_codes(path)
        self.assertEqual(r_on, 0)
        self.assertNotEqual(r_off, 0)
        self.assertEqual(r_modprobe_blacklist, 0)
    finally:
        temp.close()
def wipe_fs(disks, expected_vg=None):
    """Wipe filesystem/LVM signatures from every disk not owned by *expected_vg*.

    For each disk: skip it when ``pvdisplay`` already shows *expected_vg*,
    back up the super block, wipe the disk with ``wipefs -af``, flush the
    multipath map when ``lsblk`` reports an mpath type, and finally drop the
    lock and device-mapper entries of any VG that was found on the disk.

    :param disks: disk paths to wipe.
    :param expected_vg: VG name the disks are allowed to carry. When it is
        None or empty, the two checks that grep for it are skipped, so the
        text "None" is never used as a search pattern.
    :raises Exception: when *expected_vg* appears in the super block backup
        although ``pvdisplay`` did not report it.
    """
    for disk in disks:
        if expected_vg and bash.bash_r("pvdisplay %s | grep %s" % (disk, expected_vg)) == 0:
            continue

        exists_vg = None
        r, o = bash.bash_ro("pvs --nolocking --noheading -o vg_name %s" % disk)
        if r == 0 and o.strip() != "":
            exists_vg = o.strip()

        # The backup is always taken before wiping, whether or not it is inspected.
        backup = backup_super_block(disk)
        if expected_vg and bash.bash_r("grep %s %s" % (expected_vg, backup)) == 0:
            raise Exception("found vg uuid in superblock backup while not found in lvm command!")

        bash.bash_roe("partprobe -s %s" % disk)
        cmd_type = bash.bash_o("lsblk %s -oTYPE | grep mpath" % disk)
        need_flush_mpath = cmd_type.strip() != ""

        bash.bash_roe("wipefs -af %s" % disk)

        if need_flush_mpath:
            bash.bash_roe("multipath -f %s && systemctl restart multipathd.service && sleep 1" % disk)

        if exists_vg is not None:
            logger.debug("found vg %s exists on this pv %s, start wipe" % (exists_vg, disk))
            drop_vg_lock(exists_vg)
            remove_device_map_for_vg(exists_vg)
def enable_multipath(self, req):
    """Load the multipath kernel modules and enable multipath via mpathconf.

    :param req: incoming request; this handler does not read it.
    :return: an ``AgentRsp`` serialized by ``jsonobject.dumps``.
    :raises RetryException: when ``lvm.is_multipath_running()`` is still
        false after the setup commands have been issued.
    """
    rsp = AgentRsp()
    bash.bash_roe("modprobe dm-multipath")
    bash.bash_roe("modprobe dm-round-robin")
    bash.bash_roe("mpathconf --enable --with_multipathd y")

    # The function has to be called: the bare function object is always
    # truthy, so the check could never fail without the parentheses.
    if not lvm.is_multipath_running():
        raise RetryException("multipath still not running")

    return jsonobject.dumps(rsp)
def clear_stalled_qmp_socket():
    """Remove QMP socket files that no process references any more.

    Files listed under QMP_SOCKET_PATH are compared with the socket names in
    ``-qmp unix:...`` arguments of the ``ps aux`` output. Unreferenced files
    are deleted; when no process references any socket, the whole directory
    content is deleted.
    """
    def get_used_qmp_file():
        # Keep only the file name of each matched ``-qmp unix:<path>`` argument.
        names = []
        ps_cmd = "ps aux | grep -Eo -- '-qmp unix:%s/\w*\.sock'" % QMP_SOCKET_PATH
        for hit in bash.bash_o(ps_cmd).splitlines():
            names.append(hit.split("/")[-1])
        return names

    present = set(bash.bash_o("ls %s" % QMP_SOCKET_PATH).splitlines())
    if len(present) == 0:
        return

    referenced = set(get_used_qmp_file())
    if len(referenced) == 0:
        bash.bash_roe("/bin/rm %s/*" % QMP_SOCKET_PATH)
        return

    stalled = present.difference(referenced)
    for socket_name in stalled:
        bash.bash_roe("/bin/rm %s/%s" % (QMP_SOCKET_PATH, socket_name))
def check_stuck_vglk():
    """Release VGLK leases that stay listed in ``sanlock client status``.

    A retried helper raises while a ``:VGLK:`` line is present. If it still
    fails after its retries, every listed VGLK line is released, except
    lines containing "ADD" or "REM".
    """
    @linux.retry(3, 0.1)
    def is_stuck_vglk():
        code, _stdout, _stderr = bash.bash_roe("sanlock client status | grep ':VGLK:'")
        if code != 0:
            return
        raise RetryException("found sanlock vglk lock stuck")

    def release_listed_vglk():
        # Re-read the status and release every lease that is not being added or removed.
        code, stdout, _stderr = bash.bash_roe("sanlock client status | grep ':VGLK:'")
        if code != 0:
            return
        lease_lines = stdout.strip().splitlines()
        if len(lease_lines) == 0:
            return
        for stucked in lease_lines:  # type: str
            if "ADD" in stucked or "REM" in stucked:
                continue
            cmd = "sanlock client release -%s" % stucked.replace(" p ", " -p ")
            rc, out, err = bash.bash_roe(cmd)
            logger.warn("find stuck vglk and already released, detail: [return_code: %s, stdout: %s, stderr: %s]" %
                        (rc, out, err))

    try:
        is_stuck_vglk()
    except Exception:
        release_listed_vglk()
def down(self):
    """Run ``drbdadm down`` on the resource, tolerating known harmless failures.

    A failure is ignored when the output reports a conflicting device minor,
    when /proc/drbd shows the local minor as ``cs:Unconfigured``, or when
    grep for the local minor in /proc/drbd returns 1.

    :raises Exception: for any other failure of ``drbdadm down``.
    """
    ret, out, err = bash.bash_roe("drbdadm down %s" % self.name)
    if ret == 0:
        return

    if "conflicting use of device-minor" in out + err:
        logger.debug("detect conflicting use of device-minor! %s" % err)
        return

    minor = self.config.local_host.minor
    unconfigured_rc = bash.bash_r("cat /proc/drbd | grep '^%s: cs:Unconfigured'" % minor)
    if 0 == unconfigured_rc:
        return

    listed_rc = bash.bash_r("cat /proc/drbd | grep '^%s: '" % minor)
    if 1 == listed_rc:
        return

    raise Exception("demote resource %s failed: %s, %s, %s" % (self.name, ret, out, err))
def rescan(self, disk_name=None):
    """
    Rescan a disk (and its multipath slaves, if any), then run pvresize on it.

    When no name is given, the last path component of ``self.get_path()`` is
    used. For a multipath device every slave is rescanned and the multipath
    map is resized before ``pvresize`` runs.

    :type disk_name: str
    """
    if disk_name is None:
        disk_name = self.get_path().split("/")[-1]

    def rescan_slave(slave, raise_exception=True):
        # Ask the kernel to rescan one block device through sysfs and log the outcome.
        _cmd = shell.ShellCmd("echo 1 > /sys/block/%s/device/rescan" % slave)
        _cmd(is_exception=raise_exception)
        logger.debug(
            "rescaned disk %s (wwid: %s), return code: %s, stdout %s, stderr: %s" %
            (slave, self.identifier, _cmd.return_code, _cmd.stdout, _cmd.stderr))

    multipath_dev = lvm.get_multipath_dmname(disk_name)
    if multipath_dev:
        # Keep the original name in ``t``; it is restored after the multipath handling.
        t, disk_name = disk_name, multipath_dev
        # disk name is dm-xx when multi path
        slaves = shell.call("ls /sys/class/block/%s/slaves/" % disk_name).strip().split("\n")
        if slaves is None or len(slaves) == 0 or (len(slaves) == 1 and slaves[0].strip() == ""):
            # No slave listed: rescan the dm device itself without raising on failure.
            logger.debug("can not get any slaves of multipath device %s" % disk_name)
            rescan_slave(disk_name, False)
        else:
            for s in slaves:
                rescan_slave(s)
            cmd = shell.ShellCmd("multipathd resize map %s" % disk_name)
            cmd(is_exception=True)
            logger.debug(
                "resized multipath device %s, return code: %s, stdout %s, stderr: %s" %
                (disk_name, cmd.return_code, cmd.stdout, cmd.stderr))
        disk_name = t
    else:
        rescan_slave(disk_name)

    command = "pvresize /dev/%s" % disk_name
    if multipath_dev is not None and multipath_dev != disk_name:
        # Fall back to the multipath name when pvresize on the original name fails.
        command = "pvresize /dev/%s || pvresize /dev/%s" % (disk_name, multipath_dev)
    r, o, e = bash.bash_roe(command, errorout=True)
    logger.debug(
        "resized pv %s (wwid: %s), return code: %s, stdout %s, stderr: %s" %
        (disk_name, self.identifier, r, o, e))
def enable_multipath(self, req):
    """Enable multipath, reset its bindings/wwids files and restart multipathd.

    ``alias`` lines in /etc/multipath.conf are commented out. The bindings
    and wwids files are renamed with a timestamp suffix, after which a
    generated ``rm`` script deletes renamed copies whose md5sum repeats.

    :param req: incoming request; this handler does not read it.
    :return: an ``AgentRsp`` serialized by ``jsonobject.dumps``.
    """
    response = AgentRsp()
    lvm.enable_multipath()
    bash.bash_roe("sed -i 's/^[[:space:]]*alias/#alias/g' /etc/multipath.conf")

    stamp = time.time()

    # Only the first half of each command carries the timestamp; the second
    # half is appended verbatim and is not %-formatted.
    rotate_bindings = "mv /etc/multipath/bindings /etc/multipath/bindings.%s " % stamp
    dedup_bindings = "&& md5sum /etc/multipath/bindings.* | awk 'p[$1]++ { printf \"rm %s\\n\",$2;}' | bash"
    bash.bash_roe(rotate_bindings + dedup_bindings)

    rotate_wwids = "mv /etc/multipath/wwids /etc/multipath/wwids.%s " % stamp
    dedup_wwids = "&& md5sum /etc/multipath/wwids.* | awk 'p[$1]++ { printf \"rm %s\\n\",$2;}' | bash"
    bash.bash_roe(rotate_wwids + dedup_wwids)

    bash.bash_roe("multipath -F; systemctl restart multipathd.service")
    linux.set_fail_if_no_path()
    return jsonobject.dumps(response)
def lv_rename(old_abs_path, new_abs_path, overwrite=False):
    """Rename an LV, optionally replacing an existing LV of the target name.

    When the target does not exist, the ``(return code, stdout, stderr)`` of
    ``lvrename`` is returned as is. With *overwrite*, the existing target is
    first renamed to ``<target>_<timestamp>``, the source takes its place,
    and the renamed copy is then passed to ``delete_lv``.

    :raises Exception: when the target exists and *overwrite* is False, or
        when one of the overwrite renames fails.
    """
    target_taken = lv_exists(new_abs_path)
    if not target_taken:
        return bash.bash_roe("lvrename %s %s" % (old_abs_path, new_abs_path))

    if overwrite is False:
        raise Exception("lv with name %s is already exists, can not rename lv %s to it" %
                        (new_abs_path, old_abs_path))

    tmp_path = new_abs_path + "_%s" % int(time.time())

    # Move the existing target out of the way.
    ret, out, err = lv_rename(new_abs_path, tmp_path)
    if ret != 0:
        raise Exception("rename lv %s to tmp name %s failed: stdout: %s, stderr: %s" %
                        (new_abs_path, tmp_path, out, err))

    ret, out, err = lv_rename(old_abs_path, new_abs_path)
    if ret != 0:
        # Put the previous target back under its name before reporting the failure.
        bash.bash_errorout("lvrename %s %s" % (tmp_path, new_abs_path))
        raise Exception("rename lv %s to tmp name %s failed: stdout: %s, stderr: %s" %
                        (old_abs_path, new_abs_path, out, err))

    delete_lv(tmp_path, False)
def generate_fencer(peer_addr, peer_username, peer_password):
    """Set up the SSH key for the peer and install ``mini_fencer.py``.

    The fencer key pair is regenerated and copied to the peer. The
    PEER_USERNAME and PEER_MGMT_ADDR assignments in the ``mini_fencer.py``
    next to this file are then rewritten, and the script is copied to
    /usr/lib/drbd with mode 777. No command result is checked.
    """
    key_path = mini_fencer.MINI_FENCER_KEY

    # Key setup: drop old key files, create a passphrase-less pair, push it to the peer.
    bash.bash_roe("/bin/rm %s*" % key_path)
    bash.bash_roe("ssh-keygen -P \"\" -f %s" % key_path)
    bash.bash_roe("sshpass -p '%s' ssh-copy-id -i %s %s@%s" %
                  (peer_password, key_path, peer_username, peer_addr))

    here = os.path.split(os.path.realpath(__file__))[0]
    fencer_path = "%s/mini_fencer.py" % here

    bash.bash_roe("sed -i 's/^PEER_USERNAME = .*$/PEER_USERNAME = \"%s\"/g' %s" % (peer_username, fencer_path))
    bash.bash_roe("sed -i 's/^PEER_MGMT_ADDR = .*$/PEER_MGMT_ADDR = \"%s\"/g' %s" % (peer_addr, fencer_path))
    bash.bash_roe("cp %s /usr/lib/drbd/mini_fencer.py" % fencer_path)
    bash.bash_roe("sudo chmod 777 /usr/lib/drbd/mini_fencer.py")
def rescan(self, disk_name=None):
    """
    Rescan a disk (and its multipath slaves, if any), then run pvresize on it.

    When no name is given, the last path component of ``self.get_path()`` is
    used. For a multipath device every slave is rescanned and the multipath
    map is resized before ``pvresize`` runs.

    :type disk_name: str
    """
    if disk_name is None:
        disk_name = self.get_path().split("/")[-1]

    def rescan_slave(slave, raise_exception=True):
        # Ask the kernel to rescan one block device through sysfs and log the outcome.
        _cmd = shell.ShellCmd("echo 1 > /sys/block/%s/device/rescan" % slave)
        _cmd(is_exception=raise_exception)
        logger.debug("rescaned disk %s (wwid: %s), return code: %s, stdout %s, stderr: %s" %
                     (slave, self.identifier, _cmd.return_code, _cmd.stdout, _cmd.stderr))

    multipath_dev = lvm.get_multipath_dmname(disk_name)
    if multipath_dev:
        # Keep the original name in ``t``; it is restored after the multipath handling.
        t, disk_name = disk_name, multipath_dev
        # disk name is dm-xx when multi path
        slaves = shell.call("ls /sys/class/block/%s/slaves/" % disk_name).strip().split("\n")
        if slaves is None or len(slaves) == 0 or (len(slaves) == 1 and slaves[0].strip() == ""):
            # No slave listed: rescan the dm device itself without raising on failure.
            logger.debug("can not get any slaves of multipath device %s" % disk_name)
            rescan_slave(disk_name, False)
        else:
            for s in slaves:
                rescan_slave(s)
            cmd = shell.ShellCmd("multipathd resize map %s" % disk_name)
            cmd(is_exception=True)
            logger.debug("resized multipath device %s, return code: %s, stdout %s, stderr: %s" %
                         (disk_name, cmd.return_code, cmd.stdout, cmd.stderr))
        disk_name = t
    else:
        rescan_slave(disk_name)

    command = "pvresize /dev/%s" % disk_name
    if multipath_dev is not None and multipath_dev != disk_name:
        # Fall back to the multipath name when pvresize on the original name fails.
        command = "pvresize /dev/%s || pvresize /dev/%s" % (disk_name, multipath_dev)
    r, o, e = bash.bash_roe(command, errorout=True)
    logger.debug("resized pv %s (wwid: %s), return code: %s, stdout %s, stderr: %s" %
                 (disk_name, self.identifier, r, o, e))
def check_pv_status(vgUuid, timeout):
    """Check the disks and VG attributes of shared block group *vgUuid*.

    Runs ``pvs`` to look for unknown or missing disks, then compares the
    ``vgs -oattr`` string with the expected 'w', 'z', '-' and 's' flags at
    positions 0, 1, 3 and 5.

    :param vgUuid: VG name passed to the LVM commands.
    :param timeout: seconds given to ``timeout -s SIGKILL``; the vgs call
        uses at least 10.
    :return: ``(True, "")`` when every check passes, otherwise
        ``(False, reason)``.
    """
    r, o, e = bash.bash_roe(
        "timeout -s SIGKILL %s pvs --noheading --nolocking -Svg_name=%s -oname,missing" % (timeout, vgUuid))
    if len(o) == 0 or r != 0:
        s = "can not find shared block in shared block group %s, detail: [return_code: %s, stdout: %s, stderr: %s]" % (
            vgUuid, r, o, e)
        logger.warn(s)
        return False, s

    # Walk the output line by line. Iterating the string itself yields single
    # characters, which can never contain "unknown" or "missing".
    for pvs_out in o.strip().splitlines():
        if "unknown" in pvs_out:
            s = "disk in shared block group %s missing" % vgUuid
            logger.warn("%s, details: %s" % (s, o))
            return False, s
        if "missing" in pvs_out:
            s = "disk %s in shared block group %s exists but state is missing" % (
                pvs_out.strip().split(" ")[0], vgUuid)
            logger.warn("%s, details: %s" % (s, o))
            return False, s

    # r, s = lvm_vgck(vgUuid, timeout)
    # if r is False:
    #     return r, s

    health = bash.bash_o('timeout -s SIGKILL %s vgs -oattr --nolocking --noheadings --shared %s ' %
                         (10 if timeout < 10 else timeout, vgUuid)).strip()
    # Positions up to index 5 are read below, so a shorter string is treated
    # like an unreadable attr instead of raising IndexError.
    if health == "" or len(health) < 6:
        logger.warn("can not get proper attr of vg, return false")
        return False, "primary storage %s attr get error, expect 'wz--ns' got %s" % (vgUuid, health)

    # (position in the attr string, expected flag, wording used in the message)
    expectations = (
        (0, "w", "permission"),
        (1, "z", "resizeable"),
        (3, "-", "partial"),
        (5, "s", "shared mode"),
    )
    for index, expected, what in expectations:
        # ``health`` is a plain string here, so the flag is read from it directly.
        if health[index] != expected:
            return False, "primary storage %s %s error, expect '%s' but now is %s, deatils: %s" % (
                vgUuid, what, expected, health[index], health)

    return True, ""
def is_stuck_vglk():
    """Raise while ``sanlock client status`` lists a ``:VGLK:`` line.

    :raises RetryException: when grep finds a VGLK line (return code 0).
    """
    ret, _out, _err = bash.bash_roe("sanlock client status | grep ':VGLK:'")
    if ret == 0:
        raise RetryException("found sanlock vglk lock stuck")
def _do_examine_lockspace(lockspace):
    """Run ``sanlock client examine`` once on *lockspace*.

    :return: the command's return code, which is 0 whenever this returns.
    :raises RetryException: when the command returns non-zero.
    """
    examine_cmd = "sanlock client examine -s %s" % lockspace
    ret, _, _ = bash.bash_roe(examine_cmd, errorout=False)
    if ret == 0:
        return ret
    raise RetryException("can not examine lockspace")
def qcow2_convert_to_raw(src, dst):
    """Convert the qcow2 image *src* into the raw image *dst* with qemu-img.

    :return: whatever ``bash.bash_roe`` returns for the convert command.
    """
    convert_cmd = '/usr/bin/qemu-img convert -f qcow2 -O raw %s %s' % (src, dst)
    result = bash.bash_roe(convert_cmd)
    return result
def _set_sanlock_event(lockspace):
    """Run ``sanlock client set_event`` once for *lockspace*.

    The host id is the second ``:``-separated field of *lockspace*.

    :return: the command's return code, which is 0 whenever this returns.
    :raises RetryException: when the command returns non-zero.
    """
    host_id = lockspace.split(":")[1]
    event_cmd = "sanlock client set_event -s %s -i %s -e 1 -d 1" % (lockspace, host_id)
    ret, _, _ = bash.bash_roe(event_cmd, errorout=False)
    if ret == 0:
        return ret
    raise RetryException("set sanlock event failed")
def get_sanlock_renewal(lockspace):
    """Return the last output line of ``sanlock client renewal`` for *lockspace*.

    The return code is not checked; empty output raises IndexError.
    """
    _ret, out, _err = bash.bash_roe("sanlock client renewal -s %s" % lockspace)
    renewal_lines = out.strip().splitlines()
    return renewal_lines[-1]
def unpriv_sgio():
    """Write 1 to ``queue/unpriv_sgio`` of every entry under /sys/block.

    The shell loop's result is not checked.
    """
    enable_all = "for i in `ls /sys/block/`; do echo 1 > /sys/block/$i/queue/unpriv_sgio; done"
    bash.bash_roe(enable_all)
def drop_vg_lock(vgUuid):
    """Run ``lvmlockctl --drop`` for *vgUuid*; the result is not checked."""
    drop_cmd = "lvmlockctl --drop %s" % vgUuid
    bash.bash_roe(drop_cmd)
def quitLockServices():
    """Ask sanlock to shut down, then tell lvmlockd to quit (``lvmlockctl -q``).

    Both commands run in that order; neither result is checked.
    """
    for command in ("sanlock client shutdown", "lvmlockctl -q"):
        bash.bash_roe(command)