def scan_controller():
    """Discover LSI controllers and return them with their attributes filled.

    Three tools are queried through the shell: ``sas2ircu list``,
    ``sas3ircu list`` and ``storcli show``.  For every adapter row found, a
    controller object is created from the adapter index and its ``SASnnnn``
    chip name, ``fill_attrs()`` is called on it, and it is appended to the
    result.

    Returns:
        list: controller objects, SAS2 adapters first, then SAS3 adapters,
        then whatever the MegaRAID branch yields (see note below).
    """

    def _collect(listing, index_pattern, controller_cls):
        # Build one controller per adapter row in ``listing``; an empty
        # listing means the tool found nothing (or is not installed).
        found = []
        if "" == listing.strip():
            return found
        indexes = linux.search_regex_strings_column(listing, index_pattern, " ", 0)
        models = linux.search_regex_strings(listing, "SAS[0-9]{4}")
        # zip() pairs index and model row by row and stops at the shorter
        # list instead of raising IndexError when the two disagree.
        for index, model in zip(indexes, models):
            controller = controller_cls(int(index), model)
            controller.fill_attrs()
            found.append(controller)
        return found

    sas2_con = linux.exe_shell("sas2ircu list|grep -P 'SAS[0-9]{4}'")
    sas3_con = linux.exe_shell("sas3ircu list|grep -P 'SAS[0-9]{4}'")
    mega_raid_con = linux.exe_shell("storcli show|grep LSI")

    cons = _collect(sas2_con, "[0-9]+ *SAS[0-9]{4}.*", LsiSas2Controller)
    cons.extend(_collect(sas3_con, "[0-9]+ *SAS[0-9]{4}.*", LsiSas3Controller))
    if "" != mega_raid_con.strip():
        # NOTE(review): this branch is triggered by storcli output but parses
        # the sas2ircu listing and builds LsiSas2Controller objects, so SAS2
        # adapters can be reported twice and MegaRAID adapters are never
        # parsed.  Behaviour is kept as-is because the intended MegaRAID
        # controller class is not visible here -- confirm and fix.
        cons.extend(_collect(sas2_con, ".*SAS[0-9]{4}.*", LsiSas2Controller))
    return cons
def destroy_pool(self, pool_name):
    """Destroy a zpool and, if that succeeded, wipe the start of each disk.

    ``zpool destroy`` printing nothing is taken as success; only then are
    the first 100 MiB of every disk in ``self.disks`` overwritten with
    zeros.

    Returns:
        The output of the ``zpool destroy`` command.
    """
    output = linux.exe_shell("zpool destroy %s" % pool_name)
    if output != '':
        # zpool reported something: leave the disks untouched.
        return output
    for disk_name in self.disks:
        wipe_cmd = "dd if=/dev/zero of=/dev/%s bs=1M count=100 >> /dev/null" % disk_name
        linux.exe_shell(wipe_cmd)
    return output
def fill_attrs(self):
    """Populate identity fields and ``smart_attr`` from smartctl output.

    Runs ``smartctl -a`` and ``smartctl -x`` on ``/dev/<dev_name>``; the
    ``-x`` text is kept in ``self.smart``.  Model, firmware, vendor and
    serial number come from the ``-a`` text.  When that text mentions
    "SAS", the read/write/verify error counters and the per-channel phy
    error counters are stored in ``self.smart_attr``; when it mentions
    "SATA", every attribute-table row is stored keyed by attribute name.
    """
    smart_text = linux.exe_shell("smartctl -a /dev/%s" % self.dev_name)
    self.smart = linux.exe_shell("smartctl -x /dev/%s" % self.dev_name)

    def _field(pattern):
        # Value after the first ':' on the line matching ``pattern``.
        return linux.search_regex_one_line_string_column(
            smart_text, pattern, ":", 1).strip()

    self.model = _field("(?:Device Model|Product):.+")
    self.fw = _field("(?:Firmware|Revision).+")
    self.vendor = _field("(?:ATA|Vendor).+")
    self.sn = _field("Serial (?:N|n)umber.+")

    if "SAS" in smart_text:
        self.type = "SAS"
        counter_names = (
            "errorEccFast",
            "errorEccDelayed",
            "errorEccByRereadsRewrite",
            "totalErrorsCorrected",
            "correctionAlgorithmInvocations",
            "byte10_9",
            "totalUncorrectedError",
        )
        counter_rows = linux.search_regex_strings(
            smart_text, " *(?:write:|read:|verify:).+")
        for row in counter_rows:
            cols = row.split()
            # Column 0 is the row label; columns 1..7 are the counters.
            counters = dict(
                (name, cols[pos].strip())
                for pos, name in enumerate(counter_names, 1))
            self.smart_attr[cols[0].replace(":", " ").strip()] = counters

        phy_rows = linux.search_regex_strings(
            self.smart,
            "(?:Invalid DWORD|Running disparity|Loss of DWORD|Phy reset problem).+=.+"
        )
        channel = {}
        for pos, row in enumerate(phy_rows):
            pieces = row.split("=")
            channel[pieces[0].strip()] = pieces[1].strip()
            # Four counters per channel: rows 0-3 and rows 4-7.
            if pos == 3:
                self.smart_attr["channel0Error"] = channel
                channel = {}
            if pos == 7:
                self.smart_attr["channel1Error"] = channel
                channel = {}

    if "SATA" in smart_text:
        self.type = "SATA"
        table_rows = linux.search_regex_strings(
            smart_text,
            "^( |[0-9])+.+[0-9]+ .+0x.+(In_the_past|-|FAILING_NOW) +[0-9]+")
        for row in table_rows:
            cols = row.split()
            self.smart_attr[cols[1]] = {
                "ID": cols[0].strip(),
                "FLAG": cols[2].strip(),
                "VALUE": cols[3].strip(),
                "WORST": cols[4].strip(),
                "THRESH": cols[5].strip(),
                "TYPE": cols[6].strip(),
                "UPDATED": cols[7].strip(),
                "WHEN_FAILED": cols[8].strip(),
                "RAW_VALUE": cols[9].strip(),
            }
def show_bios(self):
    '''Print the output of ``dmidecode --type bios``.

    Prints an install hint instead when /usr/sbin/dmidecode is missing.
    '''
    if os.path.exists("/usr/sbin/dmidecode"):
        print(exe_shell("dmidecode --type bios"))
        return
    print("dmidecode is not exists, please install dmidecode.")
def show_bios_ver():
    """Print the value of the ``Version:`` line from ``dmidecode --type bios``.

    Prints an install hint instead when /usr/sbin/dmidecode is missing.
    """
    if os.path.exists("/usr/sbin/dmidecode"):
        bios_text = exe_shell("dmidecode --type bios")
        version = search_regex_one_line_string_column(
            bios_text, ".*Version:.*", ":", 1)
        print(version)
    else:
        print("dmidecode is not exists, please install dmidecode.")
def scan_disk_name_sn():
    """Return the simple attributes of every ``sd*`` device under /dev.

    Each name listed by ``ls /dev`` that matches ``^sd[a-z]+$`` is passed to
    ``disk.Disk.get_from_sas_disk_simple_attr``; the results are returned
    in listing order.
    """
    listing = linux.exe_shell("ls /dev|grep -P '^sd[a-z]+$'")
    return [
        disk.Disk.get_from_sas_disk_simple_attr(name)
        for name in listing.splitlines()
    ]
def get_all_gpus():
    """Build a ``Gpu`` object for every GPU section printed by ``nvidia-smi -a``.

    The ``nvidia-smi -a`` text is split on blank lines; the first two chunks
    and the last one are discarded and the remaining chunks are each treated
    as one GPU.  For every GPU the value after the first ':' is extracted for
    each pattern in ``kv`` (in that order) and the resulting list is handed
    to ``Gpu``.

    Returns:
        list of ``Gpu`` objects, or ``None`` (after printing a notice) when
        ``Gpu.check_driver()`` reports that the driver is unavailable.
    """
    if not Gpu.check_driver():
        print("Nvidia driver is not install.")
        return
    nvsmi_text = linux.exe_shell("nvidia-smi -a")
    # Drop the two leading header chunks and the trailing chunk.
    gpu_info_list = nvsmi_text.split('\n\n')[2:-1]
    kv = [
        "Product Name.*:.*",
        "Fan Speed.*:.*",
        "GPU Current Temp.*:.*",
        "Power Draw.*:.*",
        " Power Limit.*:.*",
        "Memory.*:.*%.*",
        "Total.*: [0-9]{4,} MiB",
        "Gpu.*:.*%.*",
        # Raw string: "\." is not a valid escape in a normal string literal.
        r"Bus Id.*\.[0-9]",
        "Minor.*:.*",
        "VBIOS.*:.*",
        "Serial Number.*:.*",
    ]
    gpu_list = []
    for gpu_info in gpu_info_list:
        result = [
            linux.search_regex_one_line_string_column(gpu_info, k, ":", 1).strip()
            for k in kv
        ]
        gpu_list.append(Gpu(result))
    return gpu_list
def show_bios(ctx, param, value):
    """Option callback: print ``dmidecode --type bios`` and exit via ``ctx``.

    Does nothing when ``value`` is falsy or ``ctx.resilient_parsing`` is set.
    When /usr/sbin/dmidecode is missing, an install hint is printed and
    ``ctx.exit()`` is called before the dmidecode step.
    """
    requested = value and not ctx.resilient_parsing
    if not requested:
        return
    if not os.path.exists("/usr/sbin/dmidecode"):
        print("dmidecode is not exists, please install dmidecode.")
        ctx.exit()
    print(exe_shell("dmidecode --type bios"))
    ctx.exit()
def map_disk_wwn_dev_hctl(cls, diskname):
    """Look up the WWN and ``dev`` entry of a block device by its name.

    ``diskname`` is the name under /sys/block (it is also used as
    ``/dev/<diskname>`` for udevadm).

    Returns ``None`` when ``/sys/block/<diskname>`` does not exist.

    NOTE(review): the visible body reads ``dev`` and ``wwn`` and then ends
    without returning or storing them; the H:C:T:L lookup promised by the
    name is not present either.  The definition looks truncated -- confirm
    against the full file.
    """
    path = pjoin("/sys/block", diskname)
    # No sysfs entry means the kernel does not know this block device.
    if os.path.exists(path) is False:
        return None
    udevadm = linux.exe_shell("udevadm info -q all -n /dev/%s" % diskname)
    # Content of /sys/block/<diskname>/dev.
    dev = linux.read_file(pjoin(path, "dev"))
    # Value after '=' on the udevadm line matching "ID_WWN=".
    wwn = linux.search_regex_one_line_string_column(
        udevadm, "ID_WWN=", "=", 1)
def map_disk_wwn_hctl(diskname):
    """Map a disk name to its H:C:T:L address and WWN using ``lsscsi -w``.

    Args:
        diskname: device name such as ``sda``; a ``/dev/``-prefixed path is
            accepted as well.

    Returns:
        dict with keys ``hctl``, ``wwn`` and ``dev_name`` (the device path
        as printed by lsscsi) for the matching row, or ``None`` when no row
        matches.
    """
    # Compare on the final path component so that "sda" does not also match
    # "/dev/sdaa", "/dev/sdab", ... as a plain substring test would.
    wanted = diskname.rsplit("/", 1)[-1]
    lsscsi = linux.exe_shell("lsscsi -w |grep /dev/|awk '{print$1,$3,$4}'")
    for row in lsscsi.splitlines():
        split_t = row.split(" ")
        if len(split_t) < 3 or not split_t[2]:
            # Row without a device column (e.g. lsscsi printed no WWN).
            continue
        if split_t[2].rsplit("/", 1)[-1] == wanted:
            return {
                "hctl": split_t[0],
                "wwn": split_t[1],
                "dev_name": split_t[2]
            }
    return None
def get_all_disk():
    """Return a filled ``DiskFromLsiSas3`` for every non-LSI ``sd*`` device.

    ``lsblk -o NAME,VENDOR`` rows starting with ``sd`` are inspected; a row
    is kept when it has a vendor column and that column does not contain
    "LSI".  Each kept name is wrapped in ``DiskFromLsiSas3("", name)`` and
    ``fill_attrs()`` is called on it.

    Returns:
        list of ``DiskFromLsiSas3`` objects in lsblk order.
    """
    disks_lines = linux.exe_shell("lsblk -o NAME,VENDOR|grep -P '^sd.*[A-Z]'")
    disks = []
    for line in disks_lines.splitlines():
        disk_t = line.split()
        # Need both NAME and VENDOR columns.  The previous "< 1" test could
        # never select a row and raised IndexError on an empty one.
        if len(disk_t) > 1 and "LSI" not in disk_t[1]:
            disks.append(disk_t[0])
    ds = []
    for name in disks:
        d_t = DiskFromLsiSas3("", name)
        d_t.fill_attrs()
        ds.append(d_t)
    return ds
def create_pool(self, pool_name, pool_level):
    """Create a zpool from ``self.disks`` and return the ``zpool`` output.

    For ``pool_level == "raidz-2"`` the disks are split into two raidz
    groups: the first half and the second half of ``self.disks``.

    Args:
        pool_name: name of the pool to create.
        pool_level: layout selector; only ``"raidz-2"`` adds devices.

    Returns:
        Output of the ``zpool create`` command, or ``None`` (after printing
        a notice) when the ``zpool`` binary is not available.

    Raises:
        Exception: ``"raidz-2"`` was requested with fewer than 6 disks.
    """
    if not linux.bin_exists("zpool"):
        print("zpool is not exists!")
        return
    zpool_str = "zpool create %s raidz" % pool_name
    # NOTE(review): for any other pool_level no devices are appended and the
    # command is run as "zpool create <name> raidz" -- confirm intent.
    if "raidz-2" == pool_level:
        if len(self.disks) < 6:
            raise Exception("raidz-2 request more than 6 disks!")
        # Floor division keeps the slice index an int on Python 3 as well.
        half = len(self.disks) // 2
        first_group = [str(d) for d in self.disks[:half]]
        # Start at ``half`` (not ``half + 1``) so the middle disk is no
        # longer silently left out of the pool.
        second_group = [str(d) for d in self.disks[half:]]
        zpool_str = " ".join([zpool_str] + first_group + ["raidz"] + second_group)
    return linux.exe_shell(zpool_str)
def fill_attrs(self):
    """Fill the firmware version and attached disks of this SAS2 controller.

    ``sas2ircu <index> display`` is parsed for the firmware line and for the
    ``Serial No`` values of attached drives.  ``self.fw`` is set to the text
    after the first ':' of the firmware line, or to ``"null"`` when that
    text is empty or the line is missing.  Every disk returned by
    ``Controller.scan_disk_name_sn()`` whose serial number appears in the
    controller listing is wrapped in ``disk.DiskFromLsiSas2``, filled, and
    appended to ``self.disks``.
    """
    sas2ircu_string = linux.exe_shell("sas2ircu %d display" % self.index)
    fw_str = linux.get_match_sub_string(
        sas2ircu_string, 'Firmware.*(?:[0-9]+\\.)+[0-9]*')
    sn_list = linux.search_regex_strings_column(
        sas2ircu_string, "^ +Serial No.+", ":", 1)
    disk_name_sns = Controller.scan_disk_name_sn()

    # Guard against a missing firmware line: without a ':' there is no
    # second element and indexing it would raise IndexError.
    fw_parts = (fw_str or "").split(":")
    fw_value = fw_parts[1].strip() if len(fw_parts) > 1 else ""
    self.fw = fw_value if "" != fw_value else "null"

    for s in disk_name_sns:
        if s["sn"] in sn_list:
            d = disk.DiskFromLsiSas2(s["sn"], s["name"])
            d.fill_attrs()
            self.disks.append(d)
def check_driver():
    """Return True when ``nvidia-smi -h`` prints at least one line.

    The line count comes from piping the command through ``wc -l``; a count
    of zero is reported as False.
    """
    line_count = int(linux.exe_shell("nvidia-smi -h 2> /dev/zero | wc -l"))
    return line_count != 0
def log_monitor():
    """Record a baseline, wait for phy error counters to change, then log.

    Steps:

    1. Append a baseline snapshot (date, filtered dmesg and
       /var/log/messages, two lsiutil dumps) to
       ``/var/log/start-iads-monitor-log.log``.
    2. Poll in a loop: take fresh lsiutil dumps, filtered dmesg and the
       current phy list.  A change in the number of phys, or in any of the
       four error counters of a phy, triggers collection.
    3. On the first change, append the before/after dumps to
       ``/var/log/iads-monitor-log.log``, followed by ``smartctl -x`` output
       for ``sd[a-z]``, ``sda[a-z]`` and ``sdb[a-z]``, run
       ``lsigetlunix.sh`` and return.
    """
    start_log_path = "/var/log/start-iads-monitor-log.log"
    log_path = "/var/log/iads-monitor-log.log"
    io_error_filter = "grep -iP '((i/o error)|(sector [0-9]+))'"

    start_time_t = exe_shell("date")
    start_lsi_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 64,1,,debuginfo,exit,0")
    start_hba_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 65,,'pl dbg',exit,0")
    start_dmesg_str = exe_shell("dmesg|%s" % io_error_filter)
    start_messages_str = exe_shell("cat /var/log/messages|%s" % io_error_filter)
    start_str = "\ntime:\n%s\ndmesg:\n%s\n\nmessage:\n%s\n\nlsiutils debuginfo:\n%s\n\nlsiutils_pl dbg:\n%s\n\n" % (
        start_time_t, start_dmesg_str, start_messages_str, start_lsi_str,
        start_hba_str)
    with open(start_log_path, "a") as fp:
        fp.write(start_str)
    print("Start_log is OK. path: /var/log/start-iads-monitor-log.log \n")

    phy_t_list = get_lsi_phy_list(Phy.scan_phys_attr())
    collect = False
    while True:
        # "Before" snapshots, taken on every poll so that they directly
        # precede the moment a counter change is noticed.
        g_lsi_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 64,1,,debuginfo,exit,0")
        g_hba_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 65,,'pl dbg',exit,0")
        dmesg_str = exe_shell("dmesg|%s" % io_error_filter)
        phy_list = get_lsi_phy_list(Phy.scan_phys_attr())

        if len(phy_t_list) != len(phy_list):
            collect = True
        else:
            for cur, old in zip(phy_list, phy_t_list):
                if (cur.invalid_dword_count != old.invalid_dword_count
                        or cur.loss_of_dword_sync_count != old.loss_of_dword_sync_count
                        or cur.phy_reset_problem_count != old.phy_reset_problem_count
                        or cur.running_disparity_error_count != old.running_disparity_error_count):
                    collect = True
        if not collect:
            continue

        print("Phy err increased.Start collect logs to /var/log/iads-monitor-log.log......")
        phy_t_list = phy_list
        messages_str = exe_shell("cat /var/log/messages|%s" % io_error_filter)
        time_t = exe_shell("date")
        lsi_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 64,1,,debuginfo,exit,0")
        hba_str = exe_shell("lsiutil.x86_64_171 -p 1 -a 65,,'pl dbg',exit,0")
        tmp_str = "\n%s\ndmesg:\n%s\nmessages:\n%s\nbefore_lsi_str:\n%s\nafter_lsi_str:\n%s\nbefore_hba_lig:\n%s\nafter_hba_log:\n%s\n" % (
            time_t, dmesg_str, messages_str, g_lsi_str, lsi_str, g_hba_str,
            hba_str)
        # Close the handle before other writers append to the same file so
        # the header lands ahead of the per-disk SMART output.
        with open(log_path, "a") as fp:
            fp.write(tmp_str)
            fp.write("\n\n\nsmart info:\n")
        for case in ("", "a", "b", ):
            # ascii_lowercase is exactly a-z; string.lowercase is
            # locale-dependent on Python 2 and absent on Python 3.
            for i in string.ascii_lowercase:
                write_a(log_path, "\nsd%s%s\n" % (case, i))
                exe_shell("smartctl -x /dev/sd%s%s >> /var/log/iads-monitor-log.log" % (case, i))
        exe_shell("lsigetlunix.sh")
        break
def init_disk(self):
    """Zero the first 100 MiB of every disk and give it a fresh GPT label.

    For each name in ``self.disks`` the ``dd`` wipe runs first, followed by
    ``parted ... mklabel gpt``.
    """
    command_templates = (
        "dd if=/dev/zero of=/dev/%s bs=1M count=100",
        "parted /dev/%s -s mklabel gpt",
    )
    for disk_name in self.disks:
        for template in command_templates:
            linux.exe_shell(template % disk_name)
def get_from_sas_disk_smart_i_str(disk_name):
    """Return the output of ``smartctl -i`` for ``/dev/<disk_name>``."""
    command = "smartctl -i /dev/%s" % disk_name
    return linux.exe_shell(command)
def fill_attrs(self):
    """Populate identity, ``smart_attr`` and ``age`` from smartctl output.

    ``smartctl -a`` feeds the identity fields and the attribute tables;
    ``smartctl -x`` is stored in ``self.smart`` and feeds the phy error
    counters and the start-stop count of SAS disks.

    Identity (best effort; a failure is printed, not raised): ``model``,
    ``fw``, ``vendor``, ``sn``, ``wwn``, ``hctl``, and ``flash = True`` when
    ``/sys/block/<dev>/queue/rotational`` reads "0".

    ``smart_attr`` for a SAS disk::

        'read' / 'write' / 'verify': {
            'errorEccFast', 'errorEccDelayed', 'errorEccByRereadsRewrite',
            'totalErrorsCorrected', 'correctionAlgorithmInvocations',
            'byte10_9', 'totalUncorrectedError'
        }
        'channel0Error' / 'channel1Error': {
            'Invalid DWORD count', 'Running disparity error count',
            'Loss of DWORD synchronization', 'Phy reset problem'
        }

    ``smart_attr`` for a SATA disk, one entry per attribute-table row keyed
    by ATTRIBUTE_NAME::

        ID# ATTRIBUTE_NAME      FLAG   VALUE WORST THRESH TYPE     UPDATED WHEN_FAILED RAW_VALUE
          1 Raw_Read_Error_Rate 0x000f 074   063   044    Pre-fail Always  -           26816470
          9 Power_On_Hours      0x0032 096   096   000    Old_age  Always  -           3851

        'Raw_Read_Error_Rate': {
            'ID': '1', 'FLAG': '0x000f', 'VALUE': '074', 'WORST': '063',
            'THRESH': '044', 'TYPE': 'Pre-fail', 'UPDATED': 'Always',
            'WHEN_FAILED': '-', 'RAW_VALUE': '26816470'
        }

    ``age``: ``start_stop_count``; ``data_gb`` (SAS: sum of the ``byte10_9``
    columns of read, write and verify, stored as a string);
    ``power_on_hours`` (SATA).  A value is only set when its source data is
    present in the smartctl output.
    """
    smart_str = linux.exe_shell("smartctl -a /dev/%s" % self.dev_name)
    smartx_str = linux.exe_shell("smartctl -x /dev/%s" % self.dev_name)
    self.smart = smartx_str
    try:
        self.model = linux.search_regex_one_line_string_column(
            smart_str, "(?:Device Model|Product):.+", ":", 1).strip()
        self.fw = linux.search_regex_one_line_string_column(
            smart_str, "(?:Firmware|Revision).+", ":", 1).strip()
        self.vendor = linux.search_regex_one_line_string_column(
            smart_str, "(?:SATA Ver|Vendor).+", ":", 1).split()[0].strip()
        self.sn = linux.search_regex_one_line_string_column(
            smart_str, "Serial (?:N|n)umber.+", ":", 1).strip()
        map_temp = self.map_disk_wwn_hctl(self.dev_name)
        self.wwn = map_temp["wwn"] if map_temp is not None else ""
        self.hctl = map_temp["hctl"] if map_temp is not None else ""
        rotational = linux.read_file(
            os.path.join("/sys/block", self.dev_name, "queue/rotational"))
        if rotational.strip() == "0":
            self.flash = True
    except IOError:
        print("%s read_file rotational err." % self.dev_name)
    except Exception:
        print("disk %s is not exists." % self.dev_name)

    if "SAS" in smart_str:
        self.type = "SAS"
        counter_rows = linux.search_regex_strings(
            smart_str, " *(?:write:|read:|verify:).+")
        for line in counter_rows:
            tmp = line.split()
            if len(tmp) < 8:
                # Row label plus seven counters are required.
                continue
            self.smart_attr[tmp[0].replace(":", " ").strip()] = {
                "errorEccFast": tmp[1].strip(),
                "errorEccDelayed": tmp[2].strip(),
                "errorEccByRereadsRewrite": tmp[3].strip(),
                "totalErrorsCorrected": tmp[4].strip(),
                "correctionAlgorithmInvocations": tmp[5].strip(),
                "byte10_9": tmp[6].strip(),
                "totalUncorrectedError": tmp[7].strip()
            }

        phy_rows = linux.search_regex_strings(
            self.smart,
            "(?:Invalid DWORD|Running disparity|Loss of DWORD|Phy reset problem).+=.+"
        )
        channel = {}
        for i, it in enumerate(phy_rows):
            tmp = it.split("=")
            channel[tmp[0].strip()] = tmp[1].strip()
            # Four counters per channel: rows 0-3, then rows 4-7.
            if 3 == i:
                self.smart_attr["channel0Error"] = channel
                channel = {}
            if 7 == i:
                self.smart_attr["channel1Error"] = channel
                channel = {}

        if isinstance(self.smart, str) and ("start-stop" in self.smart):
            self.age[
                "start_stop_count"] = linux.search_regex_one_line_string_column(
                    self.smart, ".+start-stop.+", ":", 1)
        # Only sum the transferred data when all three counter rows were
        # parsed; a missing row would otherwise raise KeyError.
        if all(k in self.smart_attr for k in ("read", "write", "verify")):
            all_gb = float(self.smart_attr["read"]["byte10_9"]) + float(
                self.smart_attr["write"]["byte10_9"]) + float(
                    self.smart_attr["verify"]["byte10_9"])
            self.age["data_gb"] = str(all_gb)

    if "SATA" in smart_str:
        self.type = "SATA"
        table_rows = linux.search_regex_strings(
            smart_str,
            ".*[0-9]+.+0x.+(?:In_the_past|-|FAILING_NOW) +[0-9]+")
        for line in table_rows:
            tmp = line.split()
            if len(tmp) < 10:
                # Not a full ten-column attribute row.
                continue
            self.smart_attr[tmp[1]] = {
                "ID": tmp[0].strip(),
                "FLAG": tmp[2].strip(),
                "VALUE": tmp[3].strip(),
                "WORST": tmp[4].strip(),
                "THRESH": tmp[5].strip(),
                "TYPE": tmp[6].strip(),
                "UPDATED": tmp[7].strip(),
                "WHEN_FAILED": tmp[8].strip(),
                "RAW_VALUE": tmp[9].strip(),
            }
        if "Start_Stop_Count" in self.smart_attr:
            self.age["start_stop_count"] = self.smart_attr[
                "Start_Stop_Count"]["RAW_VALUE"]
        # Not every drive reports Power_On_Hours; skip it instead of
        # raising KeyError.
        if "Power_On_Hours" in self.smart_attr:
            self.age["power_on_hours"] = self.smart_attr["Power_On_Hours"][
                "RAW_VALUE"]
def show_mem_model():
    """Print the value of the ``Part Number:`` line from ``dmidecode --type memory``.

    Prints an install hint instead when ``bin_exists("dmidecode")`` is false.
    """
    if bin_exists("dmidecode"):
        memory_text = exe_shell("dmidecode --type memory")
        part_number = search_regex_one_line_string_column(
            memory_text, ".*Part Number:.*", ":", 1)
        print(part_number)
        return
    print("dmidecode is not exists, please install dmidecode.")