def get_stripe_mult_info(stripe_mult_consts, runtime=None, pass_count=2): log = loglib.get_logger() # Get appropriate stripe multiplier override value (from options) stripe_mult_override = False if log.get_data_value('diskscrub_mult'): # Non-zero, Non-None stripe_mult_override = log.get_data_value('diskscrub_mult') if log.get_data_value('diskscrub_full'): stripe_mult_override = 1 # 100%, full drive testing # Get disk devices try: driveinfo = drivelib.DriveInfo() except drivelib.DriveInfoException, e: log.fail('Unable to get disk information: %s' % e, 0) return # hard fail
def run(reps = None):
    """Entry point for the LCD scrub test.

    reps -- iteration count; if None the operator is prompted interactively
            (default 5).  Must coerce to a positive integer.

    NOTE(review): this chunk ends inside the LCD-initialization failure
    path; the actual per-iteration test body using `lcd` appears to
    continue elsewhere in the file -- confirm against the full source.
    """
    log = loglib.get_logger()
    if reps == None:
        # Prompt until a valid positive integer is entered; empty input
        # accepts the default.  TypeError is caught too because prompt()
        # may return a non-string sentinel -- TODO confirm.
        while 1:
            reps_default = 5
            reps = log.prompt('How many iterations would you like ' +
                              'performed? (%d): ' % reps_default)
            try:
                if reps == '':
                    reps = reps_default
                reps = int(reps)
                if reps <= 0:
                    raise ValueError
                log.out('')
                break
            except (ValueError, TypeError):
                log.out('Invalid entry. Please enter a positive integer.')
    log.out('Test iterations: %d' % reps)
    log.out('')
    log.out('Connecting to LCD')
    # Kill isi_lcd_d, no questions asked
    lcd_d_stop()
    # Give the daemon time to shut down and release the device before we
    # attempt to take it over ourselves.
    time.sleep(10)
    # Attempt to get our LCD object
    try:
        lcd = display.getDisplay()
    except Exception, e:
        # Report the failure (non-exiting), restart the daemon we stopped,
        # and bail out.
        log.fail('Unable to initialize LCD object', 0)
        if e:
            log.fail('%s' % e, 0)
        lcd_d_start()
        return
def run(reps = None, sections = 125, checkpoint = 0):
    """Entry point for the NVRAM scrub test.

    Writes random data through a ramdisk to the NVRAM device with dd,
    reads it back, diffs the two copies, and runs journal checks around
    each iteration.  Also checks ECC counters and syslog for NVRAM errors.

    reps       -- iteration count; if None the operator is prompted
                  (module-level `default_reps` is the prompt default).
    sections   -- number of 4-block dd sections scrubbed per iteration.
    checkpoint -- iterations below this index skip the journal check.

    NOTE(review): relies on module globals not visible in this chunk:
    `default_reps` and `target_run_time` -- confirm against the full file.
    """
    log = loglib.get_logger()
    ramdir = ramdisk.disk_dir()
    nvramdev = hal.getNvramDevicePath()
    randfile = '/dev/random'
    blocksize = 1024 * 1024 # see dd cmd, bs=1024k
    nvramsize = nvram.get_nvram_size()
    expd_nvramsizes = nvram.get_nvram_sizes()
    if not nvramdev:
        log.fail("No nvram device available for test", 0)
        return
    if not expd_nvramsizes:
        log.fail("No official nvram specs for hardware family '%s'" %
                 hwver.hwgenName(hwver.hwgen), 0)
        return
    # Validate or prompt for reps
    if reps != None:
        try:
            reps = int(reps)
            if reps <= 0:
                raise ValueError()
        except ValueError:
            log.fail('Invalid reps parameter (%s)' % reps)
            return # hard fail
    else:
        # Interactive prompt loop; empty input accepts default_reps.
        while 1:
            reps = log.prompt('Iteration count (%d): ' % default_reps)
            if reps == '':
                reps = default_reps
            try:
                reps = int(reps)
                if reps <= 0:
                    raise ValueError()
                log.out('')
                break
            except ValueError:
                log.out('Invalid iteration count; please enter a positive ' +
                        'integer')
    # Init the ram disk (moved from isi_mfg_check script)
    # XXX This ramdisk lib is crap, and needs serious attention
    ramdisk.init()
    log.out('')
    # clear ECC counts to 0 before test
    if nvram.pre_test_clear_ecc_errors():
        return # hard fail
    log.out('')
    # Open the nvram log file for appending. After writing to it, be
    # sure to flush it before any calls to 'echo xyz >> loglib.nvram_log()',
    # otherwise output can get out of sync.
    try:
        nvramlog = open(loglib.nvram_log(), 'a')
    except IOError:
        log.fail('Unable to open nvram log file for appending: %s' %
                 loglib.nvram_log(), 0)
        return # hard fail

    def nvramlogwrite(output):
        # Append one string (or a list of strings) to the nvram log,
        # flushing immediately to keep ordering with external 'echo' writers.
        if isinstance(output, basestring):
            output = [output]
        for out in output:
            nvramlog.write("%s\n" % out)
        nvramlog.flush()

    def testString(iter, sec=None):
        # Human-readable "Test i of reps[: sec s:]" label for log messages.
        test = "Test %d of %d" % (iter+1, reps)
        if not sec is None:
            test += ": sec %d:" % (sec)
        return test

    def ddexec(iter, sec, ddcmd):
        # Run one dd copy described by ddcmd ('if'/'of'/'option' plus
        # 'src'/'dest' labels for messages); returns the dd error status.
        cmd = 'dd if=%s of=%s bs=1024k count=4' % (ddcmd['if'], ddcmd['of'])
        if ddcmd['option']:
            cmd += ' %s' % ddcmd['option']
        # Note previously this cmd was piped: 1>/dev/null 2>/dev/null
        (error, output) = procs.get_cmd_output(cmd)
        try:
            outerr = output[0]
        except:
            outerr = ''
        if error:
            out_str = ('%s: dd from %s to %s failed: %s' %
                       (testString(iter,sec), ddcmd['src'], ddcmd['dest'],
                        outerr))
            log.fail(out_str, 0)
            # Push dd output to logfile
            output.insert(0, out_str)
            nvramlogwrite(output)
        return error

    def checkjournal(iter, start=True):
        # Run the external checkjournal command and scan for voltage
        # failures; returns 1 on failure, 0 otherwise.  iter == -1 means
        # the pre-test sanity check; otherwise skipped below `checkpoint`.
        # Uses: nvramlog, log, reps, checkpoint
        error = 0
        if iter >= checkpoint or iter == -1:
            if iter == -1:
                test = 'Pre-Test'
                out_str = ('%s -- checking journal' % test)
            else:
                test = testString(iter)
                out_str = ('%s -- checking journal (%s)' %
                           (test, start and "start" or "end"))
            log.out(out_str)
            nvramlogwrite(out_str)
            cmd = files.commands['checkjournal']
            def echo_func(x):
                # Stream command output straight into the nvram log.
                nvramlog.write('%s\n' % x)
                nvramlog.flush()
            (error, output) = procs.proc_cmd_output(cmd, echo_func)
            nvramlog.flush()
            # volt_fails is only bound when error is falsy; the `or` below
            # short-circuits on error, so this is safe (if subtle).
            if not error:
                volt_fails = nvram.extract_voltage_failures(output)
            if error or volt_fails:
                error = 1
                out_str = '%s: Journal check failed' % test
                #procs.get_cmd_output('echo %s >> %s' %
                #                     (out_str, loglib.nvram_log()))
                #nvramlog.flush()
                nvramlogwrite(out_str)
                log.fail('%s; see %s file for details' %
                         (out_str, loglib.nvram_log()), 0)
        return error

    # Record run parameters at the top of the nvram log.
    nvramlogwrite([
        "nvramscrub: reps=%s, sections=%s, checkpoint=%s" %
        (reps, sections, checkpoint),
        "nvramscrub: ramdir=%s nvramdev=%s randfile=%s" % (ramdir,
                                                           nvramdev,
                                                           randfile),
        "",
    ])
    fail_count = 0
    # Tag the syslog with a marker to wrap the test; used for
    # extract_syslog_entries reporting, see end of test.
    syslog_marker = sysloglib.init_syslog_marker('nvramscrub')
    # Do an initial sanity checkjournal, first
    if checkjournal(iter=-1):
        fail_count += 1
        log.fail('Pre-Test NVRAM checkjournal errors detected', 0)
    log.out('')
    # We have a protection limit on dd offsets to prevent dd errors:
    #   /dev/mnv0: end of device
    # We rely on reported nvram size from hwver; This is checked
    # independently by safe.id.nvram, but go ahead and report a failure here
    # (once!) if testing would exceed this limit AND reported size mismatch
    # the expected safe.id.nvram values.
    out_str = 'Pre-Test -- checking NVRAM size limits'
    log.out(out_str)
    nvramlogwrite(out_str)
    # max(s in sections loop) = sections-1, but r=s+1, so max(r)=sections
    start_max = blocksize * (sections*4)
    if start_max >= nvramsize:
        # If reported nvramsize is less than expected, report failure
        if start_max < min(expd_nvramsizes):
            out_strs = ['- Unable to test at max dd skip offset %dB:' %
                        start_max,
                        '- Detected NVRAM size %dB, Expected %s' %
                        (nvramsize,
                         misc.should_be(map(lambda s: '%dB' % s,
                                            expd_nvramsizes)))]
            for out_str in out_strs:
                log.fail(out_str, 0)
                nvramlogwrite(out_str)
            fail_count += 1
    abs_starttime = time.time() #jcc
    # Run the test loop
    log.out('[nvramscrub] start: target_run_time=%s seconds' % (
        target_run_time ))
    #print "jcc nvramscrub: start_time '" + str( time.time() ) + "' seconds"
    for i in range(reps):
        failed = False
        r = 0
        log.out('%s -- scrubbing journal' % testString(i))
        # jcc start
        # Wall-clock limiter: once target_run_time elapses, report and exit
        # early with the summary accumulated so far.
        time_now = time.time() - abs_starttime
        time_remain = target_run_time - time_now
        if ( time_now < target_run_time ):
            #
            #print "jcc lcdscrub: remaining_time '" + str( time_remain ) + "' seconds"
            pass
        else:
            # NOTE(review): '[nvramsrub]' in this message looks like a typo
            # for '[nvramscrub]' -- left unchanged here (runtime string).
            log.out('[nvramsrub] end: target_run_time=%s seconds is reached, exited' % (target_run_time))
            if fail_count > 0:
                log.fail('Test failed')
            else:
                log.out('All tests succeeded')
            return
        if checkjournal(i, start=True):
            failed = True
        for s in range(sections):
            r += 1
            start = blocksize * (r*4)
            if start >= nvramsize:
                # Don't dd, will get error: /dev/mnv0: end of device
                continue
            writefile = '%s/randfilewrite%d%d' % (ramdir, i, s)
            readfile = '%s/randfileread%d%d' % (ramdir, i, s)
            # Three-step copy: random -> ramdisk -> nvram -> ramdisk,
            # then the two ramdisk files are diffed below.
            ddcommands = [
                {
                    'src': randfile,
                    'dest': 'ramdisk',
                    'if': randfile,
                    'of': writefile,
                    'option': None,
                },
                {
                    'src': 'ramdisk',
                    'dest': 'nvram',
                    'if': writefile,
                    'of': nvramdev,
                    'option': "seek=%d" % (r*4),
                },
                {
                    'src': 'nvram',
                    'dest': 'ramdisk',
                    'if': nvramdev,
                    'of': readfile,
                    'option': "skip=%d" % (r*4),
                },
            ]
            for ddcmd in ddcommands:
                error = ddexec(i, s, ddcmd)
                if error:
                    failed = True
                    break
            # Compare results, if successful dd's above
            if not error:
                cmd = 'diff %s %s' % (readfile, writefile)
                (error, output) = procs.get_cmd_output(cmd)
                if error:
                    failed = True
                    out_str = '%s: dd result diff failed:' % testString(i,s)
                    log.fail(out_str, 0)
                    # Push failure message to logfile
                    nvramlogwrite(out_str)
            # Cleanup ramdisk - Always
            procs.get_cmd_output('rm -f %s %s 1> /dev/null 2> /dev/null' %
                                 (readfile, writefile))
        if checkjournal(i, start=False):
            failed = True
        if failed:
            fail_count += 1
    if fail_count > 0:
        log.fail('%d of %d tests failed' % (fail_count, reps), 0)
    # Check NVRAM for ECC errors
    if nvram.post_test_check_ecc_errors():
        fail_count += 1
    # Check syslog for NVRAM ECC errors
    if nvram.check_nvram_syslog_errors(marker=syslog_marker):
        fail_count += 1
    log.out('')
    if fail_count > 0:
        log.fail('Test failed')
    else:
        log.out('All tests succeeded')
def run(iterations=20, fixed=True, random=False):
    """Entry point for the memory scrub (memstress) test.

    Runs the external memstress command for each enabled pattern test,
    parses the reported memcpy speed, and compares the average against a
    hardware-family threshold when one is configured.

    iterations -- iterations per pattern test (the fixed pattern runs only
                  once when the random pattern is also enabled).
    fixed      -- enable the fixed ("prewalk") pattern test.
    random     -- enable the random ("prerand") pattern test.

    Returns the total failure count (0 means the test passed).

    NOTE(review): relies on module globals not visible in this chunk:
    `memstress_thresh_check`, `get_thresh`, `target_run_time`,
    `abs_starttime` (used but never assigned here -- presumably set at
    module scope before run() is called; verify), `memstress_iters`.
    """
    log = loglib.get_logger()
    # Set thresholds based on hardware family
    if memstress_thresh_check:
        thresh = get_thresh()
    else:
        thresh = None
    if thresh != None:
        thresh_str = "%d MB/s" % thresh
    else:
        thresh_str = None
    # Bust out some tests
    # Matches e.g. "Standard memcpy Speed: 1234.5" from memstress output.
    mem_reg = re.compile("^Standard memcpy Speed:\s+(?P<speed>\d+(\.\d+)?)",
                         re.IGNORECASE)
    # (name, memstress option flag, enabled?) per pattern test.
    memstress_tests_all = [("fixed", "prewalk", fixed),
                          ("random", "prerand", random)]
    info = dict([(t[0], t[1:]) for t in memstress_tests_all])
    memstress_opt = lambda t: t in info and info[t][0] or ""
    # XXX Pychecker: memstress_enb not used (verified).
    # Investigate (validate) before final code deletion.
    # memstress_enb = lambda t: t in info and info[t][1]
    # Order in tests matters: run prewalk before prerand
    memstress_tests = []
    for (test, opt, enb) in memstress_tests_all:
        if enb:
            memstress_tests.append(test)
    # Per-test accumulators: measured speeds, execution failures, and the
    # iteration count actually used for the test.
    results = dict(
        zip(memstress_tests,
            [{"values": [], "fail_count": 0, "test_iterations": 0}
             for test in memstress_tests])
    )

    def speed_average(values):
        # Integer-truncated mean of the collected speeds; 0 if empty.
        if values:
            count = len(values)
            speed = reduce(lambda x, y: x + y, values)
            speed /= count
            speed = int(speed)
        else:
            speed = 0
        return speed

    def speed_minimum(values):
        if values:
            speed = min(values)
        else:
            speed = 0
        return speed

    def speed_maximum(values):
        if values:
            speed = max(values)
        else:
            speed = 0
        return speed

    def speed_threshold(values):
        # Threshold row is a fixed string (or None), not computed from values.
        return thresh_str

    speed_types = [
        ("minimum", speed_minimum),
        ("maximum", speed_maximum),
        ("average", speed_average),
        ("threshold", speed_threshold),
    ]

    def below_threshold_count(values, threshold):
        return len(filter(lambda s: s < threshold, values))

    # Get formatting widths and funcs (pretty print alignments)
    test_width = memstress_tests and max(map(len, info)) or 0
    iter_width = len(str(iterations))
    speed_type_width = max(map(len, zip(*speed_types)[0]))

    def format_test_info(test, width):
        return "%-*s pattern test" % (width, test.capitalize())

    def format_iter_info():
        return "Iterations per test"

    def format_perf_info():
        return "Performance target"

    def format_header(test, width, iteration, iterations):
        return "%-*s Iteration %*d of %d" % (width, test.capitalize(),
                                             iter_width, iteration + 1,
                                             iterations)

    def format_speed_result(test, width, speed_type):
        return "%-*s %s speed" % (width, test.capitalize(),
                                  speed_type.capitalize())

    def format_thresh_result(test, width):
        return "%-*s Below Threshold Count" % (width, test.capitalize())

    # Widths are computed from worst-case sample strings so that all info,
    # header, and result lines align in the log output.
    info_width = max(map(len, [format_test_info("test", test_width),
                               format_iter_info(), format_perf_info()]))
    header_width = len(format_header("x", test_width, 0, iterations))
    result_width = max(
        map(len, [format_speed_result("x", test_width,
                                      "x" * speed_type_width),
                  format_thresh_result("x", test_width)])
    )
    format_width = max(info_width, header_width, result_width)
    # Re-set all specific widths that we wish aligned (info?)
    info_width = header_width = result_width = format_width
    # Log test info
    for (test, opt, enb) in memstress_tests_all:
        log.out("%-*s: %s" % (info_width, format_test_info(test, test_width),
                              enb and "True" or "False"))
    log.out("%-*s: %d" % (info_width, format_iter_info(), iterations))
    if thresh:
        log.out("%-*s: %s" % (info_width, format_perf_info(), thresh_str))
    else:
        log.out("%-*s: %s" % (info_width, format_perf_info(),
                              "No performance target."))
    log.out("")
    log.out("[memscrub] start : target_run_time is=%s seconds" %
            (target_run_time))
    for test in memstress_tests:
        opt = memstress_opt(test)
        test_iterations = iterations
        # If both fixed and random pattern tests are used,
        # only run a single iteration of the fixed pattern.
        if test == "fixed" and random:
            test_iterations = 1
        results[test]["test_iterations"] = test_iterations
        log.out("Running %s pattern memscrub with %s" % (test, opt))
        # print "jcc memscrub: start_time '" + str( time.time() ) + "' seconds"
        for i in xrange(test_iterations):
            # jcc start
            # Wall-clock limiter: stop iterating once target_run_time has
            # elapsed since abs_starttime.
            time_now = time.time() - abs_starttime
            time_remain = target_run_time - time_now
            if time_now < target_run_time:
                pass
            else:
                # print " : jcc memscrub: remaining_time '" + str( time_remain ) + "' seconds"
                log.out("[memscrub] end: target_run_time=%s seconds is reached, exited" % (target_run_time))
                break
            # jcc end
            (error, output) = procs.get_cmd_output(
                "/%s -once -verify -%s "
                "-iters %d" % (files.commands["memstress"], opt,
                               memstress_iters)
            )
            header = format_header(test, test_width, i, test_iterations)
            if not error and len(output) > 0:
                # Extract the first reported memcpy speed from the output.
                speed = None
                for line in output:
                    match = mem_reg.match(line)
                    if match:
                        speed = float(match.group("speed"))
                        break
                try:
                    speed = int(speed)
                except (ValueError, TypeError):
                    speed = None
                if speed != None:
                    # Note fail is optional, depending on thresh
                    # Sub-threshold speeds are shown in parentheses.
                    fail = thresh != None and speed < thresh
                    results[test]["values"].append(speed)
                    log.out(
                        "%-*s: %s%d%s MB/s"
                        % (header_width, header, (fail and "(" or " "),
                           speed, (fail and ")" or " "))
                    )
                else:
                    log.out("%-*s: error" % (header_width, header))
                    log.fail("Unable to extract speed info from memstress "
                             "output: %s" % output, 0)
                    results[test]["fail_count"] += 1
            else:
                log.out("%-*s: error" % (header_width, header))
                log.fail("Error running memstress command (error %d)" %
                         error, 0)
                results[test]["fail_count"] += 1
    # Summarize each test: min/max/average (and threshold row if set).
    fail_count = 0
    for test in memstress_tests:
        values = results[test]["values"]
        fail_count += results[test]["fail_count"]
        speed_data = dict([(t, f(values)) for (t, f) in speed_types])
        speed_width = max(map(len,
                              map(str,
                                  filter(lambda v: isinstance(v, int),
                                         speed_data.values()))))
        log.out("")
        for speed_type in zip(*speed_types)[0]:
            if not thresh and speed_type == "threshold":
                continue
            speed = speed_data.get(speed_type)
            if isinstance(speed, int):
                speed = "%*d MB/s" % (speed_width, speed)
            log.out("%-*s: %s" % (result_width,
                                  format_speed_result(test, test_width,
                                                      speed_type),
                                  speed))
        # Note use of len(values): if we have errors, with no valid speed,
        # then we will correctly report total count len(values) < iterations.
        log.out(
            "%-*s: %d of %d"
            % (result_width, format_thresh_result(test, test_width),
               below_threshold_count(values, thresh), len(values))
        )
        if results[test]["fail_count"] > 0:
            log.fail(
                "%d of %d %s memscrub iterations failed to execute properly"
                % (results[test]["fail_count"],
                   results[test]["test_iterations"], test),
                0,
            )
        speed = speed_data.get("average")
        if thresh and speed and speed < thresh:
            # Save this failure indicator globally in fail_count,
            # so we do not report 'Test passed' at end.
            fail_count += 1
            log.fail(
                "%-*s Memory speed is %d MB/s, expected at least %d MB/s"
                % (test_width, test.capitalize(), speed, thresh),
                0,
            )
    if fail_count == 0:
        log.out("Test passed")
    return fail_count
def run(const_key='long', pass_count=None, partition=None, type=None,
        stripe_mult=None, runtime=None):
    """Entry point for the diskscrub test.

    Fills any unspecified parameters from the consts table selected by
    `const_key`, resolves a target runtime for CTO platforms, starts a CPU
    watchdog, and delegates to diskscrub.run().

    const_key   -- key into consts.disk() (e.g. 'long', 'short').
    pass_count  -- scrub passes; default from consts when None.
    partition   -- partition selector; default from consts when None.
    type        -- scrub type; default from consts when None.
                   NOTE(review): shadows the `type` builtin -- kept for
                   interface compatibility with existing callers.
    stripe_mult -- stripe multiplier consts; default from consts when None.
    runtime     -- target runtime in seconds; resolved from the CTO runtime
                   table when None on CTO hardware.

    Returns diskscrub.run()'s result, or 1 on hard failure.
    """
    #import pdb; pdb.set_trace() #jcc
    if None in [pass_count, partition, type, stripe_mult]:
        # At least one parameter unset: pull defaults from the consts table.
        consts_disk = consts.disk()
        if not const_key in consts_disk:
            log = loglib.get_logger()
            log.fail('Unknown Diskscrub key "%s"' % const_key, 0)
            return 1 # hard fail
        consts_disk = consts_disk[const_key]
        if pass_count is None:
            pass_count = consts_disk['pass_count']
        if partition is None:
            partition = consts_disk['partition']
        if type is None:
            type = consts_disk['type']
        if stripe_mult is None:
            stripe_mult = consts_disk['stripe_mult']
    # For CTO, and eventually everyone (TBD), new stripe_mult method:
    # Rather than hard-coding 'long' and 'short' stripe_mult values
    # that are hand-tuned to specific runtimes (e.g. long=8hour),
    # use a normalized bandwidth factor, per drive, and the desired
    # test runtime to calculate an appropriate stripe_mult value.
    # (However, there may be complications; e.g, normalized BW factor
    # for a given drive may be different for different platforms,
    # such as a 3.5" SATA drive on a Graham vs. Wingfoot. For now,
    # only using this for CTO, we don't have any such problems.)
    # For CTO 6.5.2 release: default long=6hour, not 8hour
    # NOTE(review): this literal is dead -- it is unconditionally
    # overwritten by the consts.disk_target_run_time() call just below.
    cto_default_runtimes = {'long':6*60*60, 'short':2*60*60}
    # jcc start
    # Hard to put timer, fork() and singal handingly limiter.
    cto_default_runtimes = consts.disk_target_run_time()
    log = loglib.get_logger()
    #import pdb; pdb.set_trace() #jcc
    # jcc end
    # On CTO hardware, default the runtime from the per-key runtime table.
    if hal.supportsCto() and const_key in cto_default_runtimes:
        if runtime is None:
            runtime = cto_default_runtimes.get(const_key)
    result = 0
    log.out('[diskscrub] start: target_run_time is %s seconds;' %
            (runtime)) # jcc
    # Find an unused diskscrub dir on the remote server
    localdir = loglib.get_logger().get_unused_logdir('diskscrub',
                                                     create=False)
    if localdir is None:
        return 1 # hard fail
    # Extract disk-specific stripe_mult values
    # Use local copy stripe_mult_info for PyChecker warning
    stripe_mult_info = get_stripe_mult_info(stripe_mult, runtime, pass_count)
    if stripe_mult_info is None:
        return 1 # hard fail
    args = {
        'const_key' : const_key,
        'partition' : partition,
        'scrub_type' : type,
        'pass_count' : pass_count,
        'output_dir' : localdir,
        'prompt_for_range' : False,
    }
    # For PyChecker warning: Don't have explanation, but assigning
    # this value in args declaration above causes this warning:
    # Modifying parameter (stripe_mult) with a default value may have
    # unexpected consequences
    args['stripe_mult'] = stripe_mult_info
    # Watchdog: flags the run if CPU idle stays above 98% (i.e. the scrub
    # stalls) for 3 consecutive 10-minute checks.
    watcher = watchdog.CPUIdleWatchdog(interval_minutes=10, max_fails=3,
                                       max_cpu=98.0)
    watcher.start('Starting CPU watchdog')
    try:
        result = diskscrub.run(**args)
    finally:
        # Always stop the watchdog, even if diskscrub.run() raises.
        watcher.stop('Stopping CPU watchdog')
    return result