def test_hardware_pstates(ratio_to_control_value):
    """Program each hardware P-state ratio and test the resulting frequency.

    Ratios run from the "maximum efficiency ratio" up to the "max non-turbo
    ratio", both read from MSR_PLATFORM_INFO.  When CPUID leaf 6 EAX bit 1 is
    set, one extra step (max non-turbo ratio + 1) is tested as the turbo
    request.

    ratio_to_control_value -- callable taking (ratio, min_ratio, max_ratio)
        and returning the value written to IA32_PERF_CTL on every CPU.

    Results are reported through testsuite.test(); nothing is returned.
    """
    perf_ctl = 0x199        # IA32_PERF_CTL
    platform_info = 0xce    # MSR_PLATFORM_INFO
    million = 1000000

    # NOTE(review): both context managers presumably undo the MWAIT hint and
    # IA32_PERF_CTL changes on exit -- confirm against the bits module.
    with bits.mwait.use_hint(), bits.preserve_msr(perf_ctl):
        min_ratio = testmsr.MSR("maximum efficiency ratio", bits.bsp_apicid(),
                                platform_info, highbit=47, lowbit=40)[0]
        max_ratio = testmsr.MSR("max non-turbo ratio", bits.bsp_apicid(),
                                platform_info, highbit=15, lowbit=8)[0]

        # Turbo Mode Availability flag
        has_turbo = (bits.cpuid(bits.bsp_apicid(), 0).eax >= 6
                     and (bits.cpuid(bits.bsp_apicid(), 6).eax & 0x2))
        top_ratio = max_ratio + 1 if has_turbo else max_ratio

        bclk = testutil.adjust_to_nearest(bits.bclk(), 100.0 / 12) * million

        for ratio in range(min_ratio, top_ratio + 1):
            request = ratio_to_control_value(ratio, min_ratio, max_ratio)
            for cpu in bits.cpus():
                bits.wrmsr(cpu, perf_ctl, request)

            is_turbo = (ratio == max_ratio + 1)
            if is_turbo:
                # Needs to busywait, not sleep
                began = time.time()
                while time.time() - began < 2:
                    pass

            target = int(ratio * bclk / million)

            # Try a short measurement first; only fall back to the longer one
            # when the short one does not already satisfy the check.
            for duration in (0.1, 1.0):
                measured = bits.cpu_frequency(duration)[1]
                measured = testutil.adjust_to_nearest(measured, bclk / 2)
                measured = int(measured / million)
                if is_turbo and measured >= target:
                    break
                if not is_turbo and measured == target:
                    break

            if is_turbo:
                testsuite.test(
                    "Turbo measured frequency {} >= expected {} MHz".format(measured, target),
                    measured >= target)
            else:
                testsuite.test(
                    "Ratio {} measured frequency {} MHz == expected {} MHz".format(ratio, measured, target),
                    measured == target)
def test_hardware_pstates(ratio_to_control_value):
    """Program each hardware P-state ratio and test the resulting frequency.

    Ratios run from the "maximum efficiency ratio" up to the "max non-turbo
    ratio", both read from MSR_PLATFORM_INFO.  When CPUID leaf 6 EAX bit 1 is
    set, one extra step (max non-turbo ratio + 1) is tested as the turbo
    request.

    ratio_to_control_value -- callable taking (ratio, min_ratio, max_ratio)
        and returning the value written to IA32_PERF_CTL on every CPU.

    While the test runs, MWAIT is forced to hint 0x20 on every CPU.  Both the
    previous MWAIT settings and the previous IA32_PERF_CTL values are put back
    in the ``finally`` clause, so the machine is not left at the last tested
    ratio when the test ends or raises.

    Results are reported through testsuite.test(); nothing is returned.
    """
    MSR_PLATFORM_INFO = 0xce
    IA32_PERF_CTL = 0x199

    old_mwait = {}
    old_perf_ctl = {}
    try:
        min_ratio = testmsr.MSR("maximum efficiency ratio", bits.bsp_apicid(), MSR_PLATFORM_INFO, highbit=47, lowbit=40)[0]
        max_ratio = testmsr.MSR("max non-turbo ratio", bits.bsp_apicid(), MSR_PLATFORM_INFO, highbit=15, lowbit=8)[0]

        # Get the Turbo Mode Availability flag
        turbo_mode_available = bits.cpuid(bits.bsp_apicid(), 0).eax >= 6 and (bits.cpuid(bits.bsp_apicid(), 6).eax & 0x2)
        last_ratio = max_ratio
        if turbo_mode_available:
            last_ratio += 1

        # One measurement per ratio, plus the 2 second turbo busy-wait.
        duration = last_ratio - min_ratio + 1
        if turbo_mode_available:
            duration += 2
        print("Test duration is ~{} seconds...".format(duration))

        # Force use of MWAIT C3
        hint = 0x20
        cpus = bits.cpus()
        for apicid in cpus:
            old_mwait[apicid] = bits.get_mwait(apicid)
            bits.set_mwait(apicid, True, hint)

        # Remember the current performance request so it can be restored.
        for apicid in cpus:
            old_perf_ctl[apicid] = bits.rdmsr(apicid, IA32_PERF_CTL)

        bclk = testutil.adjust_to_nearest(bits.bclk(), 100.0 / 12) * 1000000

        for ratio in range(min_ratio, last_ratio + 1):
            control_value = ratio_to_control_value(ratio, min_ratio, max_ratio)
            for apicid in cpus:
                bits.wrmsr(apicid, IA32_PERF_CTL, control_value)

            turbo = (ratio == max_ratio + 1)
            if turbo:
                # Needs to busywait, not sleep
                start = time.time()
                while (time.time() - start < 2):
                    pass

            aperf = bits.cpu_frequency()[1]
            aperf = testutil.adjust_to_nearest(aperf, bclk / 2)
            aperf = int(aperf / 1000000)

            expected = int(ratio * bclk / 1000000)

            if turbo:
                testsuite.test("Turbo measured frequency {} >= expected {} MHz".format(aperf, expected), aperf >= expected)
            else:
                testsuite.test("Ratio {} measured frequency {} MHz == expected {} MHz".format(ratio, aperf, expected), aperf == expected)
    finally:
        # Restore only what was actually saved; a failed read (None) is
        # skipped rather than written back.
        for apicid, perf_ctl_value in old_perf_ctl.items():
            if perf_ctl_value is not None:
                bits.wrmsr(apicid, IA32_PERF_CTL, perf_ctl_value)
        for apicid, old_mwait_values in old_mwait.items():
            bits.set_mwait(apicid, *old_mwait_values)
def show_autodemotion():
    """Page a one-line report of the C1/C3 autodemotion state.

    Reads MSR 0xe2 on the BSP and reports "enabled" only when bits 26:25 are
    both set; any other combination is reported as "disabled".
    """
    with ttypager.page():
        msr_value = bits.rdmsr(bits.bsp_apicid(), 0xe2)
        both_set = bitfields.getbits(msr_value, 26, 25) == 0x3
        if not both_set:
            message = "C1 and C3 autodemotion are disabled"
        else:
            message = "C1 and C3 autodemotion are enabled"
        print(message)
def variable_mtrrs(apicid=bits.bsp_apicid()):
    """Page a summary and a full decode of the variable-range MTRRs of a CPU.

    apicid -- APIC ID of the CPU whose MSRs are read; must be one of
        bits.cpus().  The default is evaluated once, when the function is
        defined.

    The output starts with a summary (default memory type, then one line per
    variable MTRR whose mask register has V set), followed by the decoded
    IA32_MTRRCAP, IA32_MTRR_DEF_TYPE and every PHYSBASE/PHYSMASK pair.
    """
    assert apicid in bits.cpus()

    cap = IA32_MTRRCAP(bits.rdmsr(apicid, IA32_MTRRCAP_REG))
    def_type = IA32_MTRR_DEF_TYPE(bits.rdmsr(apicid, IA32_MTRR_DEF_TYPE_REG))

    summary_line = "MTRR{}: type={:20} base={:10} size={:10}"

    with ttypager.page():
        print("Summary:")
        print("Default memory type: {}".format(_memory_type_str(def_type.type)))
        for index in range(cap.VCNT):
            base = IA32_MTRR_PHYSBASE(bits.rdmsr(apicid, IA32_MTRR_PHYSBASEn_REG(index)))
            mask = IA32_MTRR_PHYSMASK(bits.rdmsr(apicid, IA32_MTRR_PHYSMASKn_REG(index)))
            if not mask.V:
                # Only valid ranges appear in the summary.
                continue
            print(summary_line.format(index,
                                      _memory_type_str(base.Type),
                                      _physbase_str(base.PhysBase),
                                      _physmask_str(mask.PhysMask)))
        print()

        print(cap, end='\n\n')
        print(def_type, end='\n\n')

        for index in range(cap.VCNT):
            base_reg = IA32_MTRR_PHYSBASEn_REG(index)
            base = IA32_MTRR_PHYSBASE(bits.rdmsr(apicid, base_reg))
            print("IA32_MTRR_PHYSBASE[{}] MSR {:#x}".format(index, base_reg))
            print(base, end='\n\n')

            mask_reg = IA32_MTRR_PHYSMASKn_REG(index)
            mask = IA32_MTRR_PHYSMASK(bits.rdmsr(apicid, mask_reg))
            print("IA32_MTRR_PHYSMASK[{}] MSR {:#x}".format(index, mask_reg))
            print(mask, end='\n\n')
def residency(residency_counters, residency_counter_msr, sleep_time=3):
    """Measure residency counter growth, relative to the TSC, per socket.

    residency_counters -- callable (e.g. a namedtuple type) that takes one
        positional value per MSR and builds the per-socket result.
    residency_counter_msr -- iterable of MSR numbers to sample.
    sleep_time -- length of the measurement window; it is multiplied by
        1000 * 1000 before being passed to bits.blocking_sleep().

    One CPU per socket (the lowest APIC ID) is sampled before and after the
    sleep.  Returns a dict mapping that APIC ID to
    residency_counters(<counter delta / BSP TSC delta>, ...).
    """
    TSC_MSR = 0x10
    bsp = bits.bsp_apicid()
    sockets = {skt_index: min(apic_list)
               for skt_index, apic_list in bits.socket_apic_ids().iteritems()}

    def snapshot(apic_id):
        return residency_counters(*(bits.rdmsr(apic_id, msr) for msr in residency_counter_msr))

    # The TSC window brackets the counter snapshots: read it first, and read
    # it again right after the sleep, before the closing snapshots.
    tsc_begin = bits.rdmsr(bsp, TSC_MSR)
    before = {}
    for apic_id in sockets.itervalues():
        before[apic_id] = snapshot(apic_id)

    bits.blocking_sleep(sleep_time * 1000 * 1000)

    elapsed = float(bits.rdmsr(bsp, TSC_MSR) - tsc_begin)
    after = {}
    for apic_id in sockets.itervalues():
        after[apic_id] = snapshot(apic_id)

    result = {}
    for apic_id in sockets.itervalues():
        fractions = ((end - start) / elapsed
                     for end, start in zip(after[apic_id], before[apic_id]))
        result[apic_id] = residency_counters(*fractions)
    return result
def power_opt_low_power_profile():
    """Check the Energy Performance Bias MSR (0x1b0), when it is supported.

    Does nothing unless CPUID leaf 6 ECX bit 3 is set on the BSP.  Otherwise
    runs a consistency check on the low four bits of MSR 0x1b0 and a check
    that the two-bit field at shift 2 equals 3.
    """
    # If CPUID.06H:ECX.SETBH[bit 3] is set, then the processor supports
    # performance-energy bias preference and IA32_ENERGY_PERF_BIAS (1B0H) MSR
    ecx = bits.cpuid(bits.bsp_apicid(), 6).ecx
    if (ecx & (1 << 3)) == 0:
        return

    energy_perf_bias = 0x1b0
    testmsr.test_msr_consistency("Energy Performance Bias", energy_perf_bias, mask=0xf)
    testmsr.test_msr("Energy Performance Bias 12-15", energy_perf_bias,
                     expected_value=3, shift=2, mask=3)
def turbo_msr(ratio, min_ratio, max_ratio):
    """Convert a ratio to a control value by shifting it left eight bits.

    A ratio above max_ratio is first replaced by the "turbo ratio" field
    (bits 7:0 of MSR 0x1ad, read on the BSP).  min_ratio is accepted for
    signature compatibility and is not used.
    """
    if not ratio > max_ratio:
        return ratio << 8

    MSR_TURBO_RATIO_LIMIT = 0x1ad
    turbo_ratio = testmsr.MSR("turbo ratio", bits.bsp_apicid(),
                              MSR_TURBO_RATIO_LIMIT, highbit=7, lowbit=0)[0]
    return turbo_ratio << 8
def test_hardware_pstates(ratio_to_control_value):
    """Program each hardware P-state ratio and test the resulting frequency.

    Ratios run from the "maximum efficiency ratio" up to the "max non-turbo
    ratio", both read from MSR_PLATFORM_INFO.  When CPUID leaf 6 EAX bit 1 is
    set, one extra step (max non-turbo ratio + 1) is tested as the turbo
    request.

    ratio_to_control_value -- callable taking (ratio, min_ratio, max_ratio)
        and returning the value written to IA32_PERF_CTL on every CPU.

    Results are reported through testsuite.test(); nothing is returned.
    """
    perf_ctl = 0x199        # IA32_PERF_CTL
    platform_info = 0xce    # MSR_PLATFORM_INFO
    million = 1000000

    # NOTE(review): both context managers presumably undo the MWAIT hint and
    # IA32_PERF_CTL changes on exit -- confirm against the bits module.
    with bits.mwait.use_hint(), bits.preserve_msr(perf_ctl):
        min_ratio = testmsr.MSR("maximum efficiency ratio", bits.bsp_apicid(),
                                platform_info, highbit=47, lowbit=40)[0]
        max_ratio = testmsr.MSR("max non-turbo ratio", bits.bsp_apicid(),
                                platform_info, highbit=15, lowbit=8)[0]

        # Turbo Mode Availability flag
        has_turbo = (bits.cpuid(bits.bsp_apicid(), 0).eax >= 6
                     and (bits.cpuid(bits.bsp_apicid(), 6).eax & 0x2))
        top_ratio = max_ratio + 1 if has_turbo else max_ratio

        bclk = testutil.adjust_to_nearest(bits.bclk(), 100.0 / 12) * million

        for ratio in range(min_ratio, top_ratio + 1):
            request = ratio_to_control_value(ratio, min_ratio, max_ratio)
            for cpu in bits.cpus():
                bits.wrmsr(cpu, perf_ctl, request)

            is_turbo = (ratio == max_ratio + 1)
            if is_turbo:
                # Needs to busywait, not sleep
                began = time.time()
                while time.time() - began < 2:
                    pass

            target = int(ratio * bclk / million)

            # Try a short measurement first; only fall back to the longer one
            # when the short one does not already satisfy the check.
            for duration in (0.1, 1.0):
                measured = bits.cpu_frequency(duration)[1]
                measured = testutil.adjust_to_nearest(measured, bclk / 2)
                measured = int(measured / million)
                if is_turbo and measured >= target:
                    break
                if not is_turbo and measured == target:
                    break

            if is_turbo:
                testsuite.test(
                    "Turbo measured frequency {} >= expected {} MHz".format(measured, target),
                    measured >= target)
            else:
                testsuite.test(
                    "Ratio {} measured frequency {} MHz == expected {} MHz".format(ratio, measured, target),
                    measured == target)
def toggle_autodemotion():
    """Flip bits 26:25 of MSR 0xe2 on every CPU, then show the new state.

    The BSP's current value decides the direction: if both bits are set they
    are cleared, otherwise both are set.  The resulting BSP-derived value is
    written to all CPUs before show_autodemotion() is called.
    """
    AUTODEMOTION_MSR = 0xe2
    current = bits.rdmsr(bits.bsp_apicid(), AUTODEMOTION_MSR)
    new_field = 0 if bitfields.getbits(current, 26, 25) == 0x3 else 0x3
    updated = bitfields.setbits(current, new_field, 26, 25)
    for apicid in bits.cpus():
        bits.wrmsr(apicid, AUTODEMOTION_MSR, updated)
    show_autodemotion()
def max_phys_addr():
    """Return the max physical address width, in bits.

    The width is computed on the first call.  That call then rebinds the
    module-level name ``max_phys_addr`` to a small function that just returns
    the computed value, so later calls do no CPUID work.
    """
    global max_phys_addr

    # Keep a handle on this (uncached) function so its metadata can be copied
    # onto the replacement below.
    uncached = max_phys_addr

    bsp = bits.bsp_apicid()
    if bits.cpuid(bsp, 0x80000000).eax >= 0x80000008:
        # cpuid.(eax=0x80000008).eax[7:0] = max physical-address width supported by the processor
        width = bitfields.getbits(bits.cpuid(bsp, 0x80000008).eax, 7, 0)
    else:
        # CPUID leaf 1 EDX bit 6: PAE supported
        pae = bitfields.getbits(bits.cpuid(bsp, 1).edx, 6)
        width = 36 if pae else 32

    def max_phys_addr():
        return width

    functools.update_wrapper(max_phys_addr, uncached)
    return width
def test_pm_generic_profile():
    """Run the generic power-management checks against MSR and PCI settings.

    Each check is reported through the testmsr / testpci / testsuite helpers,
    in the order listed below; nothing is returned.
    """
    testmsr.test_msr_consistency("Max non-turbo ratio must be consistent", 0xce, mask=0xff00)
    testpci.test_pci("Bus master disable", 0, 31, 0, 0xa9,
                     bytes=1, shift=2, mask=1, expected_value=1)

    # Single-bit MSR checks: (test name, MSR, extra keyword arguments).
    single_bit_checks = (
        ("C1 Auto Undemotion Enable", 0xe2, dict(shift=28, expected_value=1)),
        ("C3 Auto Undemotion Enable", 0xe2, dict(shift=27, expected_value=1)),
        ("C1 Auto Demotion Enable", 0xe2, dict(shift=26, expected_value=1)),
        ("C3 Auto Demotion Enable", 0xe2, dict(shift=25, expected_value=1)),
        ("IO MWAIT Redirection Enable", 0xe2, dict(shift=10, expected_value=1)),
        ("C1E Enable", 0x1fc, dict(shift=1, expected_value=1)),
        ("EIST Enable", 0x1a0, dict(shift=16, expected_value=1)),
        ("Turbo Enable", 0x1a0, dict(shift=38, expected_value=0)),
        ("EIST Hardware Coordination Enable", 0x1aa, dict(expected_value=0)),
    )
    for title, msr, extra in single_bit_checks:
        testmsr.test_msr(title, msr, mask=1, **extra)

    testmsr.test_msr_consistency("IO Capture C-state Range Consistent", 0xe4, shift=16, mask=7)
    capture_range, capture_range_str = testmsr.MSR(
        "IO Capture C-state Range", bits.bsp_apicid(), 0xe4, 18, 16)
    testsuite.test("IO Capture C-state Range <= 2", capture_range <= 2)
    testsuite.print_detail(capture_range_str)
def power_opt_low_power_profile():
    """Check the Energy Performance Bias MSR (0x1b0), when it is supported.

    Does nothing unless CPUID leaf 6 ECX bit 3 is set on the BSP.  Otherwise
    runs a consistency check on the low four bits of MSR 0x1b0 and a check
    that the two-bit field at shift 2 equals 3.
    """
    # If CPUID.06H:ECX.SETBH[bit 3] is set, then the processor supports
    # performance-energy bias preference and IA32_ENERGY_PERF_BIAS (1B0H) MSR
    leaf6_ecx = bits.cpuid(bits.bsp_apicid(), 6).ecx
    supported = (leaf6_ecx >> 3) & 1
    if supported:
        bias_msr = 0x1b0
        testmsr.test_msr_consistency("Energy Performance Bias", bias_msr, mask=0xf)
        testmsr.test_msr("Energy Performance Bias 12-15", bias_msr,
                         expected_value=3, shift=2, mask=3)
def test_pm_generic_profile():
    """Run the generic power-management checks against MSR and PCI settings.

    Each check is reported through the testmsr / testpci / testsuite helpers,
    in the order listed below; nothing is returned.
    """
    testmsr.test_msr_consistency("Max non-turbo ratio must be consistent", 0xce, mask=0xff00)
    testpci.test_pci("Bus master disable", 0, 31, 0, 0xa9,
                     bytes=1, shift=2, mask=1, expected_value=1)

    # Single-bit MSR checks: (test name, MSR, extra keyword arguments).
    single_bit_checks = (
        ("C1 Auto Demotion Enable", 0xe2, dict(shift=26, expected_value=1)),
        ("C3 Auto Demotion Enable", 0xe2, dict(shift=25, expected_value=1)),
        ("IO MWAIT Redirection Enable", 0xe2, dict(shift=10, expected_value=1)),
        ("C1E Enable", 0x1fc, dict(shift=1, expected_value=1)),
        ("EIST Enable", 0x1a0, dict(shift=16, expected_value=1)),
        ("Turbo Enable", 0x1a0, dict(shift=38, expected_value=0)),
        ("EIST Hardware Coordination Enable", 0x1aa, dict(expected_value=0)),
    )
    for title, msr, extra in single_bit_checks:
        testmsr.test_msr(title, msr, mask=1, **extra)

    testmsr.test_msr_consistency("IO Capture C-state Range Consistent", 0xe4, shift=16, mask=7)
    capture_range, capture_range_str = testmsr.MSR(
        "IO Capture C-state Range", bits.bsp_apicid(), 0xe4, 18, 16)
    testsuite.test("IO Capture C-state Range <= 2", capture_range <= 2)
    testsuite.print_detail(capture_range_str)
def residency(residency_counters, residency_counter_msr, sleep_time=3):
    """Measure residency counter growth, relative to the TSC, per socket.

    residency_counters -- callable (e.g. a namedtuple type) that takes one
        positional value per MSR and builds the per-socket result.
    residency_counter_msr -- iterable of MSR numbers to sample.
    sleep_time -- length of the measurement window; it is multiplied by
        1000 * 1000 before being passed to bits.blocking_sleep().

    One CPU per socket (the lowest APIC ID) is sampled before and after the
    sleep.  Returns a dict mapping that APIC ID to
    residency_counters(<counter delta / BSP TSC delta>, ...).
    """
    TSC_MSR = 0x10
    bsp = bits.bsp_apicid()
    sockets = {skt_index: min(apic_list)
               for skt_index, apic_list in bits.socket_apic_ids().iteritems()}

    def snapshot(apic_id):
        return residency_counters(*(bits.rdmsr(apic_id, msr) for msr in residency_counter_msr))

    # The TSC window brackets the counter snapshots: read it first, and read
    # it again right after the sleep, before the closing snapshots.
    tsc_begin = bits.rdmsr(bsp, TSC_MSR)
    before = {}
    for apic_id in sockets.itervalues():
        before[apic_id] = snapshot(apic_id)

    bits.blocking_sleep(sleep_time * 1000 * 1000)

    elapsed = float(bits.rdmsr(bsp, TSC_MSR) - tsc_begin)
    after = {}
    for apic_id in sockets.itervalues():
        after[apic_id] = snapshot(apic_id)

    result = {}
    for apic_id in sockets.itervalues():
        fractions = ((end - start) / elapsed
                     for end, start in zip(after[apic_id], before[apic_id]))
        result[apic_id] = residency_counters(*fractions)
    return result
def is_cpu():
    """Return True when the BSP's CPUID leaf 1 EAX, low four bits cleared, is 0x106c0."""
    leaf1_eax = bits.cpuid(bits.bsp_apicid(), 1).eax
    return (leaf1_eax & ~0xf) == 0x106c0
def is_cpu():
    """Return True when the BSP's CPUID leaf 1 EAX, low four bits cleared, is 0x206D0."""
    leaf1_eax = bits.cpuid(bits.bsp_apicid(), 1).eax
    return (leaf1_eax & ~0xF) == 0x206D0
def smi_latency():
    """Measure SMI latency and test that the maximum stays within 150us.

    Pass 1 calibrates TSC ticks per second with a one second busy-wait.
    Pass 2 hands bits.smi_latency() a 15 second budget (in TSC ticks) and a
    list of bin thresholds, then reports the maximum latency through
    testsuite.test().  Per-bin details are printed only when
    testsuite.show_detail() is true.

    Raises RuntimeError if the IA32_TSC MSR cannot be read on the BSP.
    """
    IA32_TSC_MSR = 0x10
    MSR_SMI_COUNT = 0x34
    bsp = bits.bsp_apicid()

    if bits.rdmsr(bsp, IA32_TSC_MSR) is None:
        raise RuntimeError("Reading of IA32_TSC MSR caused a GPF")

    print("Warning: touching the keyboard can affect the results of this test.")
    print("Starting pass #1. Calibrating the TSC.")

    # Calibration: sample wall clock and TSC on both sides of a busy-wait.
    began = time.time()
    tsc_began = bits.rdmsr(bsp, IA32_TSC_MSR)
    while time.time() - began < 1:
        pass
    ended = time.time()
    tsc_ended = bits.rdmsr(bsp, IA32_TSC_MSR)

    tsc_per_sec = (tsc_ended - tsc_began) / (ended - began)
    tsc_per_usec = tsc_per_sec / (1000*1000)

    def show_time(tscs):
        # Use the finest unit in which the value stays below 10000.
        for divisor, unit in ((1000*1000*1000, "ns"), (1000*1000, "us"), (1000, "ms")):
            scaled = tscs / (tsc_per_sec / divisor)
            if scaled < 10000:
                return "{}{}".format(int(scaled), unit)
        return "{}s".format(int(tscs / tsc_per_sec))

    # Bin thresholds in TSC ticks: 1us, 10us, ... (powers of ten).
    thresholds = [long(tsc_per_usec * 10**exponent) for exponent in range(9)]
    bin_descs = [
        "0 < t <= 1us",
        "1us < t <= 10us",
        "10us < t <= 100us",
        "100us < t <= 1ms",
        "1ms < t <= 10ms",
        "10ms < t <= 100ms",
        "100ms < t <= 1s ",
        "1s < t <= 10s ",
        "10s < t <= 100s ",
        "100s < t ",
    ]

    print("Starting pass #2. Wait here, I will be back in 15 seconds.")
    max_latency, smi_count_delta, raw_bins = bits.smi_latency(long(15 * tsc_per_sec), thresholds)
    BinType = namedtuple('BinType', ("max", "total", "count", "times"))
    bins = [BinType(*raw) for raw in raw_bins]

    testsuite.test("SMI latency < 150us to minimize risk of OS timeouts",
                   max_latency / tsc_per_usec <= 150)
    if not testsuite.show_detail():
        return

    for entry, desc in zip(bins, bin_descs):
        if entry.count == 0:
            continue
        average = show_time(entry.total/entry.count)
        testsuite.print_detail("{}; average = {}; count = {}".format(desc, average, entry.count))
        gaps = " ".join("{:>6}".format(show_time(later - earlier))
                        for earlier, later in zip(entry.times, entry.times[1:]))
        testsuite.print_detail(" Times between first few observations: {}".format(gaps))

    if smi_count_delta is not None:
        testsuite.print_detail("{} SMI detected using MSR_SMI_COUNT (MSR {:#x})".format(smi_count_delta, MSR_SMI_COUNT))

    testsuite.print_detail("Summary of impact: observed maximum latency = {}".format(show_time(max_latency)))
def log_sysinfo():
    """Write the BSP's processor signature and detected CPU name to the log.

    The signature is CPUID leaf 1 EAX; the name comes from cpulib.name.  The
    line is printed inside redirect.logonly().
    """
    line = "Processor signature {:#x}, detected CPU as {}"
    with redirect.logonly():
        leaf1_eax = bits.cpuid(bits.bsp_apicid(), 1).eax
        print(line.format(leaf1_eax, cpulib.name))
def _display_apicid(apicid):
    """Return the APIC ID as hex text, tagged with ' (BSP)' when it is the BSP's."""
    text = '{:#x}'.format(apicid)
    is_bsp = apicid == bits.bsp_apicid()
    return text + ' (BSP)' if is_bsp else text
def _apickey(apicid):
    """Sort key that places the BSP APIC ID before every other APIC ID."""
    # -1 sorts ahead of any other APIC ID, which are returned unchanged.
    return -1 if apicid == bits.bsp_apicid() else apicid
def generate_mwait_menu():
    """Build the "mwaitmenu.cfg" menu text once and register it with bits.pyfs.

    Later calls return immediately (guarded by the module-level
    created_mwait_menu flag).  As a side effect the module-level
    supported_mwaits_msg is set to a per-C-state supported / not supported
    report derived from CPUID leaf 5 EDX.
    """
    global created_mwait_menu
    global supported_mwaits_msg

    if created_mwait_menu:
        return

    pieces = [
        'menuentry "Test round-trip latency via MWAIT" {\n',
        " py 'import mwaitmenu; mwaitmenu.test_latency()'\n",
        '}\n\n',
        'menuentry "MWAIT disable" {\n',
        " py 'import mwaitmenu; mwaitmenu.mwait_callback(False)'\n",
        '}\n\n',
        'menuentry "MWAIT enable C0" {\n',
        """ py 'import mwaitmenu; mwaitmenu.mwait_callback(True, "C0", 0xf)'\n""",
        '}\n\n',
    ]

    # EDX stays 0 (nothing supported) when CPUID leaf 5 is not available.
    edx = 0
    if bits.cpuid(bits.bsp_apicid(), 0).eax >= 5:
        edx = bits.cpuid(bits.bsp_apicid(), 5).edx

    # (name, MWAIT hint, shift of the 4-bit EDX field, minimum field value)
    mwait_table = (
        ("C1", 0, 4, 1),
        ("C1E", 1, 4, 2),
        ("*C2", 0x10, 8, 1),
        ("*C3", 0x20, 12, 1),
        ("*C4", 0x30, 16, 1),
        ("*C5", 0x40, 20, 1),
        ("*C6", 0x50, 24, 1),
    )

    report = []
    for name, hint, shift, minimum in mwait_table:
        if ((edx >> shift) & 0xf) < minimum:
            report.append("MWAIT {} is not supported\n".format(name))
            continue
        pieces.append('menuentry "MWAIT enable {}" {{\n'.format(name))
        pieces.append(""" py 'import mwaitmenu; mwaitmenu.mwait_callback(True, "{}", {})'\n""".format(name, hint))
        pieces.append('}\n\n')
        report.append("MWAIT {} is supported\n".format(name))
    report.append("For more information, see the Intel Software Developers Manual, CPUID leaf 5\n")
    supported_mwaits_msg = "".join(report)

    pieces.extend([
        'menuentry "* MWAIT C-state naming is per CPUID leaf 5 & not processor-specific!" {',
        " py 'import mwaitmenu; mwaitmenu.show_supported_mwaits()'\n",
        '}\n\n',
        'menuentry "Current state of Interrupt Break Event" {\n',
        " py 'import mwaitmenu; mwaitmenu.show_interrupt_break_event()'\n",
        '}\n\n',
        'menuentry "Toggle Interrupt Break Event" {\n',
        " py 'import mwaitmenu; mwaitmenu.toggle_interrupt_break_event()'\n",
        '}\n\n',
    ])
    cfg = "".join(pieces)

    # CPU-specific entries are optional: an AttributeError here (e.g. cpulib
    # has no generate_mwait_menu) just means there is nothing to append.
    try:
        cfg += cpulib.generate_mwait_menu()
    except AttributeError:
        pass

    bits.pyfs.add_static("mwaitmenu.cfg", cfg)
    created_mwait_menu = True
def generate_mwait_menu():
    """Build the "mwaitmenu.cfg" menu text once and register it with bits.pyfs.

    Later calls return immediately (guarded by the module-level
    created_mwait_menu flag).  As a side effect the module-level
    supported_mwaits_msg is set to a per-C-state supported / not supported
    report derived from CPUID leaf 5 EDX.
    """
    global created_mwait_menu
    global supported_mwaits_msg

    if created_mwait_menu:
        return

    pieces = [
        'menuentry "Test round-trip latency via MWAIT" {\n',
        " py 'import mwaitmenu; mwaitmenu.test_latency()'\n",
        '}\n\n',
        'menuentry "MWAIT disable" {\n',
        " py 'import mwaitmenu; mwaitmenu.mwait_callback(False)'\n",
        '}\n\n',
        'menuentry "MWAIT enable C0" {\n',
        """ py 'import mwaitmenu; mwaitmenu.mwait_callback(True, "C0", 0xf)'\n""",
        '}\n\n',
    ]

    # EDX stays 0 (nothing supported) when CPUID leaf 5 is not available.
    edx = 0
    if bits.cpuid(bits.bsp_apicid(), 0).eax >= 5:
        edx = bits.cpuid(bits.bsp_apicid(), 5).edx

    # (name, MWAIT hint, shift of the 4-bit EDX field, minimum field value)
    mwait_table = (
        ("C1", 0, 4, 1),
        ("C1E", 1, 4, 2),
        ("*C2", 0x10, 8, 1),
        ("*C3", 0x20, 12, 1),
        ("*C4", 0x30, 16, 1),
        ("*C5", 0x40, 20, 1),
        ("*C6", 0x50, 24, 1),
    )

    report = []
    for name, hint, shift, minimum in mwait_table:
        if ((edx >> shift) & 0xf) < minimum:
            report.append("MWAIT {} is not supported\n".format(name))
            continue
        pieces.append('menuentry "MWAIT enable {}" {{\n'.format(name))
        pieces.append(""" py 'import mwaitmenu; mwaitmenu.mwait_callback(True, "{}", {})'\n""".format(name, hint))
        pieces.append('}\n\n')
        report.append("MWAIT {} is supported\n".format(name))
    report.append("For more information, see the Intel Software Developers Manual, CPUID leaf 5\n")
    supported_mwaits_msg = "".join(report)

    pieces.extend([
        'menuentry "* MWAIT C-state naming is per CPUID leaf 5 & not processor-specific!" {',
        " py 'import mwaitmenu; mwaitmenu.show_supported_mwaits()'\n",
        '}\n\n',
        'menuentry "Current state of Interrupt Break Event" {\n',
        " py 'import mwaitmenu; mwaitmenu.show_interrupt_break_event()'\n",
        '}\n\n',
        'menuentry "Toggle Interrupt Break Event" {\n',
        " py 'import mwaitmenu; mwaitmenu.toggle_interrupt_break_event()'\n",
        '}\n\n',
    ])
    cfg = "".join(pieces)

    # CPU-specific entries are optional: an AttributeError here (e.g. cpulib
    # has no generate_mwait_menu) just means there is nothing to append.
    try:
        cfg += cpulib.generate_mwait_menu()
    except AttributeError:
        pass

    bits.pyfs.add_static("mwaitmenu.cfg", cfg)
    created_mwait_menu = True
def is_cpu():
    """Return True when the BSP's CPUID leaf 1 EAX, low four bits cleared, is 0x106c0."""
    masked_eax = bits.cpuid(bits.bsp_apicid(), 1).eax & ~0xf
    return masked_eax == 0x106c0
def log_sysinfo():
    """Write the BSP's processor signature and detected CPU name to the log.

    The signature is CPUID leaf 1 EAX; the name comes from cpulib.name.  The
    line is printed inside redirect.logonly().
    """
    line = "Processor signature {:#x}, detected CPU as {}"
    with redirect.logonly():
        leaf1_eax = bits.cpuid(bits.bsp_apicid(), 1).eax
        print(line.format(leaf1_eax, cpulib.name))