def gpu_usage_sanity(self):
    '''Verify that the jobreport output has sensible numbers.

    This function asserts that the nodes reported are at least a subset of
    all nodes used by the gpu burn app. Also, the GPU usage is verified by
    assuming that in the worst case scenario, the usage is near 100% during
    the burn, and 0% outside the burn period. Lastly, the GPU usage time
    for each node is also asserted to be greater or equal than the burn
    time.

    The per-entry usage check is written as ``burn_time <= time * usage``
    instead of ``burn_time / time <= usage``. Both forms agree for positive
    time values, but the product form makes a reported time of zero fail
    the check instead of raising ``ZeroDivisionError``.

    :returns: the combined sanity expression built with ``sn.all``.
    '''

    # Get set with all nodes
    patt = r'^\s*\[([^\]]*)\]\s*GPU\s*\d+\(OK\)'
    full_node_set = set(sn.extractall(patt, self.stdout, 1))

    # Parse job report data: group 1 is the reported node, group 2 the
    # number preceding '%', group 3 the last of three colon-separated
    # numeric fields.
    # NOTE(review): only the last colon-separated field is captured as the
    # reported time -- confirm this is the intended value to compare
    # against ``burn_time``.
    patt = r'^\s*(\w*)\s*(\d+)\s*%\s*\d+\s*MiB\s*\d+:\d+:(\d+)'
    self.nodes_reported = sn.extractall(patt, self.stdout, 1)
    usage = sn.extractall(patt, self.stdout, 2, int)
    time_reported = sn.extractall(patt, self.stdout, 3, int)
    return sn.all([
        sn.assert_ge(sn.count(self.nodes_reported), 1),
        set(self.nodes_reported).issubset(full_node_set),
        sn.all(
            # Division-free form of ``burn_time / t <= u`` (safe for t == 0)
            map(lambda t, u: self.burn_time <= t * u, time_reported, usage)
        ),
        sn.assert_ge(sn.min(time_reported), self.burn_time)
    ])
def __init__(self):
    '''Configure the ``likwid-bench`` run and its bandwidth checks.

    Besides the basic test settings, this fills three lookup tables keyed
    by ``system:partition`` (cpu count, NUMA domains and per-level working
    set sizes) and installs a sanity check and a performance pattern based
    on the ``MByte/s`` value found in the standard output.
    '''
    super().__init__()
    self.modules = ['likwid']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.sourcesdir = None
    self.executable = 'likwid-bench'
    self.num_tasks = 0
    self.num_tasks_per_core = 2

    # The daint and dom entries carry identical values for a given
    # partition type, so each table is expanded from one description per
    # type: (cpu count, NUMA domains, working set sizes).
    # Test each level at half capacity times nthreads per domain
    hw_by_type = {
        'mc': (
            72,
            ('S0', 'S1'),
            (('L1', '288kB'), ('L2', '2304kB'),
             ('L3', '23MB'), ('memory', '1800MB')),
        ),
        'gpu': (
            24,
            ('S0',),
            (('L1', '192kB'), ('L2', '1536kB'),
             ('L3', '15MB'), ('memory', '1200MB')),
        ),
    }
    num_cpus = {}
    numa_domains = {}
    cache_sizes = {}
    for part in ('daint:mc', 'daint:gpu', 'dom:mc', 'dom:gpu'):
        cpus, domains, sizes = hw_by_type[part.partition(':')[2]]
        num_cpus[part] = cpus
        # Fresh containers per key, so entries never alias each other
        numa_domains[part] = list(domains)
        cache_sizes[part] = dict(sizes)

    self.system_num_cpus = num_cpus
    self.system_numa_domains = numa_domains
    self.system_cache_sizes = cache_sizes

    self.maintainers = ['SK']
    self.tags = {'benchmark', 'diagnostic'}

    # The same extracted value drives both the sanity and the perf check
    bandwidth = sn.extractsingle(r'MByte/s:\s*(?P<bw>\S+)',
                                 self.stdout, 'bw', float)
    self.sanity_patterns = sn.assert_ge(bandwidth, 0.0)
    self.perf_patterns = {'bandwidth': bandwidth}
def test_assert_ge_with_deferrables(self):
    '''Check ``sn.assert_ge`` when the second operand is a deferrable.'''
    # 3 >= 1 and 3 >= 3 must both hold
    for ref in (1, 3):
        self.assertTrue(sn.assert_ge(3, make_deferrable(ref)))

    # 1 >= 3 must fail on evaluation with the expected message
    failing = sn.assert_ge(1, make_deferrable(3))
    with self.assertRaisesRegex(SanityError, '1 < 3'):
        evaluate(failing)
def test_assert_ge(self):
    '''Check ``sn.assert_ge`` with plain integer operands.'''
    # 3 >= 1 and 3 >= 3 must both hold
    for ref in (1, 3):
        self.assertTrue(sn.assert_ge(3, ref))

    # 1 >= 3 must fail on evaluation with the expected message
    failing = sn.assert_ge(1, 3)
    with self.assertRaisesRegex(SanityError, '1 < 3'):
        evaluate(failing)
def test_assert_ge_with_deferrables():
    '''Check ``sn.assert_ge`` when the second operand is deferred.'''
    # 3 >= 1 and 3 >= 3 must both hold
    for ref in (1, 3):
        assert sn.assert_ge(3, sn.defer(ref))

    # 1 >= 3 must fail on evaluation with the expected message
    with pytest.raises(SanityError, match='1 < 3'):
        failing = sn.assert_ge(1, sn.defer(3))
        sn.evaluate(failing)
def test_assert_ge():
    '''Check ``sn.assert_ge`` with plain integer operands.'''
    # 3 >= 1 and 3 >= 3 must both hold
    for ref in (1, 3):
        assert sn.assert_ge(3, ref)

    # 1 >= 3 must fail on evaluation with the expected message
    with pytest.raises(SanityError, match='1 < 3'):
        failing = sn.assert_ge(1, 3)
        sn.evaluate(failing)
def validate_test(self):
    '''Build the sanity check on the smallest reported bandwidth.

    Every ``MByte/s: <value>`` occurrence in the standard output is
    extracted as a float; the minimum of those values is stored in
    ``self.bw_pattern`` and asserted to be greater than or equal to zero.

    :returns: the ``sn.assert_ge`` sanity expression.
    '''
    bandwidths = sn.extractall(r'MByte/s:\s*(?P<bw>\S+)',
                               self.stdout, 'bw', float)
    self.bw_pattern = sn.min(bandwidths)
    return sn.assert_ge(self.bw_pattern, 0.0)