def test_assert_ne_with_deferrables(self):
    """Check ``assert_ne`` when one operand is a deferred expression."""
    # Unequal operands: the deferred assertion is truthy.
    self.assertTrue(sn.assert_ne(1, make_deferrable(2)))
    self.assertTrue(sn.assert_ne(make_deferrable(1), False))

    # Equal operands: evaluation must raise with a descriptive message.
    with self.assertRaisesRegex(SanityError, '1 == 1'):
        evaluate(sn.assert_ne(make_deferrable(1), 1))

    with self.assertRaisesRegex(SanityError, '1 == True'):
        evaluate(sn.assert_ne(make_deferrable(1), True))
def test_assert_ne(self):
    """Check ``assert_ne`` with plain (non-deferred) operands."""
    # Unequal operands: the deferred assertion is truthy.
    self.assertTrue(sn.assert_ne(1, 2))
    self.assertTrue(sn.assert_ne(1, False))

    # Equal operands: evaluation must raise with a descriptive message.
    with self.assertRaisesRegex(SanityError, '1 == 1'):
        evaluate(sn.assert_ne(1, 1))

    with self.assertRaisesRegex(SanityError, '1 == True'):
        evaluate(sn.assert_ne(1, True))
def test_assert_ne_with_deferrables():
    """Check ``assert_ne`` when one operand is a deferred expression."""
    # Unequal operands: the deferred assertion is truthy.
    assert sn.assert_ne(1, sn.defer(2))
    assert sn.assert_ne(sn.defer(1), False)

    # Equal operands: evaluation must raise with a descriptive message.
    failing_cases = [
        (1, '1 == 1'),
        (True, '1 == True'),
    ]
    for rhs, message in failing_cases:
        with pytest.raises(SanityError, match=message):
            sn.evaluate(sn.assert_ne(sn.defer(1), rhs))
def test_assert_ne():
    """Check ``assert_ne`` with plain (non-deferred) operands."""
    # Unequal operands: the deferred assertion is truthy.
    assert sn.assert_ne(1, 2)
    assert sn.assert_ne(1, False)

    # Equal operands: evaluation must raise with a descriptive message.
    failing_cases = [
        (1, '1 == 1'),
        (True, '1 == True'),
    ]
    for rhs, message in failing_cases:
        with pytest.raises(SanityError, match=message):
            sn.evaluate(sn.assert_ne(1, rhs))
def assert_successful_execution(self):
    """Deferred sanity check for a successful run.

    Verifies that node ids were captured from stdout, that the two ranks
    ran on different nodes, and that the cluster-ready and result
    messages are present.
    """
    node_ids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    conditions = [
        sn.assert_ne(node_ids, []),
        sn.assert_ne(node_ids[0], node_ids[1]),
        sn.assert_found(r'IPCluster is ready\!\s+', self.stdout),
        sn.assert_found(r'slope=\S+', self.stdout),
    ]
    return sn.all(conditions)
def __init__(self):
    """Configure the TF2/Horovod ipyparallel distributed-training check."""
    self.descr = 'Distributed training with TensorFlow using ipyparallel'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    cray_cdt_version = osext.cray_cdt_version()

    # FIXME: drop this system-specific branch after the Daint upgrade
    if self.current_system.name == 'dom':
        self.modules = [
            'ipcmagic',
            f'Horovod/0.21.0-CrayGNU-{cray_cdt_version}-tf-2.4.0'
        ]
    else:
        self.modules = [
            'ipcmagic',
            'Horovod/0.19.1-CrayGNU-20.08-tf-2.2.0'
        ]

    self.num_tasks = 2
    self.num_tasks_per_node = 1
    self.executable = 'ipython'
    self.executable_opts = ['tf-hvd-sgd-ipc-tf2.py']

    # Sanity: node ids must be captured and the two ranks must have
    # landed on distinct nodes.
    node_ids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    self.sanity_patterns = sn.all([
        sn.assert_ne(node_ids, []),
        sn.assert_ne(node_ids[0], node_ids[1]),
    ])

    # Both partitions share identical reference figures.
    partition_ref = {
        'slope': (2.0, -0.1, 0.1, None),
        'offset': (0.0, -0.1, 0.1, None),
        'retries': (0, None, None, None),
        'time': (10, None, None, 's'),
    }
    self.reference = {
        part: dict(partition_ref) for part in ('daint:gpu', 'dom:gpu')
    }
    self.perf_patterns = {
        'slope': sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout,
                                  'slope', float),
        'offset': sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout,
                                   'offset', float),
        'retries': 4 - sn.count(
            sn.findall(r'IPCluster is already running', self.stdout)
        ),
        'time': sn.extractsingle(
            r'IPCluster is ready\!\s+'
            r'\((?P<time>\d+) seconds\)', self.stdout, 'time', float
        ),
    }
    self.maintainers = ['RS', 'TR']
    self.tags = {'production'}
def __init__(self):
    """Configure the TF1.14/Horovod ipyparallel distributed-training check."""
    self.descr = 'Distributed training with TensorFlow using ipyparallel'
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.modules = ['ipcmagic']

    # Horovod is loaded at run time; dask conflicts and must go first.
    self.prerun_cmds = [
        'module unload dask',
        'module load Horovod/0.16.4-CrayGNU-19.10-tf-1.14.0'
    ]

    self.num_tasks = 2
    self.num_tasks_per_node = 1
    self.executable = 'ipython'
    self.executable_opts = ['tf-hvd-sgd-ipc-tf-1.14.py']

    # Sanity: node ids must be captured and the two ranks must have
    # landed on distinct nodes.
    node_ids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    self.sanity_patterns = sn.all([
        sn.assert_ne(node_ids, []),
        sn.assert_ne(node_ids[0], node_ids[1]),
    ])

    # Both partitions share identical reference figures.
    partition_ref = {
        'slope': (2.0, -0.1, 0.1, None),
        'offset': (0.0, -0.1, 0.1, None),
        'retries': (0, None, None, None),
        'time': (10, None, None, 's'),
    }
    self.reference = {
        part: dict(partition_ref) for part in ('daint:gpu', 'dom:gpu')
    }
    self.perf_patterns = {
        'slope': sn.extractsingle(r'slope=(?P<slope>\S+)', self.stdout,
                                  'slope', float),
        'offset': sn.extractsingle(r'offset=(?P<offset>\S+)', self.stdout,
                                   'offset', float),
        'retries': 4 - sn.count(
            sn.findall(r'IPCluster is already running', self.stdout)
        ),
        'time': sn.extractsingle(
            r'IPCluster is ready\!\s+'
            r'\((?P<time>\d+) seconds\)', self.stdout, 'time', float
        ),
    }
    self.maintainers = ['RS', 'TR']
    self.tags = {'production'}
def assert_successful_execution(self):
    """Deferred sanity check for a successful run.

    Confirms the program ran on exactly two distinct nodes (two captured
    nids that differ) and that IPCMagic printed the end-of-program slope
    message to stdout.
    """
    node_ids = sn.extractall(r'nid(?P<nid>\d+)', self.stdout, 'nid', str)
    conditions = [
        sn.assert_eq(sn.len(node_ids), 2),
        sn.assert_ne(node_ids[0], node_ids[1]),
        sn.assert_found(r'slope=\S+', self.stdout),
    ]
    return sn.all(conditions)