def test_end_to_end_wide_deep(self):
    """Runs the wide-deep model end to end on real (non-synthetic) data."""
    cli_flags = [
        '--data_dir', self.get_temp_dir(),
        '--model_type', 'wide_deep',
    ]
    integration.run_synthetic(
        main=wide_deep.main,
        tmp_root=self.get_temp_dir(),
        synth=False,
        max_train=None,
        extra_flags=cli_flags)
def test_end_to_end_keras_2_gpu(self):
    """Runs the Keras NCF model end to end on two GPUs (skips otherwise)."""
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(2, context.num_gpus()))
    gpu_flags = ['-num_gpus', '2']
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + gpu_flags)
def test_end_to_end_wide_deep(self):
    """Runs the census wide-deep model end to end without downloading data."""
    cli_flags = [
        '--data_dir', self.get_temp_dir(),
        '--model_type', 'wide_deep',
        '--download_if_missing=false',
    ]
    integration.run_synthetic(
        main=census_main.main,
        tmp_root=self.get_temp_dir(),
        synth=False,
        max_train=None,
        extra_flags=cli_flags)
def test_end_to_end_deep(self):
    """Runs the MovieLens deep model for one epoch on local data."""
    run_flags = [
        "--data_dir", self.temp_dir,
        "--download_if_missing=false",
        "--train_epochs", "1",
        "--epochs_between_evals", "1",
    ]
    integration.run_synthetic(
        main=movielens_main.main,
        tmp_root=self.temp_dir,
        synth=False,
        max_train=None,
        extra_flags=run_flags)
def test_end_to_end(self):
    """Tests end-to-end running."""
    model_dir = os.path.join(self.get_temp_dir(), 'model')
    boosted_tree_flags = [
        '--data_dir', self.data_dir,
        '--model_dir', model_dir,
        '--n_trees', '5',
        '--train_start', '0',
        '--train_count', '12',
        '--eval_start', '12',
        '--eval_count', '8',
    ]
    integration.run_synthetic(
        main=train_higgs.main,
        tmp_root=self.get_temp_dir(),
        synth=False,
        max_train=None,
        extra_flags=boosted_tree_flags)
    # Training must have left a checkpoint behind.
    self.assertTrue(tf.gfile.Exists(os.path.join(model_dir, 'checkpoint')))
def test_end_to_end_graph_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    base_flags = [
        "-enable_eager", "false",
        "-distribution_strategy", "off",
        "-model_dir", "keras_cifar_graph_no_dist_strat",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=keras_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=base_flags + self._extra_flags)
def test_end_to_end(self):
    """Tests end-to-end running."""
    model_dir = os.path.join(self.get_temp_dir(), "model")
    higgs_flags = [
        "--data_dir", self.data_dir,
        "--model_dir", model_dir,
        "--n_trees", "5",
        "--train_start", "0",
        "--train_count", "12",
        "--eval_start", "12",
        "--eval_count", "8",
    ]
    integration.run_synthetic(
        main=train_higgs.main,
        tmp_root=self.get_temp_dir(),
        synth=False,
        max_train=None,
        extra_flags=higgs_flags)
    # A checkpoint file proves training actually ran.
    self.assertTrue(tf.gfile.Exists(os.path.join(model_dir, "checkpoint")))
def test_end_to_end_tpu_bf16_with_mlir(self):
    """Test Keras model with TPU and bfloat16 activation and MLIR bridge."""
    tf.config.experimental.enable_mlir_bridge()
    model_dir = os.path.join(self.get_temp_dir(),
                             'ctl_imagenet_tpu_dist_strat_bf16_mlir')
    run_flags = ['-model_dir', model_dir, '-dtype', 'bf16']
    integration.run_synthetic(
        main=resnet_ctl_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=run_flags + self._extra_flags)
def test_end_to_end_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    base_flags = [
        "-distribution_strategy", "off",
        "-model_dir", "keras_imagenet_no_dist_strat",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=base_flags + self._extra_flags)
def test_end_to_end_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    config = keras_common.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    base_flags = [
        "-distribution_strategy", "off",
        "-model_dir", "keras_cifar_no_dist_strat",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=keras_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=base_flags + self._extra_flags)
def test_end_to_end_graph_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    graph_flags = [
        "-enable_eager", "false",
        "-distribution_strategy", "off",
        "-model_dir", "keras_imagenet_graph_no_dist_strat",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=keras_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=graph_flags + self._extra_flags)
def test_end_to_end_tpu(self, flags_key):
    """Test Keras model with TPU distribution strategy."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    tpu_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-enable_checkpoint_and_export", "1",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=tpu_flags + self._extra_flags_dict[flags_key])
def test_end_to_end_with_export(self):
    """Tests end-to-end running with model export.

    Fix: pass max_train=None so training is not truncated, matching the
    sibling end-to-end tests for this model (the plain test_end_to_end
    variants and the other test_end_to_end_with_export all pass
    max_train=None) — see e.g. the neighboring higgs tests in this file.
    """
    model_dir = os.path.join(self.get_temp_dir(), "model")
    export_dir = os.path.join(self.get_temp_dir(), "export")
    integration.run_synthetic(
        main=train_higgs.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=[
            "--data_dir", self.data_dir,
            "--model_dir", model_dir,
            "--export_dir", export_dir,
            "--n_trees", "5",
            "--train_start", "0",
            "--train_count", "12",
            "--eval_start", "12",
            "--eval_count", "8",
        ],
        synth=False,
        max_train=None)
    # Both a checkpoint and an export directory must exist afterwards.
    self.assertTrue(tf.gfile.Exists(os.path.join(model_dir, "checkpoint")))
    self.assertTrue(tf.gfile.Exists(os.path.join(export_dir)))
def test_end_to_end_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    model_dir = os.path.join(self.get_temp_dir(), 'ctl_imagenet_no_dist_strat')
    ctl_flags = [
        '-distribution_strategy', 'off',
        '-model_dir', model_dir,
        '-data_format', 'channels_last',
    ]
    integration.run_synthetic(
        main=resnet_ctl_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=ctl_flags + self._extra_flags)
def test_end_to_end_with_export(self):
    """Tests end-to-end running."""
    model_dir = os.path.join(self.get_temp_dir(), 'model')
    export_dir = os.path.join(self.get_temp_dir(), 'export')
    higgs_flags = [
        '--data_dir', self.data_dir,
        '--model_dir', model_dir,
        '--export_dir', export_dir,
        '--n_trees', '5',
        '--train_start', '0',
        '--train_count', '12',
        '--eval_start', '12',
        '--eval_count', '8',
    ]
    integration.run_synthetic(
        main=train_higgs.main,
        tmp_root=self.get_temp_dir(),
        synth=False,
        max_train=None,
        extra_flags=higgs_flags)
    # Verify both the checkpoint and the exported model were written.
    self.assertTrue(tf.gfile.Exists(os.path.join(model_dir, 'checkpoint')))
    self.assertTrue(tf.gfile.Exists(os.path.join(export_dir)))
def test_end_to_end_tpu_bf16(self):
    """Test Keras model with TPU and bfloat16 activation."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    tpu_flags = [
        "-distribution_strategy", "tpu",
        "-data_format", "channels_last",
        "-dtype", "bf16",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=tpu_flags + self._extra_flags)
def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    # Fall back to CPU-only when two GPUs are not present.
    num_gpus = "2"
    if context.num_gpus() < 2:
        num_gpus = "0"
    gpu_flags = [
        "-num_gpus", num_gpus,
        "-distribution_strategy", "default",
        "-model_dir", "ctl_imagenet_2_gpu",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=ctl_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=gpu_flags + self._extra_flags)
def test_end_to_end_2_gpu(self, flags_key):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available"
            .format(2, context.num_gpus()))
    mirrored_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "mirrored",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=mirrored_flags + self.get_extra_flags_dict(flags_key))
def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    # Degrade to CPU-only when fewer than two GPUs are available.
    num_gpus = '2'
    if context.num_gpus() < 2:
        num_gpus = '0'
    ctl_flags = [
        '-num_gpus', num_gpus,
        '-distribution_strategy', 'default',
        '-model_dir', 'ctl_imagenet_2_gpu',
        '-data_format', 'channels_last',
    ]
    integration.run_synthetic(
        main=ctl_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=ctl_flags + self._extra_flags)
def test_end_to_end_graph_2_gpu(self):
    """Test Keras model in legacy graph mode with 2 GPUs."""
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(2, context.num_gpus()))
    graph_flags = [
        "-num_gpus", "2",
        "-enable_eager", "false",
        "-distribution_strategy", "default",
        "-model_dir", "keras_imagenet_graph_2_gpu",
    ]
    integration.run_synthetic(
        main=keras_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=graph_flags + self._extra_flags)
def test_end_to_end_graph_2_gpu(self):
    """Test Keras model in legacy graph mode with 2 GPUs."""
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(2, context.num_gpus()))
    graph_flags = [
        "-num_gpus", "2",
        "-enable_eager", "false",
        "-distribution_strategy", "default",
        "-model_dir", "keras_cifar_graph_2_gpu",
    ]
    integration.run_synthetic(
        main=keras_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=graph_flags + self._extra_flags)
def test_end_to_end_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    if context.num_gpus() < 1:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(1, context.num_gpus()))
    graph_flags = [
        "-num_gpus", "1",
        "-noenable_eager",
        "-distribution_strategy", "default",
        "-model_dir", "keras_cifar_graph_1_gpu",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=graph_flags + self._extra_flags)
def test_end_to_end_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    if context.num_gpus() < 1:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(1, context.num_gpus()))
    graph_flags = [
        "-num_gpus", "1",
        "-enable_eager", "false",
        "-distribution_strategy", "default",
        "-model_dir", "keras_imagenet_graph_1_gpu",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=keras_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=graph_flags + self._extra_flags)
def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available"
            .format(2, context.num_gpus()))
    gpu_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "default",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=gpu_flags + self._extra_flags)
def test_end_to_end_1_gpu(self, flags_key):
    """Test Keras model with 1 GPU."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    if context.num_gpus() < 1:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(1, context.num_gpus()))
    mirrored_flags = [
        "-num_gpus", "1",
        "-distribution_strategy", "mirrored",
        "-enable_checkpoint_and_export", "1",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=mirrored_flags + self.get_extra_flags_dict(flags_key))
def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    config = keras_common.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available".
            format(2, context.num_gpus()))
    gpu_flags = [
        "-num_gpus", "2",
        "-distribution_strategy", "default",
        "-model_dir", "keras_cifar_2_gpu",
    ]
    integration.run_synthetic(
        main=keras_cifar_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=gpu_flags + self._extra_flags)
def test_end_to_end_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    config = keras_utils.get_config_proto_v1()
    tf.compat.v1.enable_eager_execution(config=config)
    if context.num_gpus() < 1:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available"
            .format(1, context.num_gpus()))
    fp16_flags = [
        "-num_gpus", "1",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
        "-data_format", "channels_last",
    ]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=fp16_flags + self._extra_flags)
def test_end_to_end_2_gpu_fp16(self, flags_key):
    """Test Keras model with 2 GPUs and fp16."""
    if context.num_gpus() < 2:
        self.skipTest(
            "{} GPUs are not available for this test. {} GPUs are available"
            .format(2, context.num_gpus()))
    extra_flags = [
        "-num_gpus", "2",
        "-dtype", "fp16",
        "-distribution_strategy", "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    # Pruning schedules are incompatible with fp16, so bail out for them.
    if "polynomial_decay" in extra_flags:
        self.skipTest("Pruning with fp16 is not currently supported.")
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
        extra_flags=extra_flags)
def test_flag_restriction(self):
    """Incompatible flag combination must abort flag parsing."""
    bad_flags = ['-resnet_version', '1', "-dtype", "fp16"]
    with self.assertRaises(SystemExit):
        integration.run_synthetic(
            main=cifar10_main.run_cifar,
            tmp_root=self.get_temp_dir(),
            extra_flags=bad_flags)
def test_cifar10_end_to_end_synthetic_v2(self):
    """CIFAR-10 v2 on synthetic data."""
    version_flags = ['-v', '2']
    integration.run_synthetic(
        main=cifar10_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=version_flags)
def test_end_to_end_keras(self):
    """Keras NCF end to end; currently disabled pending a synthetic-data fix."""
    self.skipTest("TODO: fix synthetic data with keras")
    strategy_flags = ['-distribution_strategy', 'off']
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + strategy_flags)
def test_imagenet_end_to_end_synthetic_v1_tiny(self):
    """Small ResNet-18 v1 run on synthetic ImageNet data."""
    tiny_flags = [
        '-resnet_version', '1',
        '-batch_size', '4',
        '-resnet_size', '18',
    ]
    integration.run_synthetic(
        main=imagenet_main.run_imagenet,
        tmp_root=self.get_temp_dir(),
        extra_flags=tiny_flags)
def test_imagenet_end_to_end_synthetic_v2_huge(self):
    """ResNet-200 v2 on synthetic ImageNet data.

    Fix: the call was missing the ``tmp_root`` argument that every other
    ``integration.run_synthetic`` call site in this file supplies; pass the
    test's temp directory so the run has a scratch root like its siblings.
    """
    integration.run_synthetic(
        main=imagenet_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=['-v', '2', '-rs', '200'])
def test_end_to_end_keras_mlperf(self):
    """Keras NCF end to end in ML-Perf mode without a distribution strategy."""
    mlperf_flags = ['-ml_perf', 'True', '-distribution_strategy', 'off']
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + mlperf_flags)
def test_end_to_end_keras_dist_strat_ctl(self):
    """Keras NCF end to end with a custom training loop on CPU."""
    ctl_flags = (
        self._BASE_END_TO_END_FLAGS
        + ['-num_gpus', '0']
        + ['-keras_use_ctl', 'True'])
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=ctl_flags)
def test_end_to_end_estimator_mlperf(self):
    """Estimator NCF end to end in ML-Perf mode."""
    mlperf_flags = ['-ml_perf', 'True']
    integration.run_synthetic(
        ncf_estimator_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + mlperf_flags)
def test_end_to_end_estimator(self):
    """Estimator NCF end to end with the base flag set."""
    integration.run_synthetic(
        ncf_estimator_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=list(self._BASE_END_TO_END_FLAGS))
def test_end_to_end_estimator_mlperf(self):
    """Estimator NCF end to end with ML-Perf conventions enabled."""
    perf_flags = ['-ml_perf', 'True']
    integration.run_synthetic(
        ncf_estimator_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + perf_flags)
def test_flag_restriction(self):
    """Conflicting flags (resnet v1 + fp16) must exit during flag parsing."""
    conflicting = ['-resnet_version', '1', "-dtype", "fp16"]
    with self.assertRaises(SystemExit):
        integration.run_synthetic(
            main=cifar10_main.run_cifar,
            tmp_root=self.get_temp_dir(),
            extra_flags=conflicting)
def test_imagenet_end_to_end_synthetic_v2_huge(self):
    """ResNet-200 v2 on synthetic ImageNet data."""
    huge_flags = ['-resnet_version', '2', '-resnet_size', '200']
    integration.run_synthetic(
        main=imagenet_main.run_imagenet,
        tmp_root=self.get_temp_dir(),
        extra_flags=huge_flags)
def test_imagenet_end_to_end_synthetic_v2(self):
    """ImageNet v2 on synthetic data."""
    version_flags = ['-v', '2']
    integration.run_synthetic(
        main=imagenet_main.run_imagenet,
        tmp_root=self.get_temp_dir(),
        extra_flags=version_flags)
def test_cifar10_end_to_end_synthetic_v1(self):
    """CIFAR-10 v1 on synthetic data."""
    version_flags = ['-v', '1']
    integration.run_synthetic(
        main=cifar10_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=version_flags)
def test_imagenet_end_to_end_synthetic_v1_huge(self):
    """ResNet-200 v1 on synthetic ImageNet data."""
    huge_flags = ['-v', '1', '-rs', '200']
    integration.run_synthetic(
        main=imagenet_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=huge_flags)
def test_end_to_end_keras_dist_strat(self):
    """Keras NCF end to end with a distribution strategy on CPU."""
    cpu_flags = ['-num_gpus', '0']
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=self._BASE_END_TO_END_FLAGS + cpu_flags)
def test_imagenet_end_to_end_synthetic_v1_huge(self):
    """ResNet-200 v1 on synthetic ImageNet data."""
    big_model_flags = ['-v', '1', '-rs', '200']
    integration.run_synthetic(
        main=imagenet_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=big_model_flags)
def test_imagenet_end_to_end_synthetic_v2(self):
    """ImageNet v2 with a tiny batch on synthetic data."""
    v2_flags = ['-resnet_version', '2', '-batch_size', '4']
    integration.run_synthetic(
        main=imagenet_main.run_imagenet,
        tmp_root=self.get_temp_dir(),
        extra_flags=v2_flags)
def test_cifar10_end_to_end_synthetic_v2(self):
    """CIFAR-10 v2 with a tiny batch on synthetic data."""
    v2_flags = ['-resnet_version', '2', '-batch_size', '4']
    integration.run_synthetic(
        main=cifar10_main.run_cifar,
        tmp_root=self.get_temp_dir(),
        extra_flags=v2_flags)
def test_end_to_end_keras(self):
    """Keras NCF end to end without a distribution strategy."""
    strategy_flags = ['-distribution_strategy', 'off']
    integration.run_synthetic(
        ncf_keras_main.main,
        tmp_root=self.get_temp_dir(),
        max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + strategy_flags)
def test_cifar10_end_to_end_keras_synthetic_v1(self):
    """Keras CIFAR-10 v1, one training step on synthetic data."""
    keras_flags = [
        '-resnet_version', '1',
        '-batch_size', '4',
        '-train_steps', '1',
    ]
    integration.run_synthetic(
        main=keras_cifar_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=keras_flags)
def test_cifar10_end_to_end_synthetic_v2(self):
    """CIFAR-10 v2 on synthetic data with default batch size."""
    v2_flags = ['-resnet_version', '2']
    integration.run_synthetic(
        main=cifar10_main.run_cifar,
        tmp_root=self.get_temp_dir(),
        extra_flags=v2_flags)
def test_imagenet_end_to_end_synthetic_v1_tiny(self):
    """ResNet-18 v1 on synthetic ImageNet data.

    Fix: the call was missing the ``tmp_root`` argument that every other
    ``integration.run_synthetic`` call site in this file supplies; pass the
    test's temp directory so the run has a scratch root like its siblings.
    """
    integration.run_synthetic(
        main=imagenet_main.main,
        tmp_root=self.get_temp_dir(),
        extra_flags=['-v', '1', '-rs', '18'])