def docker_config_cache_from(self):
    """Call `tfc.run` twice, feeding the first image back into the second.

    The first call runs the `mnist_example_using_fit.ipynb` notebook with a
    `DockerConfig` that only sets `image_build_bucket`. The
    `"docker_image"` entry of its result is then passed as both `image` and
    `cache_from` of the `DockerConfig` used by the second call.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    notebook = os.path.join(self.test_data_path,
                            "mnist_example_using_fit.ipynb")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    first_run = tfc.run(
        entry_point=notebook,
        requirements_txt=reqs,
        docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET),
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
        job_labels={
            "job": "docker_config_cache_from",
            "team": "on_notebook_tests",
        },
    )

    # Reuse the image reported by the first run as target and build cache.
    built_image = first_run["docker_image"]
    second_docker_config = tfc.DockerConfig(
        image_build_bucket=_TEST_BUCKET,
        image=built_image,
        cache_from=built_image,
    )
    return tfc.run(
        entry_point=notebook,
        requirements_txt=reqs,
        docker_config=second_docker_config,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
        job_labels={
            "job": "docker_config_cache_from",
            "team": "on_notebook_tests",
        },
    )
def run():
    """Build the model and either train it or hand off to `tfc.run`.

    Data, model and callbacks are always created. When `tfc.remote()` is
    truthy the model is fitted for 500 epochs, saved under
    `gs://<GCP_BUCKET>/resnet-dogs`, reloaded from there and evaluated on
    the test data. Finally `tfc.run` is called with a chief machine that
    has two NVIDIA Tesla T4 accelerators.

    NOTE(review): the source had no indentation; the extent of the
    `if tfc.remote():` body is inferred from `epochs` being bound only
    inside it -- confirm against the original file.
    """
    export_name = "resnet-dogs"
    image_side = 224
    per_batch = 64

    train_data, test_data, num_classes = get_data(image_side, per_batch)
    model = get_model(image_side, num_classes)
    callbacks = get_callbacks(export_name, GCP_BUCKET)

    if tfc.remote():
        epochs = 500
        model.fit(
            train_data,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=test_data,
            verbose=2,
        )
        # Round-trip through the saved copy before evaluating.
        save_path = os.path.join("gs://", GCP_BUCKET, export_name)
        model.save(save_path)
        model = tf.keras.models.load_model(save_path)
        model.evaluate(test_data)

    chief = tfc.MachineConfig(
        cpu_cores=8,
        memory=30,
        accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
        accelerator_count=2,
    )
    tfc.run(
        requirements_txt="requirements.txt",
        distribution_strategy="auto",
        chief_config=chief,
        docker_image_bucket_name=GCP_BUCKET,
    )
def test_auto_one_device_strategy(self):
    """Call `tfc.run` with only an entry point and a requirements file.

    Uses `mnist_example_using_fit.py` as the entry point, then checks that
    `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(entry_point=script, requirements_txt=reqs)

    self._mock_sys_exit.assert_called_once_with(0)
def test_auto_multi_worker_strategy(self, mock_exit):
    """Call `tfc.run` on `mnist_example_using_fit.py` with one worker.

    Args:
        mock_exit: Mock object expected to be called once with `0`
            (presumably a patched `sys.exit`; the patch is not visible
            here).
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(entry_point=script, worker_count=1, requirements_txt=reqs)

    mock_exit.assert_called_once_with(0)
def test_auto_mirrored_strategy(self, mock_exit):
    """Call `tfc.run` with the `T4_2X` common machine config as chief.

    Args:
        mock_exit: Mock object expected to be called once with `0`
            (presumably a patched `sys.exit`; the patch is not visible
            here).
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    chief = tfc.COMMON_MACHINE_CONFIGS['T4_2X']

    tfc.run(entry_point=script, requirements_txt=reqs, chief_config=chief)

    mock_exit.assert_called_once_with(0)
def test_auto_one_device_stream_logs(self, mock_exit):
    """Call `tfc.run` on `mnist_example_using_fit.py` with `stream_logs=True`.

    Args:
        mock_exit: Mock object expected to be called once with `0`
            (presumably a patched `sys.exit`; the patch is not visible
            here).
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(entry_point=script, requirements_txt=reqs, stream_logs=True)

    mock_exit.assert_called_once_with(0)
def test_auto_one_device_job_labels(self):
    """Call `tfc.run` on `mnist_example_using_fit.py` with job labels.

    Passes the labels `job=on_script_tests` and `team=keras`, then checks
    that `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    labels = {"job": "on_script_tests", "team": "keras"}

    tfc.run(entry_point=script, requirements_txt=reqs, job_labels=labels)

    self._mock_sys_exit.assert_called_once_with(0)
def test_auto_one_device_strategy_bucket_build(self):
    """Call `tfc.run` with `docker_image_bucket_name` set to `_TEST_BUCKET`.

    Uses `mnist_example_using_fit.py` as the entry point, then checks that
    `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_image_bucket_name=_TEST_BUCKET,
    )

    self._mock_sys_exit.assert_called_once_with(0)
def test_none_dist_strat_multi_worker_strategy(self):
    """Call `tfc.run` with `distribution_strategy=None` and two workers.

    Uses `mnist_example_using_ctl.py` as the entry point, then checks that
    `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_ctl.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(
        entry_point=script,
        distribution_strategy=None,
        worker_count=2,
        requirements_txt=reqs,
    )

    self._mock_sys_exit.assert_called_once_with(0)
def test_none_dist_strat_mwms_custom_img(self):
    """Call `tfc.run` with two workers and a custom Docker base image.

    Uses `mnist_example_using_ctl.py` as the entry point and
    `tensorflow/tensorflow:latest-gpu` as `docker_base_image`, then checks
    that `self._mock_sys_exit` was called exactly once with `0`.

    NOTE(review): the name says "none_dist_strat" but
    `distribution_strategy` is not passed here -- confirm this is intended.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_ctl.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(
        entry_point=script,
        worker_count=2,
        requirements_txt=reqs,
        docker_base_image="tensorflow/tensorflow:latest-gpu",
    )

    self._mock_sys_exit.assert_called_once_with(0)
def test_none_dist_strat_multi_worker_strategy_bucket_build(
        self, mock_exit):
    """Call `tfc.run` with two workers and a bucket-based image build.

    Uses `mnist_example_using_ctl.py` as the entry point and passes the
    `_TEST_BUCKET` constant as `docker_image_bucket_name`, matching the
    other bucket-build tests. The previous quoted literal `"TEST_BUCKET"`
    was not the configured bucket.

    NOTE(review): the name says "none_dist_strat" but
    `distribution_strategy` is not passed here -- confirm this is intended.

    Args:
        mock_exit: Mock object expected to be called once with `0`
            (presumably a patched `sys.exit`; the patch is not visible
            here).
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_ctl.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(
        entry_point=script,
        worker_count=2,
        requirements_txt=reqs,
        # Use the shared constant rather than a hard-coded string.
        docker_image_bucket_name=_TEST_BUCKET,
    )

    mock_exit.assert_called_once_with(0)
def test_auto_tpu(self, mock_exit):
    """Call `tfc.run` with a `CPU` chief and one `TPU` worker.

    Both machine configs come from `tfc.COMMON_MACHINE_CONFIGS`; the entry
    point is `mnist_example_using_fit.py`.

    Args:
        mock_exit: Mock object expected to be called once with `0`
            (presumably a patched `sys.exit`; the patch is not visible
            here).
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    chief = tfc.COMMON_MACHINE_CONFIGS["CPU"]
    worker = tfc.COMMON_MACHINE_CONFIGS["TPU"]
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    tfc.run(
        entry_point=script,
        chief_config=chief,
        worker_count=1,
        worker_config=worker,
        requirements_txt=reqs,
    )

    mock_exit.assert_called_once_with(0)
def test_auto_tpu_strategy(self):
    """Call `tfc.run` with a `CPU` chief, one `TPU` worker and a base image.

    Uses `mnist_example_using_fit.py` as the entry point and
    `tensorflow/tensorflow:2.1.0` as `docker_base_image`, then checks that
    `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    chief = tfc.COMMON_MACHINE_CONFIGS["CPU"]
    worker = tfc.COMMON_MACHINE_CONFIGS["TPU"]

    tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        chief_config=chief,
        worker_count=1,
        worker_config=worker,
        docker_base_image="tensorflow/tensorflow:2.1.0",
    )

    self._mock_sys_exit.assert_called_once_with(0)
def test_auto_mirrored_strategy(self):
    """Call `tfc.run` with an explicit two-accelerator chief machine.

    The chief is a `tfc.MachineConfig` with 8 CPU cores, memory 30 and two
    `NVIDIA_TESLA_T4` accelerators. Afterwards checks that
    `self._mock_sys_exit` was called exactly once with `0`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    chief = tfc.MachineConfig(
        cpu_cores=8,
        memory=30,
        accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
        accelerator_count=2,
    )

    tfc.run(entry_point=script, requirements_txt=reqs, chief_config=chief)

    self._mock_sys_exit.assert_called_once_with(0)
def auto_one_device_strategy_with_image(self):
    """Call `tfc.run` twice, passing the first run's image to the second.

    The first call uses only the entry point and requirements file. Its
    `"docker_image"` result is passed as `DockerConfig(image=...)` to the
    second call.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    first_run = tfc.run(entry_point=script, requirements_txt=reqs)

    image_config = tfc.DockerConfig(image=first_run["docker_image"])
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_config=image_config,
    )
def docker_config_image(self):
    """Call `tfc.run` twice on `P100_1X`, passing the first image on.

    The `"docker_image"` entry returned by the first call is passed as
    `DockerConfig(image=...)` to the second call. Both calls use the
    `P100_1X` common machine config as chief.

    Returns:
        The value returned by the second `tfc.run` call.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")

    first_run = tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
    )

    image_config = tfc.DockerConfig(image=first_run["docker_image"])
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_config=image_config,
        chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
    )
def auto_one_device_strategy(self):
    """Call `tfc.run` with only an entry point and a requirements file.

    Returns:
        The value returned by `tfc.run` for `mnist_example_using_fit.py`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    return tfc.run(entry_point=script, requirements_txt=reqs)
def auto_multi_worker_strategy(self):
    """Call `tfc.run` on `mnist_example_using_fit.py` with one worker.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    return tfc.run(entry_point=script, worker_count=1, requirements_txt=reqs)
def auto_one_device_strategy_cloud_build(self):
    """Call `tfc.run` with a `DockerConfig` that sets `image_build_bucket`.

    The bucket is the `_TEST_BUCKET` constant; the entry point is
    `mnist_example_using_fit.py`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    build_config = tfc.DockerConfig(image_build_bucket=_TEST_BUCKET)
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_config=build_config,
    )
def docker_config_cloud_build(self):
    """Call `tfc.run` on `P100_1X` with an `image_build_bucket` config.

    The `DockerConfig` sets `image_build_bucket` to `_TEST_BUCKET`; the
    chief is the `P100_1X` common machine config.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    build_config = tfc.DockerConfig(image_build_bucket=_TEST_BUCKET)
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_config=build_config,
        chief_config=chief,
    )
def job_labels(self):
    """Call `tfc.run` on `P100_1X` with job labels attached.

    Passes the labels `job=on_script_tests` and `team=keras`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    labels = {"job": "on_script_tests", "team": "keras"}
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        job_labels=labels,
        chief_config=chief,
    )
def none_dist_strat_multi_worker_strategy(self):
    """Call `tfc.run` with `distribution_strategy=None` and two workers.

    The entry point is `mnist_example_using_ctl.py`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_ctl.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    return tfc.run(
        entry_point=script,
        distribution_strategy=None,
        worker_count=2,
        requirements_txt=reqs,
    )
def auto_dist_strat_mwms_custom_img(self):
    """Call `tfc.run` with the "auto" strategy and a custom base image.

    The base image is
    `gcr.io/deeplearning-platform-release/tf2-gpu.2-2:latest`; the entry
    point is `mnist_example_using_fit.py`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    base_image = ("gcr.io/deeplearning-platform-release"
                  "/tf2-gpu.2-2:latest")
    return tfc.run(
        entry_point=script,
        distribution_strategy="auto",
        requirements_txt=reqs,
        docker_base_image=base_image,
    )
def auto_multi_worker_strategy(self):
    """Call `tfc.run` with a `P100_1X` chief and one `P100_1X` worker.

    The entry point is `mnist_example_using_fit.py`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    worker = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        worker_count=1,
        chief_config=chief,
        worker_config=worker,
    )
def auto_one_device_job_labels(self):
    """Call `tfc.run` on `mnist_example_using_fit.py` with job labels.

    Passes the labels `job=on_script_tests` and `team=keras`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    labels = {
        "job": "on_script_tests",
        "team": "keras",
    }
    return tfc.run(entry_point=script, requirements_txt=reqs,
                   job_labels=labels)
def auto_tpu_strategy(self):
    """Call `tfc.run` with a `CPU` chief, one `TPU` worker and a base image.

    Uses `requirements_tpu_strategy.txt` as the requirements file and
    `tensorflow/tensorflow:2.1.0` as `docker_base_image`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path,
                        "requirements_tpu_strategy.txt")
    chief = tfc.COMMON_MACHINE_CONFIGS["CPU"]
    worker = tfc.COMMON_MACHINE_CONFIGS["TPU"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        chief_config=chief,
        worker_count=1,
        worker_config=worker,
        docker_base_image="tensorflow/tensorflow:2.1.0",
    )
def none_dist_strat(self):
    """Call `tfc.run` with `distribution_strategy=None` on `P100_1X` machines.

    Uses `mnist_example_using_ctl.py` as the entry point, two workers, and
    the `P100_1X` common machine config for both chief and workers.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_ctl.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    worker = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        distribution_strategy=None,
        worker_count=2,
        chief_config=chief,
        worker_config=worker,
    )
def cloud_build_base_image_backward_compatibility(self):
    """Call `tfc.run` with the top-level bucket and base-image arguments.

    Passes `docker_image_bucket_name` and `docker_base_image` directly to
    `tfc.run` rather than through a `DockerConfig`, with a `P100_1X` chief.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    base_image = ("gcr.io/deeplearning-platform-release"
                  "/tf2-gpu.2-2:latest")
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_image_bucket_name=_TEST_BUCKET,
        docker_base_image=base_image,
        chief_config=chief,
    )
def docker_config_parent_img(self):
    """Call `tfc.run` on `P100_1X` with a `DockerConfig` parent image.

    The parent image is
    `gcr.io/deeplearning-platform-release/tf2-gpu.2-2:latest`.

    Returns:
        The value returned by `tfc.run`.
    """
    script = os.path.join(self.test_data_path, "mnist_example_using_fit.py")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    parent_config = tfc.DockerConfig(
        parent_image="gcr.io/deeplearning-platform-release"
        "/tf2-gpu.2-2:latest")
    chief = tfc.COMMON_MACHINE_CONFIGS["P100_1X"]
    return tfc.run(
        entry_point=script,
        requirements_txt=reqs,
        docker_config=parent_config,
        chief_config=chief,
    )
def auto_one_device_strategy(self):
    """Call `tfc.run` on the `mnist_example_using_fit.ipynb` notebook.

    No machine config is passed; the job is labelled
    `job=auto_one_device_strategy`, `team=on_notebook_tests`.

    Returns:
        The value returned by `tfc.run`.
    """
    # Using the default T4 GPU for this test.
    notebook = os.path.join(self.test_data_path,
                            "mnist_example_using_fit.ipynb")
    reqs = os.path.join(self.test_data_path, "requirements.txt")
    labels = {
        "job": "auto_one_device_strategy",
        "team": "on_notebook_tests",
    }
    return tfc.run(entry_point=notebook, requirements_txt=reqs,
                   job_labels=labels)