def test_tf_keras_const_warm_start(tf2: bool) -> None:
    """Warm-start a tf.keras CIFAR-10 experiment from an earlier trial.

    Runs a first const-searcher experiment, then re-runs it with
    ``searcher.source_trial_id`` pointing at the first trial, and checks the
    new trial's ``warm_start_checkpoint_id`` matches the first trial's final
    checkpoint.
    """
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    experiment_id1 = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )
    trials = exp.experiment_trials(experiment_id1)
    assert len(trials) == 1
    first_trial = trials[0]
    first_trial_id = first_trial["id"]
    assert len(first_trial["steps"]) == 2
    # Take the last step's checkpoint (index -1 for consistency with the
    # other warm-start test; equivalent to [1] given the assert above).
    first_checkpoint_id = first_trial["steps"][-1]["checkpoint"]["id"]

    # Add a source trial ID to warm start from.
    config["searcher"]["source_trial_id"] = first_trial_id
    experiment_id2 = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )

    # The new trials should have a warm start checkpoint ID.
    trials = exp.experiment_trials(experiment_id2)
    assert len(trials) == 1
    for trial in trials:
        assert trial["warm_start_checkpoint_id"] == first_checkpoint_id
def test_bert_glue() -> None:
    """Smoke-test the experimental BERT GLUE PyTorch example."""
    example_yaml = conf.experimental_path("bert_glue_pytorch/const.yaml")
    config = conf.set_max_steps(conf.load_config(example_yaml), 2)
    exp.run_basic_test_with_temp_config(config, conf.experimental_path("bert_glue_pytorch/"), 1)
def test_resnet50() -> None:
    """Smoke-test the experimental ResNet-50 tf.keras example."""
    example_yaml = conf.experimental_path("resnet50_tf_keras/const.yaml")
    config = conf.set_max_steps(conf.load_config(example_yaml), 2)
    exp.run_basic_test_with_temp_config(config, conf.experimental_path("resnet50_tf_keras"), 1)
def test_mnist_tp_to_estimator() -> None:
    """Smoke-test the experimental MNIST Tensorpack-to-Estimator example."""
    example_yaml = conf.experimental_path("mnist_tp_to_estimator/const.yaml")
    config = conf.set_max_steps(conf.load_config(example_yaml), 2)
    exp.run_basic_test_with_temp_config(config, conf.experimental_path("mnist_tp_to_estimator"), 1)
def test_pytorch_const_multi_output() -> None:
    """Smoke-test the experimental multi-output MNIST PyTorch example."""
    example_yaml = conf.experimental_path("mnist_pytorch_multi_output/const.yaml")
    config = conf.set_max_steps(conf.load_config(example_yaml), 2)
    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("mnist_pytorch_multi_output"), 1
    )
def test_pytorch_const_with_amp() -> None:
    """Run the MNIST PyTorch example with AMP opt level "O1" enabled."""
    config = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    config = conf.set_amp_level(conf.set_max_steps(config, 2), "O1")
    exp.run_basic_test_with_temp_config(config, conf.official_examples_path("mnist_pytorch"), 1)
def test_faster_rcnn() -> None:
    """Smoke-test the FasterRCNN Tensorpack example, forced down to one slot."""
    config = conf.load_config(conf.experimental_path("FasterRCNN_tp/16-gpus.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 1)
    # This example takes a long time to get going; wait up to 80 minutes.
    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("FasterRCNN_tp"), 1, max_wait_secs=4800
    )
def test_nas_search() -> None:
    """Smoke-test the experimental NAS search example (single architecture)."""
    example_yaml = conf.experimental_path("nas_search/train_one_arch.yaml")
    config = conf.set_max_steps(conf.load_config(example_yaml), 2)
    exp.run_basic_test_with_temp_config(config, conf.experimental_path("nas_search"), 1)
def test_pytorch_parallel() -> None:
    """Run MNIST PyTorch across 8 slots with tensor auto-tuning enabled."""
    config = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    # Apply the config tweaks in the same order as the sibling parallel tests.
    for setter, value in (
        (conf.set_slots_per_trial, 8),
        (conf.set_native_parallel, False),
        (conf.set_max_steps, 2),
        (conf.set_tensor_auto_tuning, True),
    ):
        config = setter(config, value)
    exp.run_basic_test_with_temp_config(config, conf.official_examples_path("mnist_pytorch"), 1)
def test_iris() -> None:
    """Run the iris tf.keras example, then load and summarize its top checkpoint."""
    config = conf.load_config(conf.official_examples_path("iris_tf_keras/const.yaml"))
    config = conf.set_max_steps(config, 2)
    exp_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("iris_tf_keras"), 1
    )
    experiment_ref = Determined(conf.make_master_url()).get_experiment(exp_id)
    loaded_model = experiment_ref.top_checkpoint().load()
    loaded_model.summary()
def test_pytorch_cifar10_const() -> None:
    """Run the CIFAR-10 PyTorch example and reload its latest checkpoint.

    Passes the master URL explicitly to ``Determined`` for consistency with
    the other checkpoint-loading tests, and loads the model onto CPU (as the
    parallel variant of this test does) so the load succeeds on GPU-less
    test hosts.
    """
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_pytorch/const.yaml"))
    config = conf.set_max_steps(config, 2)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_pytorch"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    nn = (
        Determined(conf.make_master_url())
        .get_trial(trials[0].id)
        .select_checkpoint(latest=True)
        .load(map_location=torch.device("cpu"))
    )
    assert isinstance(nn, torch.nn.Module)
def test_tf_keras_mnist_parallel() -> None:
    """Fashion-MNIST tf.keras example, data-parallel across 8 slots.

    NOTE(review): a function with this exact name appears again further down;
    if both live in the same module, the later definition shadows this one —
    confirm whether these belong to different files.
    """
    config = conf.load_config(conf.official_examples_path("fashion_mnist_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("fashion_mnist_tf_keras"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_tensorpack_native_parallel() -> None:
    """MNIST Tensorpack example in native-parallel mode across 8 slots."""
    config = conf.load_config(conf.official_examples_path("mnist_tp/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, True)
    config = conf.set_max_steps(config, 2)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_tp"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_tf_keras_single_gpu(tf2: bool) -> None:
    """CIFAR-10 tf.keras example on a single slot, under a TF1 or TF2 image.

    NOTE(review): a function with this exact name appears again further down;
    if both live in the same module, the later definition shadows this one —
    confirm whether these belong to different files.
    """
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 1)
    config = conf.set_max_steps(config, 2)
    image_setter = conf.set_tf2_image if tf2 else conf.set_tf1_image
    config = image_setter(config)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def run_tf_keras_mnist_data_layer_test(tf2: bool, storage_type: str) -> None:
    """Helper: run the data-layer MNIST tf.keras example.

    ``tf2`` selects the TF2 vs TF1 image; ``storage_type`` of "lfs" selects a
    shared-filesystem data layer, anything else selects S3.
    """
    config = conf.load_config(conf.experimental_path("data_layer_mnist_tf_keras/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)
    config = (
        conf.set_shared_fs_data_layer(config)
        if storage_type == "lfs"
        else conf.set_s3_data_layer(config)
    )
    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("data_layer_mnist_tf_keras"), 1
    )
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """MNIST PyTorch on 8 slots with a given gradient-aggregation frequency.

    When ``use_amp`` is set, AMP opt level "O1" is also enabled.
    """
    config = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    if use_amp:
        config = conf.set_amp_level(config, "O1")
    exp.run_basic_test_with_temp_config(config, conf.official_examples_path("mnist_pytorch"), 1)
def test_pytorch_cifar10_parallel() -> None:
    """CIFAR-10 PyTorch on 8 slots; reload the latest checkpoint onto CPU."""
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_pytorch/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 8)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_pytorch"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(trials[0].id).select_checkpoint(latest=True)
    model = checkpoint.load(map_location=torch.device("cpu"))
    assert isinstance(model, torch.nn.Module)
def test_tf_keras_mnist_parallel() -> None:
    """Fashion-MNIST tf.keras on 8 slots with shared-fs checkpoint storage
    and a root-home bind mount.

    Fix: the original used ``config.get("bind_mounts", []).append(...)``,
    which appends to a throwaway list (and is silently lost) whenever the
    config has no "bind_mounts" key; ``setdefault`` stores the list first.
    """
    config = conf.load_config(conf.official_examples_path("fashion_mnist_tf_keras/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.setdefault("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("fashion_mnist_tf_keras"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
def test_tf_keras_single_gpu(tf2: bool) -> None:
    """CIFAR-10 tf.keras on one slot with shared-fs checkpoint storage and a
    root-home bind mount, under a TF1 or TF2 image.

    Fix: the original used ``config.get("bind_mounts", []).append(...)``,
    which appends to a throwaway list (and is silently lost) whenever the
    config has no "bind_mounts" key; ``setdefault`` stores the list first.
    """
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.setdefault("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 1)
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """CIFAR-10 tf.keras on 8 slots with a given gradient-aggregation
    frequency, under a TF1 or TF2 image."""
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    image_setter = conf.set_tf2_image if tf2 else conf.set_tf1_image
    config = image_setter(config)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_mnist_estimator_data_layer_parallel(storage_type: str) -> None:
    """Data-layer MNIST estimator example on 8 slots under the TF1 image.

    ``storage_type`` of "lfs" selects a shared-filesystem data layer;
    anything else selects S3.
    """
    config = conf.load_config(conf.experimental_path("data_layer_mnist_estimator/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_tf1_image(config)
    config = (
        conf.set_shared_fs_data_layer(config)
        if storage_type == "lfs"
        else conf.set_s3_data_layer(config)
    )
    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("data_layer_mnist_estimator"), 1
    )
def test_tensorpack_parallel(aggregation_frequency: int) -> None:
    """MNIST Tensorpack on 8 slots with shared-fs checkpoint storage, a
    root-home bind mount, and a given gradient-aggregation frequency.

    Fix: the original used ``config.get("bind_mounts", []).append(...)``,
    which appends to a throwaway list (and is silently lost) whenever the
    config has no "bind_mounts" key; ``setdefault`` stores the list first.
    """
    config = conf.load_config(conf.official_examples_path("mnist_tp/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.setdefault("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_tp"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
def test_mnist_estimmator_const_parallel(native_parallel: bool, tf2: bool) -> None:
    """MNIST estimator fixture on 8 slots, native or non-native parallel.

    NOTE(review): "estimmator" in the function name looks like a typo, but
    renaming would change the collected test ID, so it is kept as-is.
    """
    # TF2 + native parallel is an unsupported combination; skip it.
    if tf2 and native_parallel:
        pytest.skip("TF2 native parallel training is not currently supported.")
    config = conf.load_config(conf.fixtures_path("mnist_estimator/single-multi-slot.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, native_parallel)
    config = conf.set_max_steps(config, 2)
    image_setter = conf.set_tf2_image if tf2 else conf.set_tf1_image
    config = image_setter(config)
    exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_estimator"), 1
    )
def test_pytorch_const_warm_start() -> None:
    """
    Test that specifying an earlier trial checkpoint to warm-start from
    correctly populates the later trials' `warm_start_checkpoint_id` fields.
    """
    config = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    config = conf.set_max_steps(config, 2)

    experiment_id1 = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_pytorch"), 1
    )
    trials = exp.experiment_trials(experiment_id1)
    assert len(trials) == 1

    first_trial = trials[0]
    first_trial_id = first_trial["id"]
    assert len(first_trial["steps"]) == 2
    first_checkpoint_id = first_trial["steps"][-1]["checkpoint"]["id"]

    # Start from a fresh copy of the config, switch the search method to
    # random, and warm-start from the first trial's checkpoint.
    config_obj = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    searcher = config_obj["searcher"]
    searcher["source_trial_id"] = first_trial_id
    searcher["name"] = "random"
    searcher["max_steps"] = 1
    searcher["max_trials"] = 3

    experiment_id2 = exp.run_basic_test_with_temp_config(
        config_obj, conf.official_examples_path("mnist_pytorch"), 3
    )
    trials = exp.experiment_trials(experiment_id2)
    assert len(trials) == 3
    for trial in trials:
        assert trial["warm_start_checkpoint_id"] == first_checkpoint_id