Example #1
0
def test_pytorch_parallel() -> None:
    config = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_tensor_auto_tuning(config, True)

    exp.run_basic_test_with_temp_config(config, conf.official_examples_path("mnist_pytorch"), 1)
Example #2
0
def test_faster_rcnn() -> None:
    config = conf.load_config(
        conf.experimental_path("FasterRCNN_tp/16-gpus.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 1)

    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("FasterRCNN_tp"), 1, max_wait_secs=4800)
def test_tensorpack_native_parallel() -> None:
    config = conf.load_config(
        conf.official_examples_path("mnist_tp/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, True)
    config = conf.set_max_steps(config, 2)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_tp"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
Example #4
0
def test_tf_keras_single_gpu(tf2: bool) -> None:
    config = conf.load_config(
        conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 1)
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
Example #5
0
def test_tf_keras_mnist_parallel() -> None:
    config = conf.load_config(
        conf.official_examples_path("fashion_mnist_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("fashion_mnist_tf_keras"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
Example #6
0
def test_pytorch_cifar10_parallel() -> None:
    config = conf.load_config(
        conf.official_examples_path("cifar10_cnn_pytorch/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 8)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_pytorch"), 1)
    trials = exp.experiment_trials(experiment_id)
    nn = Determined().get_trial(
        trials[0].id).select_checkpoint(latest=True).load()
    assert isinstance(nn, torch.nn.Module)
Example #7
0
def test_pytorch_const_parallel(aggregation_frequency: int,
                                use_amp: bool) -> None:
    config = conf.load_config(
        conf.official_examples_path("mnist_pytorch/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    if use_amp:
        config = conf.set_amp_level(config, "O1")

    exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_pytorch"), 1)
Example #8
0
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    config = conf.load_config(
        conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
Example #9
0
def test_tf_keras_single_gpu(tf2: bool) -> None:
    config = conf.load_config(
        conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.get("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 1)
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_tf_keras"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
Example #10
0
def test_tf_keras_mnist_parallel() -> None:
    config = conf.load_config(
        conf.official_examples_path("fashion_mnist_tf_keras/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.get("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("fashion_mnist_tf_keras"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
def test_mnist_estimator_data_layer_parallel(storage_type: str) -> None:
    config = conf.load_config(
        conf.experimental_path("data_layer_mnist_estimator/const.yaml"))
    config = conf.set_max_steps(config, 2)
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_tf1_image(config)
    if storage_type == "lfs":
        config = conf.set_shared_fs_data_layer(config)
    else:
        config = conf.set_s3_data_layer(config)

    exp.run_basic_test_with_temp_config(
        config, conf.experimental_path("data_layer_mnist_estimator"), 1)
def test_tensorpack_parallel(aggregation_frequency: int) -> None:
    config = conf.load_config(
        conf.official_examples_path("mnist_tp/const.yaml"))
    config["checkpoint_storage"] = exp.shared_fs_checkpoint_config()
    config.get("bind_mounts", []).append(exp.root_user_home_bind_mount())
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_tp"), 1)
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1
def test_mnist_estimmator_const_parallel(native_parallel: bool,
                                         tf2: bool) -> None:
    if tf2 and native_parallel:
        pytest.skip("TF2 native parallel training is not currently supported.")

    config = conf.load_config(
        conf.fixtures_path("mnist_estimator/single-multi-slot.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, native_parallel)
    config = conf.set_max_steps(config, 2)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_estimator"), 1)