def test_launch_layer_cifar(collect_trial_profiles: Callable[[int], None]) -> None:
    """Run the CIFAR-10 PyTorch example through the horovod launch layer.

    Verifies that the latest checkpoint loads on CPU, collects trial
    profiles, and checks that the allocation exited cleanly.
    """
    cfg = conf.load_config(conf.cv_examples_path("cifar10_pytorch/const.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_slots_per_trial(cfg, 1)
    cfg = conf.set_profiling_enabled(cfg)
    cfg = conf.set_entrypoint(
        cfg,
        "python3 -m determined.launch.horovod --autohorovod --trial model_def:CIFARTrial",
    )

    experiment_id = exp.run_basic_test_with_temp_config(
        cfg, conf.cv_examples_path("cifar10_pytorch"), 1
    )
    trial_id = exp.experiment_trials(experiment_id)[0].trial.id

    # Make sure the latest checkpoint is loadable without a GPU.
    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(trial_id).select_checkpoint(latest=True)
    checkpoint.load(map_location="cpu")

    collect_trial_profiles(trial_id)

    assert exp.check_if_string_present_in_trial_logs(
        trial_id,
        "allocation stopped after resources exited successfully with a zero exit code",
    )
def test_pytorch_parallel() -> None:
    """Run mnist_pytorch on 8 slots with an initial validation.

    After the run, asserts that the trial logs report the expected
    trained-record and validated-record counts.
    """
    config = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_max_length(config, {"batches": 200})
    config = conf.set_tensor_auto_tuning(config, True)
    config = conf.set_perform_initial_validation(config, True)

    exp_id = exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("mnist_pytorch"), 1
    )
    exp.assert_performed_initial_validation(exp_id)

    # Check on record/batch counts we emitted in logs.
    validation_size = 10000
    # BUG FIX: global_batch_size was assigned twice from the identical
    # expression; the redundant second assignment has been removed.
    global_batch_size = config["hyperparameters"]["global_batch_size"]
    num_workers = config.get("resources", {}).get("slots_per_trial", 1)
    scheduling_unit = config.get("scheduling_unit", 100)
    per_slot_batch_size = global_batch_size // num_workers
    # Ceiling division: a final partial validation batch still counts as one.
    exp_val_batches = (validation_size + (per_slot_batch_size - 1)) // per_slot_batch_size
    patterns = [
        # Expect two copies of matching training reports.
        f"trained: {scheduling_unit * global_batch_size} records.*in {scheduling_unit} batches",
        f"trained: {scheduling_unit * global_batch_size} records.*in {scheduling_unit} batches",
        f"validated: {validation_size} records.*in {exp_val_batches} batches",
    ]
    trial_id = exp.experiment_trials(exp_id)[0].trial.id
    exp.assert_patterns_in_trial_logs(trial_id, patterns)
def test_epoch_sync(num_workers: int, global_batch_size: int, dataset_len: int) -> None:
    """
    Test that epoch_idx is synchronized across all workers regardless of whether the
    number of batches is evenly divisible by the number of workers.
    """
    max_len_batches = 10
    cfg = conf.load_config(conf.fixtures_path("pytorch_no_op/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, num_workers)
    cfg = conf.set_max_length(cfg, {"batches": max_len_batches})
    cfg = conf.set_hparam(cfg, "dataset_len", dataset_len)
    cfg = conf.set_global_batch_size(cfg, global_batch_size)

    exp_id = exp.run_basic_test_with_temp_config(
        cfg, conf.fixtures_path("pytorch_no_op"), 1
    )
    trial_id = exp.experiment_trials(exp_id)[0].trial.id

    # Ceiling division: a trailing partial batch still counts as one batch.
    batches_per_epoch = -(-dataset_len // global_batch_size)

    for batch_idx in range(max_len_batches):
        epoch_idx = batch_idx // batches_per_epoch
        for rank in range(cfg["resources"]["slots_per_trial"]):
            assert exp.check_if_string_present_in_trial_logs(
                trial_id, f"rank {rank} finished batch {batch_idx} in epoch {epoch_idx}"
            )
def test_detr_coco_pytorch_distributed() -> None:
    """Short two-slot run of the DETR COCO PyTorch example on fake data."""
    example_dir = conf.cv_examples_path("detr_coco_pytorch")
    cfg = conf.load_config(conf.cv_examples_path("detr_coco_pytorch/const_fake.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_slots_per_trial(cfg, 2)
    exp.run_basic_test_with_temp_config(cfg, example_dir, 1)
def test_faster_rcnn() -> None:
    """Short FasterRCNN tensorpack run on one slot, with an extended wait budget."""
    model_dir = conf.experimental_path("trial/FasterRCNN_tp")
    cfg = conf.load_config(conf.experimental_path("trial/FasterRCNN_tp/16-gpus.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 128})
    cfg = conf.set_slots_per_trial(cfg, 1)
    # Long training steps; allow up to 80 minutes before timing out.
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1, max_wait_secs=4800)
def test_pytorch_const_native_parallel() -> None:
    """mnist_pytorch const config with native parallelism across 8 slots."""
    model_dir = conf.tutorials_path("mnist_pytorch")
    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_language_modeling_mlm() -> None:
    """Smoke-test the HF masked-language-modeling example on 8 slots."""
    example_path = conf.model_hub_examples_path("huggingface/language-modeling")
    cfg = conf.load_config(os.path.join(example_path, "mlm_config.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 16)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def test_multiple_choice_swag() -> None:
    """Smoke-test the HF multiple-choice (SWAG) example on 8 slots."""
    example_path = conf.model_hub_examples_path("huggingface/multiple-choice")
    cfg = conf.load_config(os.path.join(example_path, "swag_config.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 64)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def test_pytorch_const_native_parallel() -> None:
    """mnist_pytorch (official examples) with native parallelism, 2 steps."""
    model_dir = conf.official_examples_path("mnist_pytorch")
    cfg = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_steps(cfg, 2)
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_squad_v2_with_beam_search() -> None:
    """Smoke-test the HF SQuAD v2 question-answering example with beam search."""
    example_path = conf.model_hub_examples_path("huggingface/question-answering")
    cfg = conf.load_config(os.path.join(example_path, "squad_v2_beam_search.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 16)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def test_token_classification_ner() -> None:
    """Smoke-test the HF token-classification (NER) example on 8 slots."""
    example_path = conf.model_hub_examples_path("huggingface/token-classification")
    cfg = conf.load_config(os.path.join(example_path, "ner_config.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 32)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def run_tf_keras_dcgan_example() -> None:
    """Run the tf.keras DCGAN example on 8 slots with the TF2 image."""
    model_dir = conf.gan_examples_path("dcgan_tf_keras")
    cfg = conf.load_config(conf.gan_examples_path("dcgan_tf_keras/const.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_min_validation_period(cfg, {"batches": 200})
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_tf2_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_tf_keras_mnist_parallel() -> None:
    """fashion_mnist_tf_keras on 8 slots; expect exactly one trial."""
    model_dir = conf.tutorials_path("fashion_mnist_tf_keras")
    cfg = conf.load_config(conf.tutorials_path("fashion_mnist_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_squad_amp() -> None:
    """Smoke-test the HF SQuAD example with Apex mixed precision enabled."""
    example_path = conf.model_hub_examples_path("huggingface/question-answering")
    cfg = conf.load_config(os.path.join(example_path, "squad.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 64)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    # Turn on Apex AMP via the model's hyperparameter.
    cfg = conf.set_hparam(cfg, "use_apex_amp", True)
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def test_text_classification_xnli_amp() -> None:
    """Smoke-test the HF XNLI text-classification example with Apex AMP."""
    example_path = conf.model_hub_examples_path("huggingface/text-classification")
    cfg = conf.load_config(os.path.join(example_path, "xnli_config.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_global_batch_size(cfg, 128)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    # Turn on Apex AMP via the model's hyperparameter.
    cfg = conf.set_hparam(cfg, "use_apex_amp", True)
    cfg = set_docker_image(cfg)
    exp.run_basic_test_with_temp_config(cfg, example_path, 1)
def test_pytorch_parallel() -> None:
    """mnist_pytorch on 8 slots with tensor auto-tuning and an initial validation."""
    model_dir = conf.tutorials_path("mnist_pytorch")
    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_tensor_auto_tuning(cfg, True)
    cfg = conf.set_perform_initial_validation(cfg, True)

    exp_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1, has_zeroth_step=True)
    exp.assert_performed_initial_validation(exp_id)
def test_pytorch_parallel() -> None:
    """mnist_pytorch (official examples) on 8 slots without native parallelism."""
    model_dir = conf.official_examples_path("trial/mnist_pytorch")
    cfg = conf.load_config(conf.official_examples_path("trial/mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_tensor_auto_tuning(cfg, True)
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_tensorpack_native_parallel() -> None:
    """mnist_tp with native parallelism across 8 slots; expect one trial."""
    model_dir = conf.official_examples_path("trial/mnist_tp")
    cfg = conf.load_config(conf.official_examples_path("trial/mnist_tp/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_length(cfg, {"batches": 32})
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_estimator_when_detecting_gpus() -> None:
    """Run the estimator GPU-detection fixture on 8 slots."""
    cfg = conf.load_config(conf.fixtures_path("estimator_gpu_detection/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    exp.run_basic_test_with_temp_config(
        cfg, conf.fixtures_path("estimator_gpu_detection/"), 1, has_zeroth_step=False
    )
def test_tf_keras_mnist_parallel() -> None:
    """fashion_mnist_tf_keras (official examples) on 8 slots; expect one trial."""
    model_dir = conf.official_examples_path("fashion_mnist_tf_keras")
    cfg = conf.load_config(conf.official_examples_path("fashion_mnist_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_tf_keras_single_gpu(tf2: bool) -> None:
    """cifar10_cnn_tf_keras on a single slot under either the TF1 or TF2 image."""
    model_dir = conf.official_examples_path("cifar10_cnn_tf_keras")
    cfg = conf.load_config(conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 1)
    cfg = conf.set_max_steps(cfg, 2)
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_on_trial_close_callback() -> None:
    """Verify rank 0 logs its on_trial_close completion for the no-op estimator."""
    cfg = conf.load_config(conf.fixtures_path("estimator_no_op/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 3})
    exp_id = exp.run_basic_test_with_temp_config(cfg, conf.fixtures_path("estimator_no_op"), 1)

    trial_id = exp.experiment_trials(exp_id)[0].trial.id
    assert exp.check_if_string_present_in_trial_logs(
        trial_id, "rank 0 has completed on_trial_close"
    )
def test_mnist_estimator_const_parallel(tf2: bool) -> None:
    """mnist_estimator on 8 slots with an initial validation, TF1 or TF2 image."""
    cfg = conf.load_config(conf.fixtures_path("mnist_estimator/single-multi-slot.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)
    cfg = conf.set_perform_initial_validation(cfg, True)

    # NOTE(review): the config comes from fixtures_path but the model dir from
    # cv_examples_path — confirm this pairing is intentional.
    exp_id = exp.run_basic_test_with_temp_config(
        cfg, conf.cv_examples_path("mnist_estimator"), 1
    )
    exp.assert_performed_initial_validation(exp_id)
def test_pytorch_gan_parallel() -> None:
    """Run the PyTorch GAN example on 8 slots and load its latest checkpoint."""
    model_dir = conf.gan_examples_path("gan_mnist_pytorch")
    cfg = conf.load_config(conf.gan_examples_path("gan_mnist_pytorch/const.yaml"))
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_slots_per_trial(cfg, 8)

    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    trials = exp.experiment_trials(experiment_id)

    # Make sure the latest checkpoint is loadable without a GPU.
    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(trials[0]["id"]).select_checkpoint(latest=True)
    checkpoint.load(map_location="cpu")
def test_tensorpack_parallel(aggregation_frequency: int) -> None:
    """mnist_tp on 8 slots with a parameterized aggregation frequency."""
    model_dir = conf.official_examples_path("mnist_tp")
    cfg = conf.load_config(conf.official_examples_path("mnist_tp/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """mnist_pytorch on 8 slots, parameterized over aggregation frequency and AMP.

    Skips the incompatible AMP + aggregation-frequency > 1 combination.
    """
    if use_amp and aggregation_frequency > 1:
        pytest.skip("Mixed precision is not support with aggregation frequency > 1.")

    model_dir = conf.tutorials_path("mnist_pytorch")
    cfg = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if use_amp:
        cfg = conf.set_amp_level(cfg, "O1")
    exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """cifar10_tf_keras on 8 slots, parameterized over aggregation frequency and TF image."""
    model_dir = conf.cv_examples_path("cifar10_tf_keras")
    cfg = conf.load_config(conf.cv_examples_path("cifar10_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_pytorch_cifar10_parallel() -> None:
    """cifar10_cnn_pytorch on 8 slots; the latest checkpoint must load as an nn.Module."""
    model_dir = conf.official_examples_path("trial/cifar10_cnn_pytorch")
    cfg = conf.load_config(conf.official_examples_path("trial/cifar10_cnn_pytorch/const.yaml"))
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_slots_per_trial(cfg, 8)

    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    trials = exp.experiment_trials(experiment_id)

    client = Determined(conf.make_master_url())
    checkpoint = client.get_trial(trials[0]["id"]).select_checkpoint(latest=True)
    model = checkpoint.load()
    assert isinstance(model, torch.nn.Module)
def test_distributed_logging() -> None:
    """Every worker rank must log completion of the single training batch."""
    cfg = conf.load_config(conf.fixtures_path("pytorch_no_op/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 1})

    exp_id = exp.run_basic_test_with_temp_config(cfg, conf.fixtures_path("pytorch_no_op"), 1)
    trial_id = exp.experiment_trials(exp_id)[0]["id"]

    for rank in range(cfg["resources"]["slots_per_trial"]):
        assert exp.check_if_string_present_in_trial_logs(
            trial_id, "finished train_batch for rank {}".format(rank)
        )
def test_tf_keras_native_parallel(tf2: bool) -> None:
    """cifar10_cnn_tf_keras with native parallelism on 8 slots, TF1 or TF2 image."""
    model_dir = conf.official_examples_path("trial/cifar10_cnn_tf_keras")
    cfg = conf.load_config(conf.official_examples_path("trial/cifar10_cnn_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, True)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)
    experiment_id = exp.run_basic_test_with_temp_config(cfg, model_dir, 1)
    assert len(exp.experiment_trials(experiment_id)) == 1