def test_pytorch_11_const(aggregation_frequency: int, using_k8s: bool) -> None:
    """Run the mnist_pytorch const experiment (PyTorch 1.1 config).

    When running under Kubernetes, attach a minimal pod spec (CI label plus an
    emptyDir volume mounted into the Determined container).
    """
    config = conf.load_config(
        conf.fixtures_path("mnist_pytorch/const-pytorch11.yaml")
    )
    config = conf.set_aggregation_frequency(config, aggregation_frequency)

    if using_k8s:
        spec = {
            "metadata": {"labels": {"ci": "testing"}},
            "spec": {
                "containers": [
                    {
                        "name": "determined-container",
                        "volumeMounts": [{"name": "temp1", "mountPath": "/random"}],
                    }
                ],
                "volumes": [{"name": "temp1", "emptyDir": {}}],
            },
        }
        config = conf.set_pod_spec(config, spec)

    exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("mnist_pytorch"), 1
    )
def test_pytorch_11_const(aggregation_frequency: int) -> None:
    """Run the mnist_pytorch const experiment (PyTorch 1.1 config) for one step."""
    fixture = conf.fixtures_path("mnist_pytorch/const-pytorch11.yaml")
    config = conf.set_aggregation_frequency(
        conf.load_config(fixture), aggregation_frequency
    )
    exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_pytorch"), 1
    )
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """Run cifar10_tf_keras distributed over 8 slots; expect exactly one trial."""
    cfg = conf.load_config(conf.cv_examples_path("cifar10_tf_keras/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_max_length(cfg, {"batches": 200})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    # Pick the container image matching the requested TF major version.
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    experiment_id = exp.run_basic_test_with_temp_config(
        cfg, conf.cv_examples_path("cifar10_tf_keras"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_tensorpack_parallel(aggregation_frequency: int) -> None:
    """Run the mnist_tp tensorpack example over 8 slots; expect exactly one trial."""
    cfg = conf.load_config(conf.official_examples_path("trial/mnist_tp/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_length(cfg, {"batches": 32})
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)

    experiment_id = exp.run_basic_test_with_temp_config(
        cfg, conf.official_examples_path("trial/mnist_tp"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """Run mnist_pytorch distributed over 8 slots, optionally with AMP level O1.

    The AMP + aggregation-frequency>1 combination is skipped because that
    combination is not supported.
    """
    if use_amp and aggregation_frequency > 1:
        # Fixed grammar in the skip message ("is not support" -> "is not supported").
        pytest.skip("Mixed precision is not supported with aggregation frequency > 1.")

    config = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_max_length(config, {"batches": 200})
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    if use_amp:
        config = conf.set_amp_level(config, "O1")

    exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("mnist_pytorch"), 1
    )
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """Run mnist_pytorch over 8 slots (non-native parallel), optionally with AMP O1."""
    cfg = conf.load_config(conf.official_examples_path("mnist_pytorch/const.yaml"))
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    if use_amp:
        cfg = conf.set_amp_level(cfg, "O1")

    exp.run_basic_test_with_temp_config(
        cfg, conf.official_examples_path("mnist_pytorch"), 1
    )
def test_tf_keras_parallel(aggregation_frequency: int, tf2: bool) -> None:
    """Run cifar10_cnn_tf_keras over 8 slots (non-native parallel); expect one trial."""
    cfg = conf.load_config(
        conf.official_examples_path("cifar10_cnn_tf_keras/const.yaml")
    )
    cfg = conf.set_slots_per_trial(cfg, 8)
    cfg = conf.set_native_parallel(cfg, False)
    cfg = conf.set_max_steps(cfg, 2)
    cfg = conf.set_aggregation_frequency(cfg, aggregation_frequency)
    # Pick the container image matching the requested TF major version.
    if tf2:
        cfg = conf.set_tf2_image(cfg)
    else:
        cfg = conf.set_tf1_image(cfg)

    experiment_id = exp.run_basic_test_with_temp_config(
        cfg, conf.official_examples_path("cifar10_cnn_tf_keras"), 1
    )
    assert len(exp.experiment_trials(experiment_id)) == 1
def test_pytorch_const_parallel(aggregation_frequency: int, use_amp: bool) -> None:
    """Run trial/mnist_pytorch over 8 slots, optionally with AMP level O1.

    The AMP + aggregation-frequency>1 combination is skipped because that
    combination is not supported.
    """
    if use_amp and aggregation_frequency > 1:
        # Fixed grammar in the skip message ("is not support" -> "is not supported").
        pytest.skip("Mixed precision is not supported with aggregation frequency > 1.")

    config = conf.load_config(
        conf.official_examples_path("trial/mnist_pytorch/const.yaml")
    )
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_native_parallel(config, False)
    config = conf.set_max_steps(config, 2)
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    if use_amp:
        config = conf.set_amp_level(config, "O1")

    exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("trial/mnist_pytorch"), 1
    )
def test_tf_keras_parallel(
    aggregation_frequency: int,
    tf2: bool,
    collect_trial_profiles: Callable[[int], None],
) -> None:
    """Run cifar10_tf_keras over 8 slots with profiling enabled.

    Verifies that exactly one trial is created, that a checkpoint can be
    exported and re-loaded, and that the expected record/batch counts appear
    in the trial logs.
    """
    config = conf.load_config(conf.cv_examples_path("cifar10_tf_keras/const.yaml"))
    config = conf.set_slots_per_trial(config, 8)
    config = conf.set_max_length(config, {"batches": 200})
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)
    config = conf.set_profiling_enabled(config)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("cifar10_tf_keras"), 1
    )
    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1

    # Test exporting a checkpoint.
    export_and_load_model(experiment_id)
    collect_trial_profiles(trials[0].trial.id)

    # Check on record/batch counts we emitted in logs.
    validation_size = 10000
    # NOTE: the original assigned global_batch_size twice; the redundant
    # duplicate assignment was removed.
    global_batch_size = config["hyperparameters"]["global_batch_size"]
    num_workers = config.get("resources", {}).get("slots_per_trial", 1)
    scheduling_unit = config.get("scheduling_unit", 100)
    per_slot_batch_size = global_batch_size // num_workers
    # Ceiling division: the final validation batch may be partial.
    exp_val_batches = (
        validation_size + (per_slot_batch_size - 1)
    ) // per_slot_batch_size
    patterns = [
        # Expect two copies of matching training reports.
        f"trained: {scheduling_unit * global_batch_size} records.*in {scheduling_unit} batches",
        f"trained: {scheduling_unit * global_batch_size} records.*in {scheduling_unit} batches",
        f"validated: {validation_size} records.*in {exp_val_batches} batches",
    ]
    exp.assert_patterns_in_trial_logs(trials[0].trial.id, patterns)
def test_pytorch_11_const(
    aggregation_frequency: int,
    using_k8s: bool,
    collect_trial_profiles: Callable[[int], None],
) -> None:
    """Run the mnist_pytorch const experiment (PyTorch 1.1 config) with profiling.

    When running under Kubernetes, attach a minimal pod spec (CI label plus an
    emptyDir volume mounted into the Determined container). Afterwards, collect
    profiling data for the single resulting trial.
    """
    config = conf.load_config(
        conf.fixtures_path("mnist_pytorch/const-pytorch11.yaml")
    )
    config = conf.set_aggregation_frequency(config, aggregation_frequency)
    config = conf.set_profiling_enabled(config)

    if using_k8s:
        config = conf.set_pod_spec(
            config,
            {
                "metadata": {"labels": {"ci": "testing"}},
                "spec": {
                    "containers": [
                        {
                            "name": "determined-container",
                            "volumeMounts": [
                                {"name": "temp1", "mountPath": "/random"}
                            ],
                        }
                    ],
                    "volumes": [{"name": "temp1", "emptyDir": {}}],
                },
            },
        )

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("mnist_pytorch"), 1
    )
    first_trial = exp.experiment_trials(experiment_id)[0]
    collect_trial_profiles(first_trial.trial.id)