def test_fail_on_first_validation() -> None:
    """Run the no_op experiment with a failure injected on first validation.

    Loads the ``no_op/single.yaml`` fixture config, stores the failure
    message under the ``fail_on_first_validation`` hyperparameter, and hands
    the config, the fixture directory and the message to
    ``exp.run_failure_test_with_temp_config``.
    """
    expected_message = "failed on first validation"

    experiment_config = conf.load_config(conf.fixtures_path("no_op/single.yaml"))
    # The same string is used both as the injected hyperparameter value and
    # as the third argument of the failure-test helper (presumably the text
    # it looks for in the trial logs -- confirm against the helper).
    experiment_config["hyperparameters"]["fail_on_first_validation"] = expected_message

    model_dir = conf.fixtures_path("no_op")
    exp.run_failure_test_with_temp_config(experiment_config, model_dir, expected_message)
def test_fail_on_chechpoint_save() -> None:
    """Run the no_op experiment with a failure injected on checkpoint save.

    Loads the ``no_op/single.yaml`` fixture config, stores the failure
    message under the ``fail_on_chechpoint_save`` hyperparameter, and hands
    the config, the fixture directory and the message to
    ``exp.run_failure_test_with_temp_config``.
    """
    expected_message = "failed on checkpoint save"

    experiment_config = conf.load_config(conf.fixtures_path("no_op/single.yaml"))
    # NOTE(review): the key is spelled "chechpoint"; it is kept verbatim
    # because whatever consumes this hyperparameter lives outside this file
    # and presumably uses the same spelling -- confirm before renaming.
    experiment_config["hyperparameters"]["fail_on_chechpoint_save"] = expected_message

    model_dir = conf.fixtures_path("no_op")
    exp.run_failure_test_with_temp_config(experiment_config, model_dir, expected_message)
def test_fail_on_preclose_chechpoint_save() -> None:
    """Run a one-batch no_op experiment with a checkpoint-save failure injected.

    Same setup as the plain checkpoint-save failure test, but the config is
    additionally restricted to a searcher ``max_length`` of one batch, a
    ``min_validation_period`` of one batch, and ``max_restarts`` of 1 before
    being passed to ``exp.run_failure_test_with_temp_config``.
    """
    expected_message = "failed on checkpoint save"

    experiment_config = conf.load_config(conf.fixtures_path("no_op/single.yaml"))
    # NOTE(review): the key is spelled "chechpoint"; kept verbatim because
    # its consumer is outside this file -- confirm before renaming.
    experiment_config["hyperparameters"]["fail_on_chechpoint_save"] = expected_message

    # Shrink the run to a single batch. Each value is a fresh dict literal so
    # the two settings never share one object.
    experiment_config["searcher"]["max_length"] = {"batches": 1}
    experiment_config["min_validation_period"] = {"batches": 1}
    experiment_config["max_restarts"] = 1

    model_dir = conf.fixtures_path("no_op")
    exp.run_failure_test_with_temp_config(experiment_config, model_dir, expected_message)
def test_launch_layer_exit(collect_trial_profiles: Callable[[int], None]) -> None:
    """Run cifar10_pytorch with a nonexistent launch module and check the logs.

    The ``cifar10_pytorch/const.yaml`` example config has its entrypoint
    replaced with one that invokes ``nonexistent_launch_module``. After the
    failure-test helper returns the experiment ID, the first trial is fetched
    through the ``Determined`` client, passed to ``collect_trial_profiles``,
    and its logs are asserted to contain the non-zero exit code message.

    Args:
        collect_trial_profiles: Callable invoked with the first trial's ID.
    """
    example_dir = "cifar10_pytorch"
    launch_config = conf.set_entrypoint(
        conf.load_config(conf.cv_examples_path("cifar10_pytorch/const.yaml")),
        "python3 -m nonexistent_launch_module model_def:CIFARTrial",
    )

    failed_experiment_id = exp.run_failure_test_with_temp_config(
        launch_config,
        conf.cv_examples_path(example_dir),
    )
    experiment_trial_list = exp.experiment_trials(failed_experiment_id)

    # Build the client before reading the trial ID, mirroring the original
    # evaluation order. The returned trial object is intentionally unused.
    client = Determined(conf.make_master_url())
    first_trial_id = experiment_trial_list[0].trial.id
    client.get_trial(first_trial_id)

    collect_trial_profiles(first_trial_id)

    exit_message_found = exp.check_if_string_present_in_trial_logs(
        first_trial_id,
        "container failed with non-zero exit code: 1",
    )
    assert exit_message_found