def test_mnist_estimator_const(tf2: bool) -> None:
    config = conf.load_config(conf.fixtures_path("mnist_estimator/single.yaml"))
    config = conf.set_tf2_image(config) if tf2 else conf.set_tf1_image(config)

    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_estimator"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1

    # Check validation metrics.
    steps = trials[0]["steps"]
    assert len(steps) == 1
    step = steps[0]
    assert "validation" in step
    v_metrics = step["validation"]["metrics"]["validation_metrics"]

    # GPU training is non-deterministic, but on CPU we can validate that we
    # reach a consistent result.
    if not cluster.running_on_gpu():
        assert v_metrics["accuracy"] == 0.9125999808311462

    # Check training metrics.
    full_trial_metrics = exp.trial_metrics(trials[0]["id"])
    for step in full_trial_metrics["steps"]:
        metrics = step["metrics"]
        batch_metrics = metrics["batch_metrics"]
        assert len(batch_metrics) == 100
        for batch_metric in batch_metrics:
            assert batch_metric["loss"] > 0
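# The `tf2` argument to test_mnist_estimator_const above is supplied by pytest
# parametrization rather than a fixture. A minimal sketch of the usual pattern (the exact
# markers attached in the repo are an assumption; only the parametrize mechanics are shown):
import pytest


@pytest.mark.parametrize("tf2", [True, False])
def test_mnist_estimator_const_tf1_and_tf2(tf2: bool) -> None:
    # Hypothetical wrapper that runs the test body above once per TensorFlow image.
    test_mnist_estimator_const(tf2)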
def test_cifar10_byol_pytorch_accuracy() -> None:
    config = conf.load_config(conf.cv_examples_path("byol_pytorch/const-cifar10.yaml"))
    # Limit convergence time, since the full run was exceeding the 30-minute limit.
    config["searcher"]["max_length"]["epochs"] = 20
    config["hyperparameters"]["classifier"]["train_epochs"] = 1
    config = conf.set_random_seed(config, 1591280374)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("byol_pytorch"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0].trial.id)

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["test_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    # Accuracy reachable within the limited convergence time -- goes higher with full training.
    target_accuracy = 0.40
    assert max(validation_accuracies) > target_accuracy, (
        "cifar10_byol_pytorch did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_mnist_tp_accuracy() -> None:
    config = conf.load_config(conf.official_examples_path("mnist_tp/const.yaml"))
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("mnist_tp"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    # TODO (DET-3082): Our trial reporting renames the validation metric from
    # "accuracy" to "val_accuracy". We should probably drop the added prefix so
    # the metric keeps the name specified by the model code.
    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["val_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.95
    assert max(validation_accuracies) > target_accuracy, (
        "mnist_tp did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_metric_gathering() -> None:
    """
    Confirm that metrics are gathered from the trial the way that we expect.
    """
    experiment_id = exp.run_basic_test(
        conf.fixtures_path("metric_maker/const.yaml"), conf.fixtures_path("metric_maker"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1

    # Read the structure of the metrics directly from the config file.
    config = conf.load_config(conf.fixtures_path("metric_maker/const.yaml"))

    base_value = config["hyperparameters"]["starting_base_value"]
    gain_per_batch = config["hyperparameters"]["gain_per_batch"]
    training_structure = config["hyperparameters"]["training_structure"]["val"]
    validation_structure = config["hyperparameters"]["validation_structure"]["val"]

    scheduling_unit = 100

    # Check training metrics.
    full_trial_metrics = exp.trial_metrics(trials[0].trial.id)
    batches_trained = 0
    for step in full_trial_metrics["steps"]:
        metrics = step["metrics"]

        actual = metrics["batch_metrics"]
        assert len(actual) == scheduling_unit

        first_base_value = base_value + batches_trained
        batch_values = first_base_value + gain_per_batch * np.arange(scheduling_unit)
        expected = [structure_to_metrics(value, training_structure) for value in batch_values]
        assert structure_equal(expected, actual)
        batches_trained = step["total_batches"]

    # Check validation metrics.
    validation_workloads = exp.workloads_with_validation(trials[0].workloads)
    for validation in validation_workloads:
        actual = validation.metrics
        batches_trained = validation.totalBatches

        value = base_value + batches_trained
        expected = structure_to_metrics(value, validation_structure)
        assert structure_equal(expected, actual)
def test_metric_gathering() -> None:
    """
    Confirm that metrics are gathered from the trial the way that we expect.
    """
    experiment_id = exp.run_basic_test(
        conf.fixtures_path("metric_maker/const.yaml"), conf.fixtures_path("metric_maker"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    assert len(trials) == 1

    # Read the structure of the metrics directly from the config file.
    config = conf.load_config(conf.fixtures_path("metric_maker/const.yaml"))

    base_value = config["hyperparameters"]["starting_base_value"]
    gain_per_batch = config["hyperparameters"]["gain_per_batch"]
    training_structure = config["hyperparameters"]["training_structure"]["val"]
    validation_structure = config["hyperparameters"]["validation_structure"]["val"]

    scheduling_unit = 100

    # Check training metrics.
    full_trial_metrics = exp.trial_metrics(trials[0]["id"])
    for step in full_trial_metrics["steps"]:
        metrics = step["metrics"]
        assert metrics["num_inputs"] == scheduling_unit

        actual = metrics["batch_metrics"]
        assert len(actual) == scheduling_unit

        first_base_value = base_value + (step["id"] - 1) * scheduling_unit
        batch_values = first_base_value + gain_per_batch * np.arange(scheduling_unit)
        expected = [structure_to_metrics(value, training_structure) for value in batch_values]
        assert structure_equal(expected, actual)

    # Check validation metrics.
    for step in trials[0]["steps"]:
        validation = step["validation"]
        metrics = validation["metrics"]
        actual = metrics["validation_metrics"]

        value = base_value + step["id"] * scheduling_unit
        expected = structure_to_metrics(value, validation_structure)
        assert structure_equal(expected, actual)
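# The metric_maker tests rely on two helpers, structure_to_metrics and structure_equal,
# defined elsewhere in the test harness. A minimal sketch of how they could behave,
# assuming the "structure" is a nested dict/list whose numeric leaves are each offset by
# the scalar value (the names come from the tests above; the bodies are assumptions and
# ignore the string "NaN"/"Infinity" leaves used by the nans test below):
import math
from typing import Any


def structure_to_metrics(value: float, structure: Any) -> Any:
    """Build a metrics object shaped like `structure`, adding `value` to each numeric leaf."""
    if isinstance(structure, dict):
        return {k: structure_to_metrics(value, v) for k, v in structure.items()}
    if isinstance(structure, list):
        return [structure_to_metrics(value, v) for v in structure]
    return structure + value


def structure_equal(expected: Any, actual: Any) -> bool:
    """Recursively compare two metric structures, tolerating float rounding."""
    if isinstance(expected, dict):
        return expected.keys() == actual.keys() and all(
            structure_equal(expected[k], actual[k]) for k in expected
        )
    if isinstance(expected, (list, tuple)):
        return len(expected) == len(actual) and all(
            structure_equal(e, a) for e, a in zip(expected, actual)
        )
    return math.isclose(float(expected), float(actual), rel_tol=1e-6)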
def test_mnist_pytorch_accuracy() -> None:
    config = conf.load_config(conf.tutorials_path("mnist_pytorch/const.yaml"))
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("mnist_pytorch"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.97
    assert max(validation_accuracies) > target_accuracy, (
        "mnist_pytorch did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_cifar10_pytorch_accuracy() -> None:
    config = conf.load_config(conf.official_examples_path("cifar10_cnn_pytorch/const.yaml"))
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("cifar10_cnn_pytorch"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["validation_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.745
    assert max(validation_accuracies) > target_accuracy, (
        "cifar10_cnn_pytorch did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_mnist_tf_layers_accuracy() -> None:
    config = conf.load_config(conf.cv_examples_path("mnist_tf_layers/const.yaml"))
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("mnist_tf_layers"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_errors = [
        step["validation"]["metrics"]["validation_metrics"]["error"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_error = 0.04
    assert min(validation_errors) < target_error, (
        "mnist_tf_layers did not reach minimum target error {} in {} steps."
        " full validation error history: {}".format(
            target_error, len(trial_metrics["steps"]), validation_errors
        )
    )
def test_nan_metrics() -> None:
    """
    Confirm that NaN and Infinity metrics are gathered from the trial.
    """
    exp_id = exp.run_basic_test(
        conf.fixtures_path("metric_maker/nans.yaml"), conf.fixtures_path("metric_maker"), 1
    )
    trials = exp.experiment_trials(exp_id)
    config = conf.load_config(conf.fixtures_path("metric_maker/nans.yaml"))
    base_value = config["hyperparameters"]["starting_base_value"]
    gain_per_batch = config["hyperparameters"]["gain_per_batch"]

    # Infinity and NaN cannot be processed by the YAML->JSON deserializer,
    # so add them to the expected values here.
    training_structure = config["hyperparameters"]["training_structure"]["val"]
    training_structure["inf"] = "Infinity"
    training_structure["nan"] = "NaN"
    training_structure["nanarray"] = ["NaN", "NaN"]

    validation_structure = config["hyperparameters"]["validation_structure"]["val"]
    validation_structure["neg_inf"] = "-Infinity"

    # Check training metrics.
    full_trial_metrics = exp.trial_metrics(trials[0].trial.id)
    batches_trained = 0
    for step in full_trial_metrics["steps"]:
        metrics = step["metrics"]

        actual = metrics["batch_metrics"]

        first_base_value = base_value + batches_trained
        batch_values = first_base_value + gain_per_batch * np.arange(5)
        expected = [structure_to_metrics(value, training_structure) for value in batch_values]
        assert structure_equal(expected, actual)
        batches_trained = step["total_batches"]

    # Check validation metrics.
    validation_workloads = exp.workloads_with_validation(trials[0].workloads)
    for validation in validation_workloads:
        actual = validation.metrics
        batches_trained = validation.totalBatches

        expected = structure_to_metrics(base_value, validation_structure)
        assert structure_equal(expected, actual)
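# Context for the string "Infinity"/"NaN" expectations above: strict JSON has no literals
# for non-finite floats, so they cannot be written directly into the YAML config and come
# back from the metrics API in a serialized form. Python's own json module illustrates the
# underlying limitation (this only demonstrates stdlib behavior, not the platform's
# serialization code):
import json
import math

# json.dumps emits the non-standard tokens Infinity / -Infinity / NaN by default...
assert json.dumps(float("inf")) == "Infinity"
assert json.dumps(float("nan")) == "NaN"
# ...and rejects non-finite floats entirely in strict mode.
try:
    json.dumps(float("inf"), allow_nan=False)
except ValueError:
    pass
# Parsing the tokens back yields real non-finite floats (use isnan, since NaN != NaN).
assert math.isinf(json.loads("Infinity"))
assert math.isnan(json.loads("NaN"))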
def test_imagenet_pytorch() -> None:
    config = conf.load_config(conf.tutorials_path("imagenet_pytorch/const_cifar.yaml"))
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.tutorials_path("imagenet_pytorch"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0].trial.id)

    validation_loss = [
        step["validation"]["metrics"]["validation_metrics"]["val_loss"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_loss = 1.55
    assert max(validation_loss) < target_loss, (
        "imagenet_pytorch did not reach target loss {} in {} steps."
        " full validation loss history: {}".format(
            target_loss, len(trial_metrics["steps"]), validation_loss
        )
    )
def test_fashion_mnist_tf_keras() -> None:
    config = conf.load_config(
        conf.official_examples_path("trial/fashion_mnist_tf_keras/const.yaml")
    )
    config = conf.set_random_seed(config, 1591110586)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.official_examples_path("trial/fashion_mnist_tf_keras"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["val_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.85
    assert max(validation_accuracies) > target_accuracy, (
        "fashion_mnist_tf_keras did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_fasterrcnn_coco_pytorch_accuracy() -> None:
    config = conf.load_config(conf.cv_examples_path("fasterrcnn_coco_pytorch/const.yaml"))
    config = conf.set_random_seed(config, 1590497309)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("fasterrcnn_coco_pytorch"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_iou = [
        step["validation"]["metrics"]["validation_metrics"]["val_avg_iou"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_iou = 0.42
    assert max(validation_iou) > target_iou, (
        "fasterrcnn_coco_pytorch did not reach minimum target average IoU {} in {} steps."
        " full validation avg_iou history: {}".format(
            target_iou, len(trial_metrics["steps"]), validation_iou
        )
    )
def test_data_layer_mnist_estimator_accuracy() -> None:
    config = conf.load_config(
        conf.data_layer_examples_path("data_layer_mnist_estimator/const.yaml")
    )
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.data_layer_examples_path("data_layer_mnist_estimator"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.93
    assert max(validation_accuracies) > target_accuracy, (
        "data_layer_mnist_estimator did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_unets_tf_keras_accuracy() -> None:
    config = conf.load_config(conf.cv_examples_path("unets_tf_keras/const.yaml"))
    config = conf.set_random_seed(config, 1591280374)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("unets_tf_keras"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["val_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.85
    assert max(validation_accuracies) > target_accuracy, (
        "unets_tf_keras did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_cifar10_tf_keras_accuracy() -> None:
    config = conf.load_config(conf.cv_examples_path("cifar10_tf_keras/const.yaml"))
    config = conf.set_random_seed(config, 1591110586)
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.cv_examples_path("cifar10_tf_keras"), 1, None, 6000
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0].trial.id)

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["val_categorical_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.73
    assert max(validation_accuracies) > target_accuracy, (
        "cifar10_tf_keras did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
def test_text_classification_tf_keras_accuracy() -> None:
    config = conf.load_config(
        conf.features_examples_path("text_classification_tf_keras/const.yaml")
    )
    experiment_id = exp.run_basic_test_with_temp_config(
        config, conf.features_examples_path("text_classification_tf_keras"), 1
    )

    trials = exp.experiment_trials(experiment_id)
    trial_metrics = exp.trial_metrics(trials[0]["id"])

    validation_accuracies = [
        step["validation"]["metrics"]["validation_metrics"]["val_sparse_categorical_accuracy"]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]

    target_accuracy = 0.95
    assert max(validation_accuracies) > target_accuracy, (
        "text_classification_tf_keras did not reach minimum target accuracy {} in {} steps."
        " full validation accuracy history: {}".format(
            target_accuracy, len(trial_metrics["steps"]), validation_accuracies
        )
    )
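# Every accuracy/IoU test above repeats the same shape: collect one validation metric from
# each step that ran a validation workload, then assert the best value clears a target.
# A hedged sketch of that shared pattern as a helper (the function is illustrative and not
# part of the existing harness):
from typing import Any, Dict, List


def best_validation_metric(trial_metrics: Dict[str, Any], metric_name: str) -> float:
    """Return the maximum value of `metric_name` across all validation workloads."""
    values: List[float] = [
        step["validation"]["metrics"]["validation_metrics"][metric_name]
        for step in trial_metrics["steps"]
        if step.get("validation")
    ]
    assert values, "trial reported no validation workloads"
    return max(values)


# Example: the cifar10_tf_keras assertion above could then be written as
#     assert best_validation_metric(trial_metrics, "val_categorical_accuracy") > 0.73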