def test_tuning_kmeans_identical_dataset_algorithm_tuner_raw(
    sagemaker_session, kmeans_train_set, kmeans_estimator, hyperparameter_ranges
):
    """Warm-start a child KMeans tuning job from a parent job using the
    IDENTICAL_DATA_AND_ALGORITHM strategy, then verify the warm start config
    reported by DescribeHyperParameterTuningJob matches the one we submitted.
    """
    parent_job = unique_name_from_base("kmeans-identical", max_length=32)
    child_job = unique_name_from_base("c-kmeans-identical", max_length=32)

    # The parent job must exist before the child can warm-start from it.
    _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=parent_job,
        hyperparameter_ranges=hyperparameter_ranges,
        max_parallel_jobs=1,
        max_jobs=1,
    )

    child_tuner = _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=child_job,
        hyperparameter_ranges=hyperparameter_ranges,
        warm_start_config=WarmStartConfig(
            warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
            parents=[parent_job],
        ),
        max_parallel_jobs=1,
        max_jobs=1,
    )

    # Round-trip the service response back through WarmStartConfig and compare
    # against what the child tuner was configured with.
    describe_response = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=child_job
    )
    reported_config = WarmStartConfig.from_job_desc(describe_response["WarmStartConfig"])

    assert reported_config.type == child_tuner.warm_start_config.type
    assert reported_config.parents == child_tuner.warm_start_config.parents
def test_tune_warm_start(sagemaker_session, warm_start_type, parents):
    """Verify Session.tune forwards a serialized WarmStartConfig in the
    CreateHyperParameterTuningJob request, alongside the standard job fields.
    """

    def assert_create_tuning_job_request(**kwargs):
        # Intercepts the create call and checks the request payload in place.
        assert (
            kwargs["HyperParameterTuningJobConfig"]
            == SAMPLE_TUNING_JOB_REQUEST["HyperParameterTuningJobConfig"]
        )
        assert kwargs["HyperParameterTuningJobName"] == "dummy-tuning-1"
        assert (
            kwargs["TrainingJobDefinition"]
            == SAMPLE_TUNING_JOB_REQUEST["TrainingJobDefinition"]
        )
        assert kwargs["WarmStartConfig"] == {
            "WarmStartType": warm_start_type,
            "ParentHyperParameterTuningJobs": [
                {"HyperParameterTuningJobName": parent} for parent in parents
            ],
        }

    # side_effect runs the assertions when Session.tune issues the API call.
    sagemaker_session.sagemaker_client.create_hyper_parameter_tuning_job.side_effect = (
        assert_create_tuning_job_request
    )
    sagemaker_session.tune(
        job_name="dummy-tuning-1",
        strategy="Bayesian",
        objective_type="Maximize",
        objective_metric_name="val-score",
        max_jobs=100,
        max_parallel_jobs=5,
        parameter_ranges=SAMPLE_PARAM_RANGES,
        static_hyperparameters=STATIC_HPs,
        image="dummy-image-1",
        input_mode="File",
        metric_definitions=SAMPLE_METRIC_DEF,
        role=EXPANDED_ROLE,
        input_config=SAMPLE_INPUT,
        output_config=SAMPLE_OUTPUT,
        resource_config=RESOURCE_CONFIG,
        stop_condition=SAMPLE_STOPPING_CONDITION,
        tags=None,
        warm_start_config=WarmStartConfig(
            warm_start_type=WarmStartTypes(warm_start_type), parents=parents
        ).to_input_req(),
    )
def test_warm_start_config_init(type, parents):
    """Check WarmStartConfig construction and its to_input_req serialization."""
    config = WarmStartConfig(warm_start_type=type, parents=parents)

    assert config.type == type, "Warm start type initialization failed."
    assert config.parents == set(parents), "Warm start parents config initialization failed."

    # The serialized request must round-trip the type and list every parent.
    request = config.to_input_req()
    assert config.type == WarmStartTypes(request["WarmStartType"])
    assert all(
        entry["HyperParameterTuningJobName"] in parents
        for entry in request["ParentHyperParameterTuningJobs"]
    )
def test_attach_with_warm_start_config(sagemaker_session):
    """HyperparameterTuner.attach should rebuild the warm start config from
    the DescribeHyperParameterTuningJob response.
    """
    expected = WarmStartConfig(
        warm_start_type=WarmStartTypes.TRANSFER_LEARNING, parents={"p1", "p2"}
    )

    # Inject the serialized config into a copy of the canned describe response.
    details = copy.deepcopy(TUNING_JOB_DETAILS)
    details["WarmStartConfig"] = expected.to_input_req()
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(
        name='describe_tuning_job', return_value=details
    )

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)

    assert tuner.warm_start_config.type == expected.type
    assert tuner.warm_start_config.parents == expected.parents
def test_warm_start_config_init_negative(type, parents):
    """Invalid (type, parents) combinations must make WarmStartConfig raise
    ValueError. The parametrized fixtures supply the invalid inputs.
    """
    with pytest.raises(ValueError):
        WarmStartConfig(warm_start_type=type, parents=parents)
ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, output_path="s3://bucket/prefix", sagemaker_session=SAGEMAKER_SESSION, ) ESTIMATOR_TWO = PCA( ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS, sagemaker_session=SAGEMAKER_SESSION, ) WARM_START_CONFIG = WarmStartConfig( warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, parents={"p1", "p2", "p3"} ) TUNING_JOB_DETAILS = { "HyperParameterTuningJobConfig": { "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1}, "HyperParameterTuningJobObjective": { "MetricName": OBJECTIVE_METRIC_NAME, "Type": "Minimize", }, "Strategy": "Bayesian", "ParameterRanges": { "CategoricalParameterRanges": [], "ContinuousParameterRanges": [], "IntegerParameterRanges": [ {
def test_single_algo_tuning_step(sagemaker_session):
    """Build a single-algorithm TuningStep and verify its full pipeline
    request serialization, step properties, and top-model S3 URI expression.
    """
    # Pipeline parameter used as the training data URI; it stays a parameter
    # object inside the serialized request until execution time.
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest")
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
    )
    # Static (non-tuned) hyperparameters; expected to appear stringified under
    # StaticHyperParameters in the request below.
    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )
    # Tunable ranges — all continuous in this test.
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(0.0001, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }
    tuner = HyperparameterTuner(
        estimator=estimator,
        objective_metric_name="val:accuracy",
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Maximize",
        max_jobs=5,
        max_parallel_jobs=2,
        early_stopping_type="OFF",
        strategy="Bayesian",
        warm_start_config=WarmStartConfig(
            warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
            parents=set(["parent-hpo"]),
        ),
    )
    inputs = TrainingInput(s3_data=data_source_uri_parameter)
    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs=inputs,
    )
    # Exact expected serialization of the step. NOTE(review): "DummyRole" and
    # "fakeimage" presumably come from ROLE/IMAGE_URI test constants defined
    # elsewhere in this file — confirm against the module-level fixtures.
    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 5,
                    "MaxParallelTrainingJobs": 2
                },
                "TrainingJobEarlyStoppingType": "OFF",
                "HyperParameterTuningJobObjective": {
                    "Type": "Maximize",
                    "MetricName": "val:accuracy",
                },
                "ParameterRanges": {
                    "ContinuousParameterRanges": [
                        {
                            "Name": "learning_rate",
                            "MinValue": "0.0001",
                            "MaxValue": "0.05",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "momentum",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "weight_decay",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                    ],
                    "CategoricalParameterRanges": [],
                    "IntegerParameterRanges": [],
                },
            },
            "TrainingJobDefinition": {
                "StaticHyperParameters": {
                    "num_layers": "18",
                    "image_shape": "3,224,224",
                    "num_classes": "257",
                    "num_training_samples": "15420",
                    "mini_batch_size": "128",
                    "epochs": "10",
                    "optimizer": "sgd",
                    "top_k": "2",
                    "precision_dtype": "float32",
                    "augmentation_type": "crop",
                },
                "RoleArn": "DummyRole",
                "OutputDataConfig": {
                    "S3OutputPath": "s3://my-bucket/"
                },
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.c5.4xlarge",
                    "VolumeSizeInGB": 30,
                },
                "StoppingCondition": {
                    "MaxRuntimeInSeconds": 86400
                },
                "AlgorithmSpecification": {
                    "TrainingInputMode": "File",
                    "TrainingImage": "fakeimage",
                },
                "InputDataConfig": [{
                    "DataSource": {
                        "S3DataSource": {
                            "S3DataType": "S3Prefix",
                            # The pipeline parameter object itself, not a string.
                            "S3Uri": data_source_uri_parameter,
                            "S3DataDistributionType": "FullyReplicated",
                        }
                    },
                    "ChannelName": "training",
                }],
            },
            "WarmStartConfig": {
                "WarmStartType": "IdenticalDataAndAlgorithm",
                "ParentHyperParameterTuningJobs": [{
                    "HyperParameterTuningJobName": "parent-hpo",
                }],
            },
        },
    }
    # Step properties resolve to pipeline "Get" expressions.
    assert tuning_step.properties.HyperParameterTuningJobName.expr == {
        "Get": "Steps.MyTuningStep.HyperParameterTuningJobName"
    }
    assert tuning_step.properties.TrainingJobSummaries[
        0].TrainingJobName.expr == {
        "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"
    }
    # The top-model URI is assembled at execution time via a Join function.
    assert tuning_step.get_top_model_s3_uri(
        0, "my-bucket", "my-prefix"
    ).expr == {
        "Std:Join": {
            "On": "/",
            "Values": [
                "s3:/",
                "my-bucket",
                "my-prefix",
                {
                    "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"
                },
                "output/model.tar.gz",
            ],
        }
    }