Example #1
0
def test_tuning_kmeans_identical_dataset_algorithm_tuner_raw(
        sagemaker_session, kmeans_train_set, kmeans_estimator,
        hyperparameter_ranges):
    """Warm-start a child tuning job from a parent via a raw WarmStartConfig
    and verify the service echoes the same type and parents back."""
    parent_job = unique_name_from_base("kmeans-identical", max_length=32)
    child_job = unique_name_from_base("c-kmeans-identical", max_length=32)

    # The parent job must exist before the child can warm-start from it.
    _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=parent_job,
        hyperparameter_ranges=hyperparameter_ranges,
        max_parallel_jobs=1,
        max_jobs=1,
    )

    warm_start = WarmStartConfig(
        warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
        parents=[parent_job],
    )
    child_tuner = _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=child_job,
        hyperparameter_ranges=hyperparameter_ranges,
        warm_start_config=warm_start,
        max_parallel_jobs=1,
        max_jobs=1,
    )

    # Round-trip the warm-start config through DescribeHyperParameterTuningJob.
    describe = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job
    echoed = WarmStartConfig.from_job_desc(
        describe(HyperParameterTuningJobName=child_job)["WarmStartConfig"])

    assert echoed.type == child_tuner.warm_start_config.type
    assert echoed.parents == child_tuner.warm_start_config.parents
def test_warm_start_config_init(type, parents):
    """A WarmStartConfig should retain its type and (de-duplicated) parents,
    and its to_input_req() serialization should round-trip both."""
    config = WarmStartConfig(warm_start_type=type, parents=parents)

    assert config.type == type, "Warm start type initialization failed."
    assert config.parents == set(parents), "Warm start parents config initialization failed."

    # Serialize to the CreateHyperParameterTuningJob request shape and verify
    # both fields survive the round trip.
    request = config.to_input_req()
    assert config.type == WarmStartTypes(request["WarmStartType"])
    for entry in request["ParentHyperParameterTuningJobs"]:
        assert entry['HyperParameterTuningJobName'] in parents
def test_attach_with_warm_start_config(sagemaker_session):
    """Attaching to an existing tuning job should rehydrate its warm-start
    config from the describe-job response."""
    expected = WarmStartConfig(warm_start_type=WarmStartTypes.TRANSFER_LEARNING, parents={"p1", "p2"})

    # Stub DescribeHyperParameterTuningJob so it returns job details that
    # embed the serialized warm-start config.
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    job_details["WarmStartConfig"] = expected.to_input_req()
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(
        name='describe_tuning_job', return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    assert tuner.warm_start_config.type == expected.type
    assert tuner.warm_start_config.parents == expected.parents
Example #4
0
def test_tuning_kmeans_identical_dataset_algorithm_tuner(
        sagemaker_session, kmeans_train_set, kmeans_estimator,
        hyperparameter_ranges):
    """Tests Identical dataset and algorithm use case with one parent and child job launched with
    .identical_dataset_and_algorithm_tuner()"""

    parent_job = unique_name_from_base("km-iden1-parent", max_length=32)
    child_job = unique_name_from_base("km-iden1-child", max_length=32)

    # Run the parent tuning job first.
    parent_tuner = _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=parent_job,
        hyperparameter_ranges=hyperparameter_ranges,
    )

    # Derive the warm-started child directly from the parent tuner.
    child_tuner = parent_tuner.identical_dataset_and_algorithm_tuner()
    _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=child_job,
        tuner=child_tuner,
        max_parallel_jobs=1,
        max_jobs=1,
    )

    # The service should echo the same warm-start config on describe.
    describe = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job
    echoed = WarmStartConfig.from_job_desc(
        describe(HyperParameterTuningJobName=child_job)["WarmStartConfig"])

    assert echoed.type == child_tuner.warm_start_config.type
    assert echoed.parents == child_tuner.warm_start_config.parents
def test_tune_warm_start(sagemaker_session, warm_start_type, parents):
    """tune() should forward the warm-start config (type and parents) verbatim
    inside the CreateHyperParameterTuningJob request."""

    expected_warm_start = {
        'WarmStartType': warm_start_type,
        'ParentHyperParameterTuningJobs': [
            {'HyperParameterTuningJobName': parent} for parent in parents],
    }

    # Intercept the create call and assert on the request it receives.
    def assert_create_tuning_job_request(**kwargs):
        assert kwargs["HyperParameterTuningJobConfig"] == SAMPLE_TUNING_JOB_REQUEST["HyperParameterTuningJobConfig"]
        assert kwargs["HyperParameterTuningJobName"] == "dummy-tuning-1"
        assert kwargs["TrainingJobDefinition"] == SAMPLE_TUNING_JOB_REQUEST["TrainingJobDefinition"]
        assert kwargs["WarmStartConfig"] == expected_warm_start

    sagemaker_session.sagemaker_client.create_hyper_parameter_tuning_job.side_effect = \
        assert_create_tuning_job_request

    warm_start_request = WarmStartConfig(
        warm_start_type=WarmStartTypes(warm_start_type), parents=parents).to_input_req()
    sagemaker_session.tune(
        job_name="dummy-tuning-1",
        strategy="Bayesian",
        objective_type="Maximize",
        objective_metric_name="val-score",
        max_jobs=100,
        max_parallel_jobs=5,
        parameter_ranges=SAMPLE_PARAM_RANGES,
        static_hyperparameters=STATIC_HPs,
        image="dummy-image-1",
        input_mode="File",
        metric_definitions=SAMPLE_METRIC_DEF,
        role=EXPANDED_ROLE,
        input_config=SAMPLE_INPUT,
        output_config=SAMPLE_OUTPUT,
        resource_config=RESOURCE_CONFIG,
        stop_condition=SAMPLE_STOPPING_CONDITION,
        tags=None,
        warm_start_config=warm_start_request)
Example #6
0
def test_create_transfer_learning_tuner(sagemaker_session,
                                        kmeans_train_set,
                                        kmeans_estimator,
                                        hyperparameter_ranges):
    """Tests Transfer learning use case with two parents and child job launched with
        create_transfer_learning_tuner() """
    first_parent_job = name_from_base("km-tran2-parent1", max_length=32, short=True)
    second_parent_job = name_from_base("km-tran2-parent2", max_length=32, short=True)
    child_job = name_from_base("km-tran2-child", max_length=32, short=True)

    # Run both parent tuning jobs up front so the child can reference them.
    first_parent_tuner = _tune(kmeans_estimator, kmeans_train_set, job_name=first_parent_job,
                               hyperparameter_ranges=hyperparameter_ranges,
                               max_parallel_jobs=1, max_jobs=1)
    second_parent_tuner = _tune(kmeans_estimator, kmeans_train_set, job_name=second_parent_job,
                                hyperparameter_ranges=hyperparameter_ranges,
                                max_parallel_jobs=1, max_jobs=1)

    # Build a transfer-learning child tuner with both parents attached.
    child_tuner = create_transfer_learning_tuner(
        parent=first_parent_tuner.latest_tuning_job.name,
        sagemaker_session=sagemaker_session,
        estimator=kmeans_estimator,
        additional_parents={second_parent_tuner.latest_tuning_job.name})
    _tune(kmeans_estimator, kmeans_train_set, job_name=child_job, tuner=child_tuner)

    # The service should echo the warm-start config on describe.
    describe = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job
    echoed = WarmStartConfig.from_job_desc(
        describe(HyperParameterTuningJobName=child_job)["WarmStartConfig"])

    assert echoed.type == child_tuner.warm_start_config.type
    assert echoed.parents == child_tuner.warm_start_config.parents
def test_prepare_warm_start_config_cls(warm_start_config_req):
    """from_job_desc() should recover the warm-start type and every parent
    from a describe-job style request dict."""
    config = WarmStartConfig.from_job_desc(warm_start_config_req)

    expected_type = WarmStartTypes(warm_start_config_req["WarmStartType"])
    assert config.type == expected_type, "Warm start type initialization failed."

    # Every parent listed in the request must appear in the parsed config.
    for entry in warm_start_config_req["ParentHyperParameterTuningJobs"]:
        assert entry['HyperParameterTuningJobName'] in config.parents, \
            "Warm start parents config initialization failed."
Example #8
0
def test_transfer_learning_tuner(sagemaker_session,
                                 kmeans_train_set,
                                 kmeans_estimator,
                                 hyperparameter_ranges):
    """Tests Transfer learning use case with one parent and child job launched with
        .transfer_learning_tuner() """

    parent_job = name_from_base("km-tran1-parent", max_length=32, short=True)
    child_job = name_from_base("km-tran1-child", max_length=32, short=True)

    # Run the parent job, then spawn a transfer-learning child from it.
    parent_tuner = _tune(kmeans_estimator, kmeans_train_set, job_name=parent_job,
                         hyperparameter_ranges=hyperparameter_ranges,
                         max_jobs=1, max_parallel_jobs=1)

    child_tuner = parent_tuner.transfer_learning_tuner()
    _tune(kmeans_estimator, kmeans_train_set, job_name=child_job,
          tuner=child_tuner, max_parallel_jobs=1, max_jobs=1)

    # The service should echo the warm-start config on describe.
    describe = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job
    echoed = WarmStartConfig.from_job_desc(
        describe(HyperParameterTuningJobName=child_job)["WarmStartConfig"])

    assert echoed.type == child_tuner.warm_start_config.type
    assert echoed.parents == child_tuner.warm_start_config.parents
Example #9
0
def test_prepare_warm_start_config_cls_negative(warm_start_config_req):
    """from_job_desc() must yield None for malformed warm-start descriptions."""
    result = WarmStartConfig.from_job_desc(warm_start_config_req)
    assert result is None, "Warm start config should be None for invalid type/parents"
Example #10
0
def test_warm_start_config_init_negative(type, parents):
    """Constructing a WarmStartConfig from invalid type/parents must raise."""
    invalid_kwargs = {"warm_start_type": type, "parents": parents}
    with pytest.raises(ValueError):
        WarmStartConfig(**invalid_kwargs)
    ROLE,
    TRAIN_INSTANCE_COUNT,
    TRAIN_INSTANCE_TYPE,
    output_path="s3://bucket/prefix",
    sagemaker_session=SAGEMAKER_SESSION,
)
# Second stub estimator (PCA) for tests that need two distinct estimators.
ESTIMATOR_TWO = PCA(
    ROLE,
    TRAIN_INSTANCE_COUNT,
    TRAIN_INSTANCE_TYPE,
    NUM_COMPONENTS,
    sagemaker_session=SAGEMAKER_SESSION,
)

# Shared warm-start fixture: identical-data-and-algorithm with three parents.
WARM_START_CONFIG = WarmStartConfig(
    warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, parents={"p1", "p2", "p3"}
)

TUNING_JOB_DETAILS = {
    "HyperParameterTuningJobConfig": {
        "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1},
        "HyperParameterTuningJobObjective": {
            "MetricName": OBJECTIVE_METRIC_NAME,
            "Type": "Minimize",
        },
        "Strategy": "Bayesian",
        "ParameterRanges": {
            "CategoricalParameterRanges": [],
            "ContinuousParameterRanges": [],
            "IntegerParameterRanges": [
                {
Example #12
0
def test_single_algo_tuning_step(sagemaker_session):
    """Build a TuningStep around a single-algorithm HyperparameterTuner and
    verify its full request dict, its step properties, and the S3 URI
    expression produced by get_top_model_s3_uri().
    """
    # The data source is a pipeline parameter, so it must pass through to the
    # generated request unresolved (asserted verbatim below).
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest")
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
    )
    # These become the StaticHyperParameters of the training job definition,
    # serialized as strings in the request.
    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )

    # Three continuous ranges; categorical/integer ranges stay empty below.
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(0.0001, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }

    tuner = HyperparameterTuner(
        estimator=estimator,
        objective_metric_name="val:accuracy",
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Maximize",
        max_jobs=5,
        max_parallel_jobs=2,
        early_stopping_type="OFF",
        strategy="Bayesian",
        warm_start_config=WarmStartConfig(
            warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
            parents=set(["parent-hpo"]),
        ),
    )

    inputs = TrainingInput(s3_data=data_source_uri_parameter)

    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs=inputs,
    )

    # Snapshot assertion: the complete CreateHyperParameterTuningJob-shaped
    # arguments the step serializes, including the warm-start block.
    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 5,
                    "MaxParallelTrainingJobs": 2
                },
                "TrainingJobEarlyStoppingType": "OFF",
                "HyperParameterTuningJobObjective": {
                    "Type": "Maximize",
                    "MetricName": "val:accuracy",
                },
                "ParameterRanges": {
                    "ContinuousParameterRanges": [
                        {
                            "Name": "learning_rate",
                            "MinValue": "0.0001",
                            "MaxValue": "0.05",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "momentum",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "weight_decay",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                    ],
                    "CategoricalParameterRanges": [],
                    "IntegerParameterRanges": [],
                },
            },
            "TrainingJobDefinition": {
                "StaticHyperParameters": {
                    "num_layers": "18",
                    "image_shape": "3,224,224",
                    "num_classes": "257",
                    "num_training_samples": "15420",
                    "mini_batch_size": "128",
                    "epochs": "10",
                    "optimizer": "sgd",
                    "top_k": "2",
                    "precision_dtype": "float32",
                    "augmentation_type": "crop",
                },
                "RoleArn":
                "DummyRole",
                "OutputDataConfig": {
                    "S3OutputPath": "s3://my-bucket/"
                },
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.c5.4xlarge",
                    "VolumeSizeInGB": 30,
                },
                "StoppingCondition": {
                    "MaxRuntimeInSeconds": 86400
                },
                "AlgorithmSpecification": {
                    "TrainingInputMode": "File",
                    "TrainingImage": "fakeimage",
                },
                # The ParameterString is kept as-is (not resolved) in S3Uri.
                "InputDataConfig": [{
                    "DataSource": {
                        "S3DataSource": {
                            "S3DataType": "S3Prefix",
                            "S3Uri": data_source_uri_parameter,
                            "S3DataDistributionType": "FullyReplicated",
                        }
                    },
                    "ChannelName": "training",
                }],
            },
            "WarmStartConfig": {
                "WarmStartType":
                "IdenticalDataAndAlgorithm",
                "ParentHyperParameterTuningJobs": [{
                    "HyperParameterTuningJobName":
                    "parent-hpo",
                }],
            },
        },
    }

    # Step properties resolve to "Get" expressions keyed by the step name.
    assert tuning_step.properties.HyperParameterTuningJobName.expr == {
        "Get": "Steps.MyTuningStep.HyperParameterTuningJobName"
    }
    assert tuning_step.properties.TrainingJobSummaries[
        0].TrainingJobName.expr == {
            "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"
        }
    # get_top_model_s3_uri emits a Std:Join that splices the best training
    # job's name between the bucket/prefix and the model artifact path.
    assert tuning_step.get_top_model_s3_uri(
        0, "my-bucket", "my-prefix"
    ).expr == {
        "Std:Join": {
            "On":
            "/",
            "Values": [
                "s3:/",
                "my-bucket",
                "my-prefix",
                {
                    "Get":
                    "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"
                },
                "output/model.tar.gz",
            ],
        }
    }