def test_multi_estimator_tuning(sagemaker_session, estimator_fm, estimator_knn, data_set, cpu_instance_type):
    """End-to-end check of a multi-estimator (FM + KNN) tuning job.

    Creates the tuner, runs the tuning job, pulls its analytics, re-attaches to
    it by name, then deploys the best model and runs a prediction against it.
    """
    # Per-estimator configuration, keyed by the estimator names used throughout.
    estimators = {ESTIMATOR_FM: estimator_fm, ESTIMATOR_KNN: estimator_knn}
    objective_metrics = {
        ESTIMATOR_FM: OBJECTIVE_METRIC_NAME_FM,
        ESTIMATOR_KNN: OBJECTIVE_METRIC_NAME_KNN,
    }
    ranges = {
        ESTIMATOR_FM: HYPER_PARAMETER_RANGES_FM,
        ESTIMATOR_KNN: HYPER_PARAMETER_RANGES_KNN,
    }

    tuner = HyperparameterTuner.create(
        base_tuning_job_name=BASE_TUNING_JOB_NAME,
        estimator_dict=estimators,
        objective_metric_name_dict=objective_metrics,
        hyperparameter_ranges_dict=ranges,
        strategy=STRATEGY,
        objective_type=OBJECTIVE_TYPE,
        max_jobs=MAX_JOBS,
        max_parallel_jobs=MAX_PARALLEL_JOBS,
        tags=TAGS,
    )

    _fit_tuner(sagemaker_session, tuner)
    _retrieve_analytics(sagemaker_session, tuner.latest_tuning_job.name)

    # Re-attach by job name to verify round-tripping, then exercise deployment.
    tuner_attached = _attach_tuner(sagemaker_session, tuner.latest_tuning_job.name)
    _deploy_and_predict(sagemaker_session, tuner_attached, data_set, cpu_instance_type)
def test_multi_algo_tuning_step(sagemaker_session):
    """Verify the request shape emitted by a multi-algorithm ``TuningStep``.

    Builds one Estimator used under two definition names ("estimator-1" and
    "estimator-2") and asserts ``TuningStep.to_request()`` produces two
    identical ``TrainingJobDefinitions`` entries differing only in
    ``DefinitionName``. The original expected value duplicated the whole
    definition dict; it is now generated by a local helper so the two entries
    cannot drift apart when the test is updated.
    """
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest"
    )
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=instance_count,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
        max_retry_attempts=10,
    )
    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )
    initial_lr_param = ParameterString(name="InitialLR", default_value="0.0001")
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(initial_lr_param, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }
    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": estimator,
            "estimator-2": estimator,
        },
        objective_type="Minimize",
        objective_metric_name_dict={
            "estimator-1": "val:loss",
            "estimator-2": "val:loss",
        },
        hyperparameter_ranges_dict={
            "estimator-1": hyperparameter_ranges,
            "estimator-2": hyperparameter_ranges,
        },
    )
    inputs = TrainingInput(s3_data=data_source_uri_parameter)
    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs={
            "estimator-1": inputs,
            "estimator-2": inputs,
        },
    )

    def expected_definition(definition_name):
        # Both estimators share identical configuration; only DefinitionName
        # differs between the two entries in TrainingJobDefinitions.
        return {
            "StaticHyperParameters": {
                "num_layers": "18",
                "image_shape": "3,224,224",
                "num_classes": "257",
                "num_training_samples": "15420",
                "mini_batch_size": "128",
                "epochs": "10",
                "optimizer": "sgd",
                "top_k": "2",
                "precision_dtype": "float32",
                "augmentation_type": "crop",
            },
            "RoleArn": "DummyRole",
            "OutputDataConfig": {"S3OutputPath": "s3://my-bucket/"},
            "ResourceConfig": {
                "InstanceCount": 1,
                "InstanceType": "ml.c5.4xlarge",
                "VolumeSizeInGB": 30,
            },
            "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
            "AlgorithmSpecification": {
                "TrainingInputMode": "File",
                "TrainingImage": "fakeimage",
            },
            "InputDataConfig": [
                {
                    "DataSource": {
                        "S3DataSource": {
                            "S3DataType": "S3Prefix",
                            # Pipeline parameter objects stay unresolved here;
                            # they are compared by identity/equality, not value.
                            "S3Uri": data_source_uri_parameter,
                            "S3DataDistributionType": "FullyReplicated",
                        }
                    },
                    "ChannelName": "training",
                }
            ],
            "DefinitionName": definition_name,
            "TuningObjective": {"Type": "Minimize", "MetricName": "val:loss"},
            "HyperParameterRanges": {
                "ContinuousParameterRanges": [
                    {
                        "Name": "learning_rate",
                        "MinValue": initial_lr_param,
                        "MaxValue": "0.05",
                        "ScalingType": "Auto",
                    },
                    {
                        "Name": "momentum",
                        "MinValue": "0.0",
                        "MaxValue": "0.99",
                        "ScalingType": "Auto",
                    },
                    {
                        "Name": "weight_decay",
                        "MinValue": "0.0",
                        "MaxValue": "0.99",
                        "ScalingType": "Auto",
                    },
                ],
                "CategoricalParameterRanges": [],
                "IntegerParameterRanges": [],
            },
            "RetryStrategy": {
                "MaximumRetryAttempts": 10,
            },
        }

    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 1,
                    "MaxParallelTrainingJobs": 1,
                },
                "TrainingJobEarlyStoppingType": "Off",
            },
            "TrainingJobDefinitions": [
                expected_definition("estimator-1"),
                expected_definition("estimator-2"),
            ],
        },
    }
def test_tuning_step_with_multi_algo_tuner(pipeline_session, entry_point):
    """Check that a ``TuningStep`` built from ``step_args`` (the ``tuner.fit``
    call captured under a pipeline session) serializes those args verbatim
    into the pipeline definition.
    """
    estimator = PyTorch(
        entry_point=entry_point,
        role=sagemaker.get_execution_role(),
        framework_version="1.5.0",
        py_version="py3",
        instance_count=1,
        instance_type="ml.m5.xlarge",
        sagemaker_session=pipeline_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
        hyperparameters={"static-hp": "hp1", "train_size": "1280"},
    )

    def acc_metric():
        # Fresh list per estimator, mirroring the distinct literals a hand-written
        # metric_definitions_dict would contain.
        return [{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}]

    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": estimator,
            "estimator-2": estimator,
        },
        objective_metric_name_dict={
            "estimator-1": "test:acc",
            "estimator-2": "test:acc",
        },
        hyperparameter_ranges_dict={
            "estimator-1": {"batch-size": IntegerParameter(64, 128)},
            "estimator-2": {"batch-size": IntegerParameter(256, 512)},
        },
        metric_definitions_dict={
            "estimator-1": acc_metric(),
            "estimator-2": acc_metric(),
        },
    )

    input_path = f"s3://{pipeline_session.default_bucket()}/training-data"
    inputs = {
        key: TrainingInput(s3_data=input_path)
        for key in ("estimator-1", "estimator-2")
    }

    step_args = tuner.fit(
        inputs=inputs,
        include_cls_metadata={
            "estimator-1": False,
            "estimator-2": False,
        },
    )
    step = TuningStep(name="MyTuningStep", step_args=step_args)
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    # The captured fit arguments must appear unchanged in the rendered step.
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": step_args,
    }
def test_tuning_multi_algos(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
    script_dir,
    athena_dataset_definition,
):
    """Integration test: processing step feeding a multi-algorithm tuning step.

    Builds a pipeline where a SKLearn processing step produces a property file
    whose ``train_size`` value is consumed (via ``JsonGet``) both as a static
    hyperparameter and as a hyperparameter-range bound, then creates and starts
    the pipeline and checks the returned ARNs. The pipeline is deleted in the
    ``finally`` block regardless of outcome.
    """
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(mnist_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(mnist_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        instance_type=instance_type,
        instance_count=instance_count,
        base_job_name="test-sklearn",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    property_file = PropertyFile(
        name="DataAttributes", output_name="attributes", path="attributes.json"
    )
    step_process = ProcessingStep(
        name="my-process",
        display_name="ProcessingStep",
        description="description for Processing step",
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(source=input_data, destination="/opt/ml/processing/input"),
            ProcessingInput(dataset_definition=athena_dataset_definition),
        ],
        outputs=[
            ProcessingOutput(output_name="train_data", source="/opt/ml/processing/train"),
            ProcessingOutput(
                output_name="attributes", source="/opt/ml/processing/attributes.json"
            ),
        ],
        property_files=[property_file],
        code=os.path.join(script_dir, "preprocessing.py"),
    )

    # NOTE(review): this parameter reuses the name "InstanceType"; preserved as-is.
    static_hp_1 = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    json_get_hp = JsonGet(
        step_name=step_process.name, property_file=property_file, json_path="train_size"
    )
    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
        hyperparameters={"static-hp": static_hp_1, "train_size": json_get_hp},
    )

    min_batch_size = ParameterString(name="MinBatchSize", default_value="64")
    max_batch_size = json_get_hp

    estimator_names = ("estimator-1", "estimator-2")
    tuner = HyperparameterTuner.create(
        estimator_dict={name: pytorch_estimator for name in estimator_names},
        objective_metric_name_dict={name: "test:acc" for name in estimator_names},
        # Comprehension builds a fresh IntegerParameter per estimator.
        hyperparameter_ranges_dict={
            name: {"batch-size": IntegerParameter(min_batch_size, max_batch_size)}
            for name in estimator_names
        },
        metric_definitions_dict={
            name: [{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}]
            for name in estimator_names
        },
    )
    step_tune = TuningStep(
        name="my-tuning-step",
        tuner=tuner,
        inputs={name: TrainingInput(s3_data=input_path) for name in estimator_names},
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
        steps=[step_process, step_tune],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            # Best-effort cleanup; the pipeline may never have been created.
            pass