Code example #1
from sagemaker.tuner import (CategoricalParameter, ContinuousParameter,
                             IntegerParameter)

REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {
    'validated': ContinuousParameter(0, 5),
    'elizabeth': IntegerParameter(0, 5),
    'blank': CategoricalParameter([0, 5])
}
METRIC_DEFINITIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },
Code example #2
def test_continuous_parameter():
    cont_param = ContinuousParameter(0.1, 1e-2)
    assert isinstance(cont_param, _ParameterRange)
    assert cont_param.__name__ == 'Continuous'
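
# A companion sketch (not in the original file): the SDK's as_tuning_range()
# serializes a parameter into the CreateHyperParameterTuningJob range format,
# with string bounds and the default "Auto" scaling type.
def test_continuous_parameter_ranges():
    cont_param = ContinuousParameter(0.1, 1e-2)
    ranges = cont_param.as_tuning_range('some')
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '0.1'
    assert ranges['MaxValue'] == '0.01'
    assert ranges['ScalingType'] == 'Auto'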
Code example #3
File: train.py Project: QuynhDV/ml-id-lab3
    best_model = fm.model_data
else:
    fm.set_hyperparameters(feature_dim=nbFeatures,
                           predictor_type='binary_classifier',
                           mini_batch_size=1000,
                           num_factors=64,
                           epochs=100)

    my_tuner = HyperparameterTuner(
        estimator=fm,
        objective_metric_name='test:binary_classification_accuracy',
        hyperparameter_ranges={
            'epochs': IntegerParameter(1, 200),
            'mini_batch_size': IntegerParameter(10, 10000),
            'factors_wd': ContinuousParameter(1e-8, 512)
        },
        max_jobs=4,
        max_parallel_jobs=4)

    my_tuner.fit({
        'train': train_data,
        'test': test_data
    },
                 include_cls_metadata=False)

    my_tuner.wait()

    #sm_session = sagemaker.Session()
    #best_log = sm_session.logs_for_job(my_tuner.best_training_job())
    #print(best_log)
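
    # Sketch (assumes the standard HyperparameterTuner.analytics() helper;
    # not part of the original project): rank the finished training jobs
    # by objective value instead of scraping logs.
    df = my_tuner.analytics().dataframe()
    print(df.sort_values('FinalObjectiveValue', ascending=False).head())
    print('Best training job:', my_tuner.best_training_job())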
Code example #4
            "mini_batch_size": "200",
            "num_factors": "64",
            "predictor_type": 'regressor'
        }
    },
    "inputs": {
        "train": "s3://<s3-bucket>/prepare/train/train.protobuf",  # replace
    }
}

config["tune_model"] = {
    "tuner_config": {
        "objective_metric_name": "test:rmse",
        "objective_type": "Minimize",
        "hyperparameter_ranges": {
            "factors_lr": ContinuousParameter(0.0001, 0.2),
            "factors_init_sigma": ContinuousParameter(0.0001, 1)
        },
        "max_jobs": 20,
        "max_parallel_jobs": 2,
        "base_tuning_job_name": "hpo-recommender"
    },
    "inputs": {
        "train": "s3://<s3-bucket>/prepare/train/train.protobuf",  # replace
        "test":
        "s3://<s3-bucket>/prepare/validate/validate.protobuf"  # replace
    }
}
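
# Hypothetical glue (not from the original project): a config block like
# "tune_model" above can be splatted straight into the tuner constructor.
# `estimator` is assumed to be defined elsewhere in the pipeline code.
from sagemaker.tuner import HyperparameterTuner

tuner = HyperparameterTuner(estimator=estimator,
                            **config["tune_model"]["tuner_config"])
tuner.fit(config["tune_model"]["inputs"])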

config["batch_transform"] = {
    "transform_config": {
Code example #5
EXECUTION_ROLE = "SageMakerRole"

STRATEGY = "Bayesian"
OBJECTIVE_TYPE = "Minimize"

TAGS = [{"Key": "pysdk-test", "Value": "multi-algo-tuner"}]

ESTIMATOR_FM = "fm-one"
ESTIMATOR_KNN = "knn-two"

# TODO: change to use one of the new standard metrics for 1P algorithm
OBJECTIVE_METRIC_NAME_FM = "test:rmse"
OBJECTIVE_METRIC_NAME_KNN = "test:mse"

HYPER_PARAMETER_RANGES_FM = {
    "factors_wd": ContinuousParameter(1, 30),
    "factors_lr": ContinuousParameter(40, 50),
}
HYPER_PARAMETER_RANGES_KNN = {
    "k": IntegerParameter(3, 400),
    "sample_size": IntegerParameter(40, 550),
}

MAX_JOBS = 2
MAX_PARALLEL_JOBS = 2
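
# Sketch (not from the original test module): these constants describe a
# multi-algorithm tuning job; HyperparameterTuner.create wires them together.
# The two estimators are assumed to be built elsewhere (e.g. by fixtures).
def make_multi_algo_tuner(fm_estimator, knn_estimator):
    return HyperparameterTuner.create(
        estimator_dict={ESTIMATOR_FM: fm_estimator,
                        ESTIMATOR_KNN: knn_estimator},
        objective_metric_name_dict={
            ESTIMATOR_FM: OBJECTIVE_METRIC_NAME_FM,
            ESTIMATOR_KNN: OBJECTIVE_METRIC_NAME_KNN,
        },
        hyperparameter_ranges_dict={
            ESTIMATOR_FM: HYPER_PARAMETER_RANGES_FM,
            ESTIMATOR_KNN: HYPER_PARAMETER_RANGES_KNN,
        },
        strategy=STRATEGY,
        objective_type=OBJECTIVE_TYPE,
        max_jobs=MAX_JOBS,
        max_parallel_jobs=MAX_PARALLEL_JOBS,
        tags=TAGS,
    )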


@pytest.fixture(scope="module")
def data_set():
    pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}
    with gzip.open(DATA_PATH, "rb") as f:
Code example #6
    }, {
        'Name': 'validation:accuracy',
        'Regex': 'val_accuracy: ([0-9\\.]+)'
    }]
    sm_estimator = sagemaker_estimator(sagemaker_role, code_entry, code_dir,
                                       instance_type, instance_count,
                                       hyperparameters, metric_definitions)

    # sagemaker training job
    training_job_name = "tf-mnist-training-{}".format(
        strftime("%d-%H-%M-%S", gmtime()))
    sagemaker_training(sm_estimator, train_s3, training_job_name)

    # sagemaker tuning job
    hyperparameter_ranges = {
        'epochs': IntegerParameter(50, 200),
        'learning_rate': ContinuousParameter(0.0001, 0.1,
                                             scaling_type="Logarithmic"),
        'batch_size': IntegerParameter(32, 256),
        'drop_rate': ContinuousParameter(0.0, 1.0)
    }

    tuning_job_name = "tf-mnist-tuning-{}".format(
        strftime("%d-%H-%M-%S", gmtime()))
    max_jobs = 4
    max_parallel_jobs = 2
    #sagemaker_hyperparam_tuning(sm_estimator, train_s3, hyperparameter_ranges, metric_definitions, tuning_job_name, max_jobs, max_parallel_jobs)
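
    # The tuning call above is commented out; a sketch of what a helper like
    # sagemaker_hyperparam_tuning would typically wrap (hypothetical body,
    # not from the original project):
    tuner = HyperparameterTuner(sm_estimator,
                                objective_metric_name='validation:accuracy',
                                hyperparameter_ranges=hyperparameter_ranges,
                                metric_definitions=metric_definitions,
                                max_jobs=max_jobs,
                                max_parallel_jobs=max_parallel_jobs)
    tuner.fit(train_s3, job_name=tuning_job_name)
    tuner.wait()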
Code example #7
def test_tuning_chainer(sagemaker_session, chainer_latest_version,
                        chainer_latest_py_version, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "chainer_mnist")

        estimator = Chainer(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=chainer_latest_version,
            py_version=chainer_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/chainer_mnist/train")
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/chainer_mnist/test")

        hyperparameter_ranges = {"alpha": ContinuousParameter(0.001, 0.005)}

        objective_metric_name = "Validation-accuracy"
        metric_definitions = [{
            "Name": "Validation-accuracy",
            "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
        }]

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(
            tuning_job_name))
        tuner.fit({"train": train_input, "test": test_input},
                  job_name=tuning_job_name)

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size
Code example #8
File: sage.py Project: opringle/vdcnn-gluonnlp
    # Assume permissions to provision services with Sagemaker role
    session = boto3.Session(profile_name=args.profile)
    sagemaker_session = sagemaker.Session(boto_session=session)
    local_session = sagemaker.local.local_session.LocalSession(
        boto_session=session)

    # Initialize variables
    custom_code_upload_location = 's3://{}/customcode/mxnet'.format(
        args.bucket_name)
    model_artifacts_location = 's3://{}/artifacts'.format(args.bucket_name)
    data_path = 's3://{}/{}'.format(args.bucket_name, args.data_dir)

    # Hyperparameters to search
    search_space = {
        'min_lr': ContinuousParameter(0.0001, 1),
        'max_lr': ContinuousParameter(0.0001, 1),
        'lr_cycle_epochs': ContinuousParameter(1, 10),
        'lr_increase_fraction': ContinuousParameter(0.1, 0.5),
        'momentum': ContinuousParameter(0.8, 0.999),
        'dropout': ContinuousParameter(0.01, 0.99),
        'temp_conv_filters': IntegerParameter(32, 128),
        'l2': ContinuousParameter(0.0, 0.2)
    }

    # Hyperparameters to fix
    hyperparameters = {
        'epochs': 30,
        'batch_size': 24,
        'num_buckets': 30,
        'batch_seq_ration': 0.5,
Code example #9
def test_multi_algo_tuning_step(sagemaker_session):
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest")
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
    )

    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )

    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(0.0001, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }

    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": estimator,
            "estimator-2": estimator,
        },
        objective_type="Minimize",
        objective_metric_name_dict={
            "estimator-1": "val:loss",
            "estimator-2": "val:loss",
        },
        hyperparameter_ranges_dict={
            "estimator-1": hyperparameter_ranges,
            "estimator-2": hyperparameter_ranges,
        },
    )

    inputs = TrainingInput(s3_data=data_source_uri_parameter)

    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs={
            "estimator-1": inputs,
            "estimator-2": inputs,
        },
    )

    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 1,
                    "MaxParallelTrainingJobs": 1
                },
                "TrainingJobEarlyStoppingType": "Off",
            },
            "TrainingJobDefinitions": [
                {
                    "StaticHyperParameters": {
                        "num_layers": "18",
                        "image_shape": "3,224,224",
                        "num_classes": "257",
                        "num_training_samples": "15420",
                        "mini_batch_size": "128",
                        "epochs": "10",
                        "optimizer": "sgd",
                        "top_k": "2",
                        "precision_dtype": "float32",
                        "augmentation_type": "crop",
                    },
                    "RoleArn":
                    "DummyRole",
                    "OutputDataConfig": {
                        "S3OutputPath": "s3://my-bucket/"
                    },
                    "ResourceConfig": {
                        "InstanceCount": 1,
                        "InstanceType": "ml.c5.4xlarge",
                        "VolumeSizeInGB": 30,
                    },
                    "StoppingCondition": {
                        "MaxRuntimeInSeconds": 86400
                    },
                    "AlgorithmSpecification": {
                        "TrainingInputMode": "File",
                        "TrainingImage": "fakeimage",
                    },
                    "InputDataConfig": [{
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": data_source_uri_parameter,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "training",
                    }],
                    "DefinitionName":
                    "estimator-1",
                    "TuningObjective": {
                        "Type": "Minimize",
                        "MetricName": "val:loss"
                    },
                    "HyperParameterRanges": {
                        "ContinuousParameterRanges": [
                            {
                                "Name": "learning_rate",
                                "MinValue": "0.0001",
                                "MaxValue": "0.05",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "momentum",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "weight_decay",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                        ],
                        "CategoricalParameterRanges": [],
                        "IntegerParameterRanges": [],
                    },
                },
                {
                    "StaticHyperParameters": {
                        "num_layers": "18",
                        "image_shape": "3,224,224",
                        "num_classes": "257",
                        "num_training_samples": "15420",
                        "mini_batch_size": "128",
                        "epochs": "10",
                        "optimizer": "sgd",
                        "top_k": "2",
                        "precision_dtype": "float32",
                        "augmentation_type": "crop",
                    },
                    "RoleArn":
                    "DummyRole",
                    "OutputDataConfig": {
                        "S3OutputPath": "s3://my-bucket/"
                    },
                    "ResourceConfig": {
                        "InstanceCount": 1,
                        "InstanceType": "ml.c5.4xlarge",
                        "VolumeSizeInGB": 30,
                    },
                    "StoppingCondition": {
                        "MaxRuntimeInSeconds": 86400
                    },
                    "AlgorithmSpecification": {
                        "TrainingInputMode": "File",
                        "TrainingImage": "fakeimage",
                    },
                    "InputDataConfig": [{
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": data_source_uri_parameter,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "training",
                    }],
                    "DefinitionName":
                    "estimator-2",
                    "TuningObjective": {
                        "Type": "Minimize",
                        "MetricName": "val:loss"
                    },
                    "HyperParameterRanges": {
                        "ContinuousParameterRanges": [
                            {
                                "Name": "learning_rate",
                                "MinValue": "0.0001",
                                "MaxValue": "0.05",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "momentum",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "weight_decay",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                        ],
                        "CategoricalParameterRanges": [],
                        "IntegerParameterRanges": [],
                    },
                },
            ],
        },
    }
Code example #10
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
        objective_metric_name = "test:pwll"

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"
    assert attached_tuner.estimator.alpha0 == 1.0
    assert attached_tuner.estimator.num_topics == 1

    best_training_job = attached_tuner.best_training_job()

    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None
Code example #11
def test_single_algo_tuning_step(sagemaker_session):
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest")
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=1,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
    )
    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )

    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(0.0001, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }

    tuner = HyperparameterTuner(
        estimator=estimator,
        objective_metric_name="val:accuracy",
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Maximize",
        max_jobs=5,
        max_parallel_jobs=2,
        early_stopping_type="OFF",
        strategy="Bayesian",
        warm_start_config=WarmStartConfig(
            warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
            parents=set(["parent-hpo"]),
        ),
    )

    inputs = TrainingInput(s3_data=data_source_uri_parameter)

    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs=inputs,
    )

    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 5,
                    "MaxParallelTrainingJobs": 2
                },
                "TrainingJobEarlyStoppingType": "OFF",
                "HyperParameterTuningJobObjective": {
                    "Type": "Maximize",
                    "MetricName": "val:accuracy",
                },
                "ParameterRanges": {
                    "ContinuousParameterRanges": [
                        {
                            "Name": "learning_rate",
                            "MinValue": "0.0001",
                            "MaxValue": "0.05",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "momentum",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                        {
                            "Name": "weight_decay",
                            "MinValue": "0.0",
                            "MaxValue": "0.99",
                            "ScalingType": "Auto",
                        },
                    ],
                    "CategoricalParameterRanges": [],
                    "IntegerParameterRanges": [],
                },
            },
            "TrainingJobDefinition": {
                "StaticHyperParameters": {
                    "num_layers": "18",
                    "image_shape": "3,224,224",
                    "num_classes": "257",
                    "num_training_samples": "15420",
                    "mini_batch_size": "128",
                    "epochs": "10",
                    "optimizer": "sgd",
                    "top_k": "2",
                    "precision_dtype": "float32",
                    "augmentation_type": "crop",
                },
                "RoleArn":
                "DummyRole",
                "OutputDataConfig": {
                    "S3OutputPath": "s3://my-bucket/"
                },
                "ResourceConfig": {
                    "InstanceCount": 1,
                    "InstanceType": "ml.c5.4xlarge",
                    "VolumeSizeInGB": 30,
                },
                "StoppingCondition": {
                    "MaxRuntimeInSeconds": 86400
                },
                "AlgorithmSpecification": {
                    "TrainingInputMode": "File",
                    "TrainingImage": "fakeimage",
                },
                "InputDataConfig": [{
                    "DataSource": {
                        "S3DataSource": {
                            "S3DataType": "S3Prefix",
                            "S3Uri": data_source_uri_parameter,
                            "S3DataDistributionType": "FullyReplicated",
                        }
                    },
                    "ChannelName": "training",
                }],
            },
            "WarmStartConfig": {
                "WarmStartType":
                "IdenticalDataAndAlgorithm",
                "ParentHyperParameterTuningJobs": [{
                    "HyperParameterTuningJobName":
                    "parent-hpo",
                }],
            },
        },
    }

    assert tuning_step.properties.HyperParameterTuningJobName.expr == {
        "Get": "Steps.MyTuningStep.HyperParameterTuningJobName"
    }
    assert tuning_step.properties.TrainingJobSummaries[0].TrainingJobName.expr == {
        "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"
    }
    assert tuning_step.get_top_model_s3_uri(0, "my-bucket", "my-prefix").expr == {
        "Std:Join": {
            "On": "/",
            "Values": [
                "s3:/",
                "my-bucket",
                "my-prefix",
                {"Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName"},
                "output/model.tar.gz",
            ],
        }
    }
Code example #12
# Set up the job
from sagemaker.pytorch import PyTorch
from sagemaker.tuner import (CategoricalParameter, ContinuousParameter,
                             HyperparameterTuner)

estimator = PyTorch(entry_point="mnist.py",
                    role=role,
                    framework_version='1.4.0',
                    train_instance_count=1,
                    train_instance_type='ml.m4.xlarge',
                    hyperparameters={
                        'epochs': 6,
                        'backend': 'gloo'
                    })


hyperparameter_ranges = {
    'lr': ContinuousParameter(0.001, 0.1),
    'batch-size': CategoricalParameter([32, 64, 128, 256, 512])
}


objective_metric_name = 'average test loss'
objective_type = 'Minimize'
metric_definitions = [{'Name': 'average test loss',
                       'Regex': 'Test set: Average loss: ([0-9\\.]+)'}]



tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=9,
                            max_parallel_jobs=3,
                            objective_type=objective_type)
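
# Hypothetical launch (not in the original excerpt); the channel name and
# data location are assumptions.
tuner.fit({'training': 's3://<s3-bucket>/mnist/train'})  # replace
print('Best training job:', tuner.best_training_job())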
Code example #13
# ### Hyperparameter optimization
# Another part of the model tuning phase is hyperparameter optimization. This section gives you an opportunity to tune your hyperparameters and see how much tuning improves your model's performance. Use the following template code to get started launching an Amazon SageMaker hyperparameter tuning job and viewing its evaluation metrics, and use the questions below to guide you through the rest of the section.
# 
# ### Key questions to consider:
# 1. How does your chosen objective metric change as the runtime of your tuning job increases? What is the relationship between the objective metric values you're getting and time?
# 2. What is the correlation between your objective metric and the individual hyperparameters? Is there a hyperparameter that has a strong correlation with your objective metric? If so, what might you do to leverage this strong correlation?
# 3. Analyze the performance of your model after hyperparameter tuning. Is current performance sufficient for what you need to solve your business problem?
# 
# #### <span style="color: blue;">Project presentation: Record key decisions and methods you use in this section in your project presentations, as well as any new performance metrics you obtain after evaluating your model again.</span>

# In[51]:


from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

hyperparameter_ranges = {
    'wd': ContinuousParameter(1e-5, 1),
    'l1': ContinuousParameter(1e-5, 0.01),
    'learning_rate': ContinuousParameter(1e-4, 0.1)
}

objective_metric_name = 'test:recall'

tuner = HyperparameterTuner(linear_estimator_smote,
                            objective_metric_name,
                            hyperparameter_ranges,
                            max_jobs=10,
                            max_parallel_jobs=3)

tuner.fit([train_records_smote, val_records_smote, test_records_smote], include_cls_metadata=False)
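
# Sketch (assumes the standard tuner analytics API; not part of the original
# notebook): view the evaluation metrics once the tuning job has finished.
tuner.wait()
metrics_df = tuner.analytics().dataframe()
print(metrics_df[['TrainingJobName', 'FinalObjectiveValue']]
      .sort_values('FinalObjectiveValue', ascending=False))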

Code example #14
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole',
                  train_instance_type='ml.c4.xlarge',
                  num_topics=10,
                  sagemaker_session=sagemaker_session)

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'alpha0': ContinuousParameter(1, 10),
            'num_topics': IntegerParameter(1, 2)
        }
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2)

        tuning_job_name = unique_name_from_base('test-lda', max_length=32)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

        print('Started hyperparameter tuning job with name: ' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
Code example #15
File: train.py Project: sbuttler/examples
print("Using inputs: ", inputs)

estimator = PyTorch(entry_point="cifar10.py",
                    source_dir=os.getcwd() + "/source",
                    role=role,
                    framework_version='1.0.0',
                    py_version='py3',
                    instance_count=1,
                    instance_type='ml.c5.xlarge',
                    hyperparameters={
                        'epochs': 50,
                        'momentum': 0.9
                    })

hyperparameter_ranges = {
    'lr': ContinuousParameter(0.0001, 0.001),
    'hidden_nodes': IntegerParameter(20, 100),
    'batch_size': CategoricalParameter([128, 256, 512]),
    'conv1_channels': CategoricalParameter([32, 64, 128]),
    'conv2_channels': CategoricalParameter([64, 128, 256, 512]),
}

objective_metric_name = 'average test accuracy'
objective_type = 'Maximize'
metric_definitions = [{
    'Name': 'average test accuracy',
    'Regex': 'Test Accuracy: ([0-9\\.]+)'
}]

tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
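                            hyperparameter_ranges,
                            metric_definitions,
                            # Hypothetical completion: the original excerpt
                            # ends mid-call; the job counts below are
                            # assumptions, the other arguments are defined
                            # above.
                            objective_type=objective_type,
                            max_jobs=10,
                            max_parallel_jobs=2)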
Code example #16
def test_tuning_chainer(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            py_version=PYTHON_VERSION,
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [{
            'Name': 'Validation-accuracy',
            'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'
        }]

        tuner = HyperparameterTuner(estimator,
                                    objective_metric_name,
                                    hyperparameter_ranges,
                                    metric_definitions,
                                    max_jobs=2,
                                    max_parallel_jobs=2)

        tuning_job_name = unique_name_from_base('chainer', max_length=32)
        tuner.fit({'train': train_input, 'test': test_input},
                  job_name=tuning_job_name)

        print('Started hyperparameter tuning job with name: ' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
Code example #17
File: sagemaker_config.py Project: ryankarlos/AWS-ML
    "batch_transform_instance_type": 'ml.m4.xlarge'
}

# Parameters for pipeline execution
processing_instance_count = ParameterInteger(name="ProcessingInstanceCount",
                                             default_value=1)
processing_instance_type = ParameterString(name="ProcessingInstanceType",
                                           default_value="ml.m5.xlarge")
training_instance_type = ParameterString(name="TrainingInstanceType",
                                         default_value="ml.m5.xlarge")
training_instance_count = ParameterInteger(name="TrainingInstanceCount",
                                           default_value=1)
model_approval_status = ParameterString(
    name="ModelApprovalStatus",
    # ModelApprovalStatus can default to "Approved" if you don't want
    # manual approval.
    default_value="PendingManualApproval",
)

hp_config_abalone = {
    "xgboost": {
        "hyperparameter_ranges": {
            "max_depth": IntegerParameter(3, 10, scaling_type="Auto"),
            "eta": ContinuousParameter(0, 1, scaling_type="Auto"),
            "alpha": ContinuousParameter(0, 2, scaling_type="Auto"),
            "min_child_weight": ContinuousParameter(1, 10,
                                                    scaling_type="Auto"),
            "subsample": ContinuousParameter(0.2, 0.9, scaling_type="Auto"),
        }
    }
}
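
# Hypothetical usage (not from the original file): feed the abalone XGBoost
# ranges into a tuner. `xgb_estimator` is assumed to be defined elsewhere;
# "validation:rmse" is the built-in SageMaker XGBoost metric.
from sagemaker.tuner import HyperparameterTuner

tuner = HyperparameterTuner(
    estimator=xgb_estimator,
    objective_metric_name="validation:rmse",
    hyperparameter_ranges=hp_config_abalone["xgboost"]["hyperparameter_ranges"],
    objective_type="Minimize",
    max_jobs=10,
    max_parallel_jobs=2,
)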