def test_continuous_parameter_ranges():
    """A ContinuousParameter's tuning range is exactly Name plus stringified bounds."""
    param = ContinuousParameter(0.1, 1e-2)
    tuning_range = param.as_tuning_range('some')
    # Dict equality covers the key count and all three values at once.
    assert tuning_range == {'Name': 'some', 'MinValue': '0.1', 'MaxValue': '0.01'}
def get_xgb_tuner(output_path, model_name):
    """Build a HyperparameterTuner around an XGBoost estimator.

    Args:
        output_path: S3 path where model artifacts are written.
        model_name: Base name used for the tuning job.

    Returns:
        A configured HyperparameterTuner minimizing validation RMSE.

    NOTE(review): `role` is not a parameter here — it is presumably a
    module-level constant; confirm it is defined in this module.
    """
    xgb = _init_model(role, output_path, model_name)
    # Static (non-tuned) hyperparameters. Full list:
    # https://github.com/dmlc/xgboost/blob/master/doc/parameter.rst#learning-task-parameters
    xgb.set_hyperparameters(
        eval_metric='rmse',
        objective='reg:linear',
        num_round=100,
        rate_drop=0.3,
        tweedie_variance_power=1.4,
    )
    # Fixed typo in the local name ("hyperparemeters" -> "hyperparameters").
    hyperparameters_to_tune = {
        'eta': ContinuousParameter(0, 1),
        'min_child_weight': ContinuousParameter(1, 10),
        'alpha': ContinuousParameter(0, 2),
        'max_depth': IntegerParameter(1, 10),
    }
    tuner = HyperparameterTuner(
        xgb,
        'validation:rmse',  # objective metric
        hyperparameters_to_tune,
        max_jobs=20,
        max_parallel_jobs=3,
        base_tuning_job_name=model_name + "-tuner",
        objective_type='Minimize',
    )
    return tuner
def test_continuous_parameter_ranges():
    """Verify as_tuning_range emits exactly Name/MinValue/MaxValue with string bounds."""
    ranges = ContinuousParameter(0.1, 1e-2).as_tuning_range('some')
    # Exactly these three keys, nothing more.
    assert sorted(ranges) == ['MaxValue', 'MinValue', 'Name']
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '0.1'
    assert ranges['MaxValue'] == '0.01'
def test_tuning(sagemaker_session, ecr_image, instance_type):
    """Run a small MXNet tuning job (2 jobs, 2 in parallel) against the given image."""
    estimator = MXNet(
        entry_point=SCRIPT_PATH,
        role='SageMakerRole',
        train_instance_count=1,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        image_name=ecr_image,
        hyperparameters={'epochs': 1},
    )
    tuner = HyperparameterTuner(
        estimator,
        'Validation-accuracy',
        {'learning-rate': ContinuousParameter(0.01, 0.2)},
        [{'Name': 'Validation-accuracy',
          'Regex': 'Validation-accuracy=([0-9\\.]+)'}],
        max_jobs=2,
        max_parallel_jobs=2,
    )
    with timeout(minutes=20):
        prefix = 'mxnet_mnist/{}'.format(utils.sagemaker_timestamp())
        channels = {
            'train': estimator.sagemaker_session.upload_data(
                path=os.path.join(DATA_PATH, 'train'),
                key_prefix=prefix + '/train'),
            'test': estimator.sagemaker_session.upload_data(
                path=os.path.join(DATA_PATH, 'test'),
                key_prefix=prefix + '/test'),
        }
        job_name = utils.unique_name_from_base('test-mxnet-image', max_length=32)
        tuner.fit(channels, job_name=job_name)
        tuner.wait()
def test_tuning_mxnet(sagemaker_session):
    """Tune an MXNet MNIST estimator, then deploy and invoke the best model."""
    with timeout(minutes=15):
        data_root = os.path.join(DATA_DIR, 'mxnet_mnist')
        mx = MXNet(entry_point=os.path.join(data_root, 'tuning.py'),
                   role='SageMakerRole',
                   train_instance_count=1,
                   train_instance_type='ml.m4.xlarge',
                   sagemaker_session=sagemaker_session,
                   base_job_name='tune-mxnet')
        tuner = HyperparameterTuner(
            mx,
            'Validation-accuracy',
            {'learning_rate': ContinuousParameter(0.01, 0.2)},
            [{'Name': 'Validation-accuracy',
              'Regex': 'Validation-accuracy=([0-9\\.]+)'}],
            max_jobs=4,
            max_parallel_jobs=2)
        upload = mx.sagemaker_session.upload_data
        train_input = upload(path=os.path.join(data_root, 'train'),
                             key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = upload(path=os.path.join(data_root, 'test'),
                            key_prefix='integ-test-data/mxnet_mnist/test')
        tuner.fit({'train': train_input, 'test': test_input})
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)
        time.sleep(15)
        tuner.wait()
    # Deploy the winner and run a single zero-image prediction as a smoke test.
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predictor.predict(np.zeros(shape=(1, 1, 28, 28)))
def test_tuning_lda(sagemaker_session):
    """Tune LDA over alpha0/num_topics and smoke-test the best endpoint."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        with open(os.path.join(data_path, 'nips-train_1.pbr'), 'rb') as f:
            all_records = read_records(f)
        # Every record shares the same feature dimension; read it from the first.
        feature_num = int(all_records[0].features['values'].float32_tensor.shape[0])
        lda = LDA(role='SageMakerRole',
                  train_instance_type='ml.c4.xlarge',
                  num_topics=10,
                  sagemaker_session=sagemaker_session,
                  base_job_name='test-lda')
        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = 'test'
        # Only these two hyperparameters are searched.
        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name='test:pwll',
            hyperparameter_ranges={'alpha0': ContinuousParameter(1, 10),
                                   'num_topics': IntegerParameter(1, 2)},
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2)
        tuner.fit([record_set, test_record_set], mini_batch_size=1)
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)
        time.sleep(15)
        tuner.wait()
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(np.random.rand(1, feature_num))
        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
def test_tuning_mxnet(
    sagemaker_session,
    mxnet_training_latest_version,
    mxnet_training_latest_py_version,
    cpu_instance_type,
):
    """Tune an MXNet MNIST estimator; deploy and invoke the best model."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "mxnet_mnist")
        estimator = MXNet(
            entry_point=os.path.join(data_path, "mnist.py"),
            role="SageMakerRole",
            py_version=mxnet_training_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            framework_version=mxnet_training_latest_version,
            sagemaker_session=sagemaker_session,
        )
        tuner = HyperparameterTuner(
            estimator,
            "Validation-accuracy",
            {"learning-rate": ContinuousParameter(0.01, 0.2)},
            [{"Name": "Validation-accuracy",
              "Regex": "Validation-accuracy=([0-9\\.]+)"}],
            max_jobs=4,
            max_parallel_jobs=2,
        )
        # Upload both channels under the shared integ-test prefix.
        channels = {
            name: estimator.sagemaker_session.upload_data(
                path=os.path.join(data_path, name),
                key_prefix="integ-test-data/mxnet_mnist/" + name,
            )
            for name in ("train", "test")
        }
        tuning_job_name = unique_name_from_base("tune-mxnet", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit(channels, job_name=tuning_job_name)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        predictor.predict(np.zeros(shape=(1, 1, 28, 28)))
class LinearAwsXGBooost(AwsXGBoost):
    """Linear-objective (reg:linear) XGBoost model definition.

    NOTE(review): the class name contains a typo ("XGBooost"), but it is kept
    unchanged because external callers/registries may reference it by name.
    """

    # Static hyperparameters used when training without tuning.
    default_hyperparameters: Dict[str, Any] = {
        "max_depth": 5,
        "eta": 0.2,
        "gamma": 4,
        # Fixed: XGBoost's parameter is "min_child_weight" (singular);
        # "min_child_weights" is not a recognized hyperparameter name.
        "min_child_weight": 6,
        "subsample": 0.8,
        "objective": "reg:linear",
        "early_stopping_rounds": 10,
        "num_round": 200,
    }
    # Search ranges used for hyperparameter tuning jobs.
    default_hyperparameter_tuning: Dict[str, Any] = {
        "max_depth": IntegerParameter(3, 12),
        "eta": ContinuousParameter(0.05, 0.5),
        "gamma": ContinuousParameter(0, 10),
        "subsample": ContinuousParameter(0.5, 0.9),
        "num_round": IntegerParameter(50, 400),
    }
    name: str = "linear_xgboost"
def test_tuning_tf(sagemaker_session):
    """Tune a TF iris classifier on learning_rate; deploy and compare predict forms."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        estimator = TensorFlow(
            entry_point=os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py"),
            role="SageMakerRole",
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={"input_tensor_name": "inputs"},
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )
        inputs = sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix="integ-test-data/tf_iris")
        tuner = HyperparameterTuner(
            estimator,
            "loss",
            {"learning_rate": ContinuousParameter(0.05, 0.2)},
            [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}],
            objective_type="Minimize",
            max_jobs=2,
            max_parallel_jobs=2,
        )
        tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
        tuner.fit(inputs, job_name=tuning_job_name)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        time.sleep(15)
        tuner.wait()
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, "ml.c4.xlarge")
        features = [6.4, 3.2, 4.5, 1.5]
        # Both request shapes must yield the same prediction.
        dict_result = predictor.predict({"inputs": features})
        print("predict result: {}".format(dict_result))
        list_result = predictor.predict(features)
        print("predict result: {}".format(list_result))
        assert dict_result == list_result
def test_tuning_chainer(sagemaker_session):
    """Tune a Chainer MNIST estimator on alpha; deploy and smoke-test the best model."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')
        estimator = Chainer(entry_point=os.path.join(data_path, 'mnist.py'),
                            role='SageMakerRole',
                            py_version=PYTHON_VERSION,
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})
        upload = estimator.sagemaker_session.upload_data
        train_input = upload(path=os.path.join(data_path, 'train'),
                             key_prefix='integ-test-data/chainer_mnist/train')
        test_input = upload(path=os.path.join(data_path, 'test'),
                            key_prefix='integ-test-data/chainer_mnist/test')
        tuner = HyperparameterTuner(
            estimator,
            'Validation-accuracy',
            {'alpha': ContinuousParameter(0.001, 0.005)},
            [{'Name': 'Validation-accuracy',
              'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'}],
            max_jobs=2,
            max_parallel_jobs=2)
        tuning_job_name = unique_name_from_base('chainer', max_length=32)
        tuner.fit({'train': train_input, 'test': test_input}, job_name=tuning_job_name)
        print('Started hyperparameter tuning job with name:' + tuning_job_name)
        time.sleep(15)
        tuner.wait()
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        batch_size = 100
        # The endpoint must accept flat, channel-first, and 2-D image layouts.
        for shape in ((batch_size, 784), (batch_size, 1, 28, 28), (batch_size, 28, 28)):
            output = predictor.predict(np.zeros(shape, dtype='float32'))
            assert len(output) == batch_size
def test_tuning_tf_vpc_multi(sagemaker_session):
    """Test Tensorflow multi-instance using the same VpcConfig for training and inference"""
    instance_type = "ml.c4.xlarge"
    instance_count = 2
    # Provision (or reuse) VPC resources and enable encrypted inter-node traffic.
    ec2_client = sagemaker_session.boto_session.client("ec2")
    subnet_ids, security_group_id = vpc_test_utils.get_or_create_vpc_resources(
        ec2_client, sagemaker_session.boto_region_name)
    vpc_test_utils.setup_security_group_for_encryption(ec2_client, security_group_id)
    estimator = TensorFlow(
        entry_point=os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py"),
        role="SageMakerRole",
        training_steps=1,
        evaluation_steps=1,
        hyperparameters={"input_tensor_name": "inputs"},
        train_instance_count=instance_count,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-vpc-tf",
        subnets=subnet_ids,
        security_group_ids=[security_group_id],
        encrypt_inter_container_traffic=True,
    )
    inputs = sagemaker_session.upload_data(
        path=DATA_PATH, key_prefix="integ-test-data/tf_iris")
    tuner = HyperparameterTuner(
        estimator,
        "loss",
        {"learning_rate": ContinuousParameter(0.05, 0.2)},
        [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}],
        objective_type="Minimize",
        max_jobs=2,
        max_parallel_jobs=2,
    )
    tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        tuner.fit(inputs, job_name=tuning_job_name)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        time.sleep(15)
        tuner.wait()
def test_tuning_tf(sagemaker_session):
    """Tune a TF iris classifier; deploy the best model and compare predict forms."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        estimator = TensorFlow(
            entry_point=os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py'),
            role='SageMakerRole',
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={'input_tensor_name': 'inputs'},
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='tune-tf')
        inputs = sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        tuner = HyperparameterTuner(
            estimator,
            'loss',
            {'learning_rate': ContinuousParameter(0.05, 0.2)},
            [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}],
            objective_type='Minimize',
            max_jobs=2,
            max_parallel_jobs=2)
        tuner.fit(inputs)
        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)
        time.sleep(15)
        tuner.wait()
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        features = [6.4, 3.2, 4.5, 1.5]
        # The same features must predict identically whether sent as dict or list.
        dict_result = predictor.predict({'inputs': features})
        print('predict result: {}'.format(dict_result))
        list_result = predictor.predict(features)
        print('predict result: {}'.format(list_result))
        assert dict_result == list_result
def _get_continuous_parameter(self, data):
    """Build a ContinuousParameter from a descriptor dict.

    Requires "min_value" and "max_value"; "scaling_type" defaults to "Auto".
    Raises DescriptorError when a required attribute is missing.
    """
    # Validate required attributes in a fixed order (min before max),
    # matching the error messages exactly.
    for required in ("min_value", "max_value"):
        if required not in data:
            raise DescriptorError(
                'The "%s" attribute of a continuous parameter is required' % required
            )
    return ContinuousParameter(
        min_value=data["min_value"],
        max_value=data["max_value"],
        scaling_type=data.get("scaling_type", "Auto"),
    )
def test_mxnet_tuning(sagemaker_session, mxnet_full_version):
    """Kick off an MXNet tuning job and report its name (no wait, no deploy)."""
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')
        estimator = MXNet(entry_point=os.path.join(data_path, 'tuning.py'),
                          role='SageMakerRole',
                          framework_version=mxnet_full_version,
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge',
                          sagemaker_session=sagemaker_session,
                          base_job_name='hpo')
        tuner = HyperparameterTuner(
            estimator,
            'Validation-accuracy',
            {'learning_rate': ContinuousParameter(0.01, 0.2)},
            [{'Name': 'Validation-accuracy',
              'Regex': 'Validation-accuracy=([0-9\\.]+)'}],
            max_jobs=4,
            max_parallel_jobs=2)
        channels = {
            split: estimator.sagemaker_session.upload_data(
                path=os.path.join(data_path, split),
                key_prefix='integ-test-data/mxnet_mnist/' + split)
            for split in ('train', 'test')
        }
        tuner.fit(channels)
        print('tuning job successfully created: {}'.format(
            tuner.latest_tuning_job.name))
}, { 'Name': 'validation:accuracy', 'Regex': 'val_accuracy: ([0-9\\.]+)' }] sm_estimator = sagemaker_estimator(sagemaker_role, code_entry, code_dir, instance_type, instance_count, hyperparameters, metric_definitions) # sagemaker training job training_job_name = "tf-mnist-training-{}".format( strftime("%d-%H-%M-%S", gmtime())) sagemaker_training(sm_estimator, train_s3, training_job_name) # sagemaker tuning job hyperparameter_ranges = { 'epochs': IntegerParameter(50, 200), 'learning_rate': ContinuousParameter(0.0001, 0.1, scaling_type="Logarithmic"), 'batch_size': IntegerParameter(32, 256), 'drop_rate': ContinuousParameter(0.0, 1.0) } tuning_job_name = "tf-mnist-tuning-{}".format( strftime("%d-%H-%M-%S", gmtime())) max_jobs = 4 max_parallel_jobs = 2 #sagemaker_hyperparam_tuning(sm_estimator, train_s3, hyperparameter_ranges, metric_definitions, tuning_job_name, max_jobs, max_parallel_jobs)
"mini_batch_size": "200", "num_factors": "64", "predictor_type": "regressor", }, }, "inputs": { "train": "s3://{1}/prepare/train/train.protobuf", # replace }, } config["tune_model"] = { "tuner_config": { "objective_metric_name": "test:rmse", "objective_type": "Minimize", "hyperparameter_ranges": { "factors_lr": ContinuousParameter(0.0001, 0.2), "factors_init_sigma": ContinuousParameter(0.0001, 1), }, "max_jobs": 20, "max_parallel_jobs": 2, "base_tuning_job_name": "hpo-recommender", }, "inputs": { "train": "s3://{1}/prepare/train/train.protobuf", # replace "test": "s3://{1}/prepare/validate/validate.protobuf", # replace }, } config["batch_transform"] = { "transform_config": { "instance_count": 1,
key_prefix=prefix) print("Using inputs: ", inputs) estimator = PyTorch(entry_point="cifar10.py", source_dir=os.getcwd() + "/source", role=role, framework_version='1.0.0.dev', train_instance_count=1, train_instance_type='ml.c5.xlarge', hyperparameters={ 'epochs': 50, 'momentum': 0.9 }) hyperparameter_ranges = { 'lr': ContinuousParameter(0.0001, 0.001), 'hidden_nodes': IntegerParameter(20, 100), 'batch_size': CategoricalParameter([128, 256, 512]), 'conv1_channels': CategoricalParameter([32, 64, 128]), 'conv2_channels': CategoricalParameter([64, 128, 256, 512]), } objective_metric_name = 'average test accuracy' objective_type = 'Maximize' metric_definitions = [{ 'Name': 'average test accuracy', 'Regex': 'Test Accuracy: ([0-9\\.]+)' }] tuner = HyperparameterTuner(estimator, objective_metric_name,
# Public SageMaker sample-data S3 locations for the current region.
train_data_location = 's3://sagemaker-sample-data-{}/mxnet/mnist/train'.format(
    region)
test_data_location = 's3://sagemaker-sample-data-{}/mxnet/mnist/test'.format(
    region)
# MXNet estimator running the local mnist.py training script.
# NOTE(review): `role` and `region` are assumed to be defined earlier in this
# script — confirm against the full file.
estimator = MXNet(entry_point='mnist.py',
                  role=role,
                  train_instance_count=1,
                  train_instance_type='ml.m4.xlarge',
                  sagemaker_session=sagemaker.Session(),
                  base_job_name='DEMO-hpo-mxnet',
                  hyperparameters={'batch_size': 100})
# Search space: optimizer choice, learning rate, and epoch count.
hyperparameter_ranges = {
    'optimizer': CategoricalParameter(['sgd', 'Adam']),
    'learning_rate': ContinuousParameter(0.01, 0.2),
    'num_epoch': IntegerParameter(10, 50)
}
objective_metric_name = 'Validation-accuracy'
# Parse validation accuracy out of the training log stream.
metric_definitions = [{
    'Name': 'Validation-accuracy',
    'Regex': 'Validation-accuracy=([0-9\\.]+)'
}]
# 9 training jobs total, 3 running at a time.
tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=9,
                            max_parallel_jobs=3)
def test_continuous_parameter():
    """ContinuousParameter is a _ParameterRange whose __name__ is 'Continuous'."""
    cont_param = ContinuousParameter(0.1, 1e-2)
    assert isinstance(cont_param, _ParameterRange)
    # Fixed: compare strings with ==, not identity ("is"), which only worked
    # because of CPython string interning and is not a guaranteed behavior.
    assert cont_param.__name__ == 'Continuous'
def test_tuning_chainer(sagemaker_session, chainer_latest_version,
                        chainer_latest_py_version, cpu_instance_type):
    """Tune a Chainer MNIST estimator on alpha; deploy and smoke-test the best model."""
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "chainer_mnist")
        estimator = Chainer(
            entry_point=os.path.join(data_path, "mnist.py"),
            role="SageMakerRole",
            framework_version=chainer_latest_version,
            py_version=chainer_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )
        channels = {
            split: estimator.sagemaker_session.upload_data(
                path=os.path.join(data_path, split),
                key_prefix="integ-test-data/chainer_mnist/" + split,
            )
            for split in ("train", "test")
        }
        tuner = HyperparameterTuner(
            estimator,
            "Validation-accuracy",
            {"alpha": ContinuousParameter(0.001, 0.005)},
            [{
                "Name": "Validation-accuracy",
                "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
            }],
            max_jobs=2,
            max_parallel_jobs=2,
        )
        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(
            tuning_job_name))
        tuner.fit(channels, job_name=tuning_job_name)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        batch_size = 100
        # The endpoint must accept flat, channel-first, and 2-D image layouts.
        for shape in ((batch_size, 784), (batch_size, 1, 28, 28), (batch_size, 28, 28)):
            output = predictor.predict(np.zeros(shape, dtype="float32"))
            assert len(output) == batch_size
REGION = 'us-west-2' BUCKET_NAME = 'Some-Bucket' ROLE = 'myrole' IMAGE_NAME = 'image' TRAIN_INSTANCE_COUNT = 1 TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge' NUM_COMPONENTS = 5 SCRIPT_NAME = 'my_script.py' FRAMEWORK_VERSION = '1.0.0' INPUTS = 's3://mybucket/train' OBJECTIVE_METRIC_NAME = 'mock_metric' HYPERPARAMETER_RANGES = { 'validated': ContinuousParameter(0, 5), 'elizabeth': IntegerParameter(0, 5), 'blank': CategoricalParameter([0, 5]) } METRIC_DEFINTIONS = 'mock_metric_definitions' TUNING_JOB_DETAILS = { 'HyperParameterTuningJobConfig': { 'ResourceLimits': { 'MaxParallelTrainingJobs': 1, 'MaxNumberOfTrainingJobs': 1 }, 'HyperParameterTuningJobObjective': { 'MetricName': OBJECTIVE_METRIC_NAME, 'Type': 'Minimize' },
def test_multi_algo_tuning_step(sagemaker_session):
    """A multi-algorithm TuningStep renders both estimators into TrainingJobDefinitions.

    Two definitions share one estimator and one hyperparameter search space;
    the request must contain a full definition per estimator key.
    """
    # Pipeline parameters exercised inside the rendered request.
    data_source_uri_parameter = ParameterString(
        name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest")
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=instance_count,
        instance_type="ml.c5.4xlarge",
        profiler_config=ProfilerConfig(system_monitor_interval_millis=500),
        rules=[],
        sagemaker_session=sagemaker_session,
        max_retry_attempts=10,
    )
    estimator.set_hyperparameters(
        num_layers=18,
        image_shape="3,224,224",
        num_classes=257,
        num_training_samples=15420,
        mini_batch_size=128,
        epochs=10,
        optimizer="sgd",
        top_k="2",
        precision_dtype="float32",
        augmentation_type="crop",
    )
    # A pipeline parameter may appear as a range bound (learning_rate min).
    initial_lr_param = ParameterString(name="InitialLR", default_value="0.0001")
    hyperparameter_ranges = {
        "learning_rate": ContinuousParameter(initial_lr_param, 0.05),
        "momentum": ContinuousParameter(0.0, 0.99),
        "weight_decay": ContinuousParameter(0.0, 0.99),
    }
    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": estimator,
            "estimator-2": estimator,
        },
        objective_type="Minimize",
        objective_metric_name_dict={
            "estimator-1": "val:loss",
            "estimator-2": "val:loss",
        },
        hyperparameter_ranges_dict={
            "estimator-1": hyperparameter_ranges,
            "estimator-2": hyperparameter_ranges,
        },
    )
    inputs = TrainingInput(s3_data=data_source_uri_parameter)
    tuning_step = TuningStep(
        name="MyTuningStep",
        tuner=tuner,
        inputs={
            "estimator-1": inputs,
            "estimator-2": inputs,
        },
    )
    # Expected request: shared tuning config plus one definition per estimator.
    assert tuning_step.to_request() == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": {
            "HyperParameterTuningJobConfig": {
                "Strategy": "Bayesian",
                "ResourceLimits": {
                    "MaxNumberOfTrainingJobs": 1,
                    "MaxParallelTrainingJobs": 1
                },
                "TrainingJobEarlyStoppingType": "Off",
            },
            "TrainingJobDefinitions": [
                {
                    "StaticHyperParameters": {
                        "num_layers": "18",
                        "image_shape": "3,224,224",
                        "num_classes": "257",
                        "num_training_samples": "15420",
                        "mini_batch_size": "128",
                        "epochs": "10",
                        "optimizer": "sgd",
                        "top_k": "2",
                        "precision_dtype": "float32",
                        "augmentation_type": "crop",
                    },
                    "RoleArn": "DummyRole",
                    "OutputDataConfig": {
                        "S3OutputPath": "s3://my-bucket/"
                    },
                    "ResourceConfig": {
                        "InstanceCount": 1,
                        "InstanceType": "ml.c5.4xlarge",
                        "VolumeSizeInGB": 30,
                    },
                    "StoppingCondition": {
                        "MaxRuntimeInSeconds": 86400
                    },
                    "AlgorithmSpecification": {
                        "TrainingInputMode": "File",
                        "TrainingImage": "fakeimage",
                    },
                    "InputDataConfig": [{
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": data_source_uri_parameter,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "training",
                    }],
                    "DefinitionName": "estimator-1",
                    "TuningObjective": {
                        "Type": "Minimize",
                        "MetricName": "val:loss"
                    },
                    "HyperParameterRanges": {
                        "ContinuousParameterRanges": [
                            {
                                "Name": "learning_rate",
                                "MinValue": initial_lr_param,
                                "MaxValue": "0.05",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "momentum",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "weight_decay",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                        ],
                        "CategoricalParameterRanges": [],
                        "IntegerParameterRanges": [],
                    },
                    "RetryStrategy": {
                        "MaximumRetryAttempts": 10,
                    },
                },
                {
                    "StaticHyperParameters": {
                        "num_layers": "18",
                        "image_shape": "3,224,224",
                        "num_classes": "257",
                        "num_training_samples": "15420",
                        "mini_batch_size": "128",
                        "epochs": "10",
                        "optimizer": "sgd",
                        "top_k": "2",
                        "precision_dtype": "float32",
                        "augmentation_type": "crop",
                    },
                    "RoleArn": "DummyRole",
                    "OutputDataConfig": {
                        "S3OutputPath": "s3://my-bucket/"
                    },
                    "ResourceConfig": {
                        "InstanceCount": 1,
                        "InstanceType": "ml.c5.4xlarge",
                        "VolumeSizeInGB": 30,
                    },
                    "StoppingCondition": {
                        "MaxRuntimeInSeconds": 86400
                    },
                    "AlgorithmSpecification": {
                        "TrainingInputMode": "File",
                        "TrainingImage": "fakeimage",
                    },
                    "InputDataConfig": [{
                        "DataSource": {
                            "S3DataSource": {
                                "S3DataType": "S3Prefix",
                                "S3Uri": data_source_uri_parameter,
                                "S3DataDistributionType": "FullyReplicated",
                            }
                        },
                        "ChannelName": "training",
                    }],
                    "DefinitionName": "estimator-2",
                    "TuningObjective": {
                        "Type": "Minimize",
                        "MetricName": "val:loss"
                    },
                    "HyperParameterRanges": {
                        "ContinuousParameterRanges": [
                            {
                                "Name": "learning_rate",
                                "MinValue": initial_lr_param,
                                "MaxValue": "0.05",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "momentum",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                            {
                                "Name": "weight_decay",
                                "MinValue": "0.0",
                                "MaxValue": "0.99",
                                "ScalingType": "Auto",
                            },
                        ],
                        "CategoricalParameterRanges": [],
                        "IntegerParameterRanges": [],
                    },
                    "RetryStrategy": {
                        "MaximumRetryAttempts": 10,
                    },
                },
            ],
        },
    }
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner # TODO: Create the hyperparameter tuner object xgb_hyperparameter_tuner = HyperparameterTuner( estimator= xgb, # The estimator object to use as the basis for the training jobs. objective_metric_name= 'validation:rmse', # The metric used to compare trained models. objective_type= 'Minimize', # Whether we wish to minimize or maximize the metric. max_jobs=10, # The total number of models to train max_parallel_jobs=3, # The number of models to train in parallel hyperparameter_ranges={ 'max_depth': IntegerParameter(3, 12), 'eta': ContinuousParameter(0.05, 0.5), 'min_child_weight': IntegerParameter(2, 8), 'subsample': ContinuousParameter(0.5, 0.9), 'gamma': ContinuousParameter(0, 10), }) """ QUIZ PART 3 """ # ### (TODO) Testing the model # # Now that we've run our hyperparameter tuning job, it's time to see how well the # best performing model actually performs. To do this we will use SageMaker's # Batch Transform functionality. Batch Transform is a convenient way to perform # inference on a large dataset in a way that is not realtime. That is, we don't # necessarily need to use our model's results immediately and instead we can
def tuning(self):
    """Run a SageMaker hyperparameter tuning job for the configured model.

    Builds an XGBoost (0.90-1) estimator from the configured AWS environment,
    defines search ranges over booster and regularization parameters, and
    launches up to 100 training jobs (10 in parallel), then hands the finished
    tuner to self.post_tune.
    """
    # NOTE(review): `id` shadows the builtin; kept unchanged here.
    s3_bucket, id, secret = s3_aws_engine(name=self.aws_env)
    s3_path = ModelTune._aws_s3_path(s3_bucket)
    boto_sess = ModelTune._boto_session(id, secret)
    logger.info('Getting algorithm image URI...')
    container = get_image_uri(boto_sess.region_name, 'xgboost',
                              repo_version='0.90-1')
    logger.info('Creating sagemaker session...')
    sage_sess = sagemaker.Session(boto_sess)
    s3_input_train, s3_input_val = self.fetch_data(s3_path)
    logger.info(
        'Creating sagemaker estimator to train using the supplied {} model...'
        .format(self.model_name))
    # Classifier training uses a larger instance type than regression.
    if self.model_name == 'clf':
        train_instance_type = 'ml.m5.4xlarge'
    else:
        train_instance_type = 'ml.m5.2xlarge'
    est = Estimator(container,
                    role=self.role,
                    train_instance_count=1,
                    train_instance_type=train_instance_type,
                    output_path=s3_path + 'tuning_' + self.model_name + '/',
                    sagemaker_session=sage_sess,
                    base_job_name=self.model_name + '-tuning-job')
    logger.info('Setting hyper-parameters...')
    # Search space spans tree-booster and regularization parameters.
    hyperparameter_ranges = {
        'num_round': IntegerParameter(1, 4000),
        'eta': ContinuousParameter(0, 0.5),
        'max_depth': IntegerParameter(1, 10),
        'min_child_weight': ContinuousParameter(0, 120),
        'subsample': ContinuousParameter(0.5, 1),
        'colsample_bytree': ContinuousParameter(0.5, 1),
        'gamma': ContinuousParameter(0, 5),
        'lambda': ContinuousParameter(0, 1000),
        'alpha': ContinuousParameter(0, 1000)
    }
    # Objective and static hyperparameters depend on the model flavor:
    # classification maximizes F1, regression minimizes RMSE.
    if self.model_name == 'clf':
        est.set_hyperparameters(
            objective='reg:logistic',
            scale_pos_weight=self._get_imb_ratio()['imb_ratio'])
        objective_metric_name = 'validation:f1'
        objective_type = 'Maximize'
    else:
        est.set_hyperparameters(objective='reg:linear')
        objective_metric_name = 'validation:rmse'
        objective_type = 'Minimize'
    if est.hyperparam_dict is None:
        raise ValueError('Hyper-parameters are missing')
    else:
        logger.info(est.hyperparam_dict)
    tuner = HyperparameterTuner(
        estimator=est,
        objective_metric_name=objective_metric_name,
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type=objective_type,
        max_jobs=100,
        max_parallel_jobs=10)
    # Time the tuning run end-to-end.
    sw = Stopwatch(start=True)
    tuner.fit({'train': s3_input_train, 'validation': s3_input_val})
    self.post_tune(sage_sess, tuner)
    logger.info('Elapsed time of tuning: {}'.format(
        sw.elapsed.human_str()))
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    """Tune LDA over alpha0/num_topics, re-attach to the finished job, and deploy.

    Also verifies that HyperparameterTuner.attach restores the early-stopping
    setting and the static estimator hyperparameters.
    """
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"
        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)
        # all records must be same
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])
        lda = LDA(
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )
        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"
        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
        objective_metric_name = "test:pwll"
        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )
        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit([record_set, test_record_set], mini_batch_size=1,
                  job_name=tuning_job_name)
        # Re-attach by job name and confirm the restored configuration.
        attached_tuner = HyperparameterTuner.attach(
            tuning_job_name, sagemaker_session=sagemaker_session)
        assert attached_tuner.early_stopping_type == "Auto"
        assert attached_tuner.estimator.alpha0 == 1.0
        assert attached_tuner.estimator.num_topics == 1
        best_training_job = attached_tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)
        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None
# NOTE(review): this snippet is collapsed onto a single physical line that
# begins with "#", so everything in it (the PyTorch estimator and tuner setup)
# is currently inert comment text, not executable code. The embedded
# HyperparameterTuner(...) call is also truncated mid-argument-list (it ends
# at "max_parallel_jobs=3,") — restore the full snippet from its original
# source before relying on it.
#Set up the job from sagemaker.pytorch import PyTorch estimator = PyTorch(entry_point="mnist.py", role=role, framework_version='1.4.0', train_instance_count=1, train_instance_type='ml.m4.xlarge', hyperparameters={ 'epochs': 6, 'backend': 'gloo' }) hyperparameter_ranges = {'lr': ContinuousParameter(0.001, 0.1),'batch-size': CategoricalParameter([32,64,128,256,512])} objective_metric_name = 'average test loss' objective_type = 'Minimize' metric_definitions = [{'Name': 'average test loss', 'Regex': 'Test set: Average loss: ([0-9\\.]+)'}] tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions, max_jobs=9, max_parallel_jobs=3,
# NOTE(review): fragment truncated at both ends — it opens mid if/else (the
# "else:" here has no visible "if") and references names defined outside this
# chunk (fm, nbFeatures, train_data, test_data). In the else-branch it tunes a
# factorization-machines estimator over epochs / mini_batch_size / factors_wd
# with 4 parallel jobs and blocks until the tuning job finishes; the trailing
# commented-out lines show how to fetch the best job's logs.
best_model = fm.model_data else: fm.set_hyperparameters(feature_dim=nbFeatures, predictor_type='binary_classifier', mini_batch_size=1000, num_factors=64, epochs=100) my_tuner = HyperparameterTuner( estimator=fm, objective_metric_name='test:binary_classification_accuracy', hyperparameter_ranges={ 'epochs': IntegerParameter(1, 200), 'mini_batch_size': IntegerParameter(10, 10000), 'factors_wd': ContinuousParameter(1e-8, 512) }, max_jobs=4, max_parallel_jobs=4) my_tuner.fit({ 'train': train_data, 'test': test_data }, include_cls_metadata=False) my_tuner.wait() #sm_session = sagemaker.Session() #best_log = sm_session.logs_for_job(my_tuner.best_training_job()) #print(best_log)
# NOTE(review): module-level fixtures/constants for a tuning-job unit test.
# The TUNING_JOB_DETAILS dict is truncated at the end of this chunk (it is
# not closed after 'Strategy': 'Bayesian') — the remainder lives outside this
# view. Also note the misspelled name METRIC_DEFINTIONS ("DEFINTIONS"); it is
# referenced elsewhere, so renaming it must be coordinated with its users.
JOB_NAME = 'tuning_job' REGION = 'us-west-2' BUCKET_NAME = 'Some-Bucket' ROLE = 'myrole' IMAGE_NAME = 'image' TRAIN_INSTANCE_COUNT = 1 TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge' NUM_COMPONENTS = 5 SCRIPT_NAME = 'my_script.py' FRAMEWORK_VERSION = '1.0.0' INPUTS = 's3://mybucket/train' OBJECTIVE_METRIC_NAME = 'mock_metric' HYPERPARAMETER_RANGES = {'validated': ContinuousParameter(0, 5), 'elizabeth': IntegerParameter(0, 5), 'blank': CategoricalParameter([0, 5])} METRIC_DEFINTIONS = 'mock_metric_definitions' TUNING_JOB_DETAILS = { 'HyperParameterTuningJobConfig': { 'ResourceLimits': { 'MaxParallelTrainingJobs': 1, 'MaxNumberOfTrainingJobs': 1 }, 'HyperParameterTuningJobObjective': { 'MetricName': OBJECTIVE_METRIC_NAME, 'Type': 'Minimize' }, 'Strategy': 'Bayesian',
# Shared configuration for the multi-algorithm tuner integration tests.
EXECUTION_ROLE = "SageMakerRole"
STRATEGY = "Bayesian"
OBJECTIVE_TYPE = "Minimize"
TAGS = [{"Key": "pysdk-test", "Value": "multi-algo-tuner"}]

# Names under which the two estimators are entered into the tuning job.
ESTIMATOR_FM = "fm-one"
ESTIMATOR_KNN = "knn-two"

# TODO: change to use one of the new standard metrics for 1P algorithm
OBJECTIVE_METRIC_NAME_FM = "test:rmse"
OBJECTIVE_METRIC_NAME_KNN = "test:mse"

# Search space per algorithm.
HYPER_PARAMETER_RANGES_FM = {
    "factors_wd": ContinuousParameter(1, 30),
    "factors_lr": ContinuousParameter(40, 50),
}
HYPER_PARAMETER_RANGES_KNN = {
    "k": IntegerParameter(3, 400),
    "sample_size": IntegerParameter(40, 550),
}

MAX_JOBS = 2
MAX_PARALLEL_JOBS = 2


@pytest.fixture(scope="module")
def data_set():
    """Module-scoped MNIST data set shared by the tuner tests."""
    return datasets.one_p_mnist()
# Builds a TuningStep around a single image-classification Estimator and
# asserts that to_request() emits the exact expected tuning-job payload
# (Bayesian strategy, 5 jobs / 2 parallel, three continuous ranges with Auto
# scaling, warm start from "parent-hpo"), then checks the step's pipeline
# property expressions and get_top_model_s3_uri(). Left byte-identical: the
# expected-request literal must match the SDK's serialization exactly, so any
# reformatting risk outweighs the benefit.
def test_single_algo_tuning_step(sagemaker_session): data_source_uri_parameter = ParameterString( name="DataSourceS3Uri", default_value=f"s3://{BUCKET}/train_manifest") estimator = Estimator( image_uri=IMAGE_URI, role=ROLE, instance_count=1, instance_type="ml.c5.4xlarge", profiler_config=ProfilerConfig(system_monitor_interval_millis=500), rules=[], sagemaker_session=sagemaker_session, ) estimator.set_hyperparameters( num_layers=18, image_shape="3,224,224", num_classes=257, num_training_samples=15420, mini_batch_size=128, epochs=10, optimizer="sgd", top_k="2", precision_dtype="float32", augmentation_type="crop", ) hyperparameter_ranges = { "learning_rate": ContinuousParameter(0.0001, 0.05), "momentum": ContinuousParameter(0.0, 0.99), "weight_decay": ContinuousParameter(0.0, 0.99), } tuner = HyperparameterTuner( estimator=estimator, objective_metric_name="val:accuracy", hyperparameter_ranges=hyperparameter_ranges, objective_type="Maximize", max_jobs=5, max_parallel_jobs=2, early_stopping_type="OFF", strategy="Bayesian", warm_start_config=WarmStartConfig( warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM, parents=set(["parent-hpo"]), ), ) inputs = TrainingInput(s3_data=data_source_uri_parameter) tuning_step = TuningStep( name="MyTuningStep", tuner=tuner, inputs=inputs, ) assert tuning_step.to_request() == { "Name": "MyTuningStep", "Type": "Tuning", "Arguments": { "HyperParameterTuningJobConfig": { "Strategy": "Bayesian", "ResourceLimits": { "MaxNumberOfTrainingJobs": 5, "MaxParallelTrainingJobs": 2 }, "TrainingJobEarlyStoppingType": "OFF", "HyperParameterTuningJobObjective": { "Type": "Maximize", "MetricName": "val:accuracy", }, "ParameterRanges": { "ContinuousParameterRanges": [ { "Name": "learning_rate", "MinValue": "0.0001", "MaxValue": "0.05", "ScalingType": "Auto", }, { "Name": "momentum", "MinValue": "0.0", "MaxValue": "0.99", "ScalingType": "Auto", }, { "Name": "weight_decay", "MinValue": "0.0", "MaxValue": "0.99", "ScalingType": "Auto", }, ],
"CategoricalParameterRanges": [], "IntegerParameterRanges": [], }, }, "TrainingJobDefinition": { "StaticHyperParameters": { "num_layers": "18", "image_shape": "3,224,224", "num_classes": "257", "num_training_samples": "15420", "mini_batch_size": "128", "epochs": "10", "optimizer": "sgd", "top_k": "2", "precision_dtype": "float32", "augmentation_type": "crop", }, "RoleArn": "DummyRole", "OutputDataConfig": { "S3OutputPath": "s3://my-bucket/" }, "ResourceConfig": { "InstanceCount": 1, "InstanceType": "ml.c5.4xlarge", "VolumeSizeInGB": 30, }, "StoppingCondition": { "MaxRuntimeInSeconds": 86400 }, "AlgorithmSpecification": { "TrainingInputMode": "File", "TrainingImage": "fakeimage", }, "InputDataConfig": [{ "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": data_source_uri_parameter, "S3DataDistributionType": "FullyReplicated", } }, "ChannelName": "training", }], }, "WarmStartConfig": { "WarmStartType": "IdenticalDataAndAlgorithm", "ParentHyperParameterTuningJobs": [{ "HyperParameterTuningJobName": "parent-hpo", }], }, }, } assert tuning_step.properties.HyperParameterTuningJobName.expr == { "Get": "Steps.MyTuningStep.HyperParameterTuningJobName" } assert tuning_step.properties.TrainingJobSummaries[ 0].TrainingJobName.expr == { "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName" } assert tuning_step.get_top_model_s3_uri( 0, "my-bucket", "my-prefix" ).expr == { "Std:Join": { "On": "/", "Values": [ "s3:/", "my-bucket", "my-prefix", { "Get": "Steps.MyTuningStep.TrainingJobSummaries[0].TrainingJobName" }, "output/model.tar.gz", ], } }
# NOTE(review): fragment truncated at the end — the HyperparameterTuner(...)
# call stops at "max_parallel_jobs=3," with its closing arguments/paren
# missing from this chunk. It configures a TensorFlow LSTM estimator (script
# mode entry point, TF 1.11 / py2) and tunes learning_rate / n_inputs /
# n_units against an 'mae' regex metric; `source_dir`, `role`, and `BUCKET`
# are defined outside this chunk.
# clean_bucket_prefix(BUCKET, 'vanilla_lstm/checkpoints') hyper_parameters = {'learning_rate': 0.01, 'batch_size': 20, 'n_inputs': 3, 'n_outputs': 1, 'n_units': 3, 'time_series_name': 'passengers'} tf_estimator = TensorFlow(entry_point='sagemaker_estimator_adapter.py', source_dir=source_dir, base_job_name='vanilla-lstm-estimator', role=role, training_steps=1000, evaluation_steps=1, hyperparameters=hyper_parameters, train_instance_count=1, train_instance_type='ml.m5.large', framework_version='1.11.0', py_version='py2', checkpoint_path=('%s/vanilla_lstm/checkpoints' % BUCKET)) hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.008, 0.2), 'n_inputs': IntegerParameter(3, 10), 'n_units': IntegerParameter(3, 10)} objective_metric_name = 'mae' objective_type = 'Minimize' metric_definitions = [{'Name': 'mae', 'Regex': 'mae = ([0-9\\.]+)'}] tuner = HyperparameterTuner(tf_estimator, objective_metric_name, hyperparameter_ranges, metric_definitions, max_jobs=40, max_parallel_jobs=3,