Example #1
def test_integer_parameter_ranges():
    int_param = IntegerParameter(1, 2)
    ranges = int_param.as_tuning_range('some')
    assert len(ranges.keys()) == 3
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '1'
    assert ranges['MaxValue'] == '2'
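For reference, a standalone sketch of what the assertions above imply: as_tuning_range() serializes the parameter into the dict shape the CreateHyperParameterTuningJob API expects, with the numeric bounds rendered as strings (newer SDK versions may also include a ScalingType entry; the name 'some' is arbitrary).

from sagemaker.tuner import IntegerParameter

int_param = IntegerParameter(1, 2)
# Render the parameter as a tuning-range dict; min/max become strings.
print(int_param.as_tuning_range('some'))
# per the assertions above: {'Name': 'some', 'MinValue': '1', 'MaxValue': '2'}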
Example #2
def test_tuning_tf(
    sagemaker_session,
    cpu_instance_type,
    tensorflow_training_latest_version,
    tensorflow_training_latest_py_version,
):
    resource_path = os.path.join(DATA_DIR, "tensorflow_mnist")
    script_path = os.path.join(resource_path, "mnist.py")

    estimator = TensorFlow(
        entry_point=script_path,
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=tensorflow_training_latest_version,
        py_version=tensorflow_training_latest_py_version,
    )

    hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
    objective_metric_name = "accuracy"
    metric_definitions = [{
        "Name": objective_metric_name,
        "Regex": "accuracy = ([0-9\\.]+)"
    }]

    tuner = HyperparameterTuner(
        estimator,
        objective_metric_name,
        hyperparameter_ranges,
        metric_definitions,
        max_jobs=2,
        max_parallel_jobs=2,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        inputs = estimator.sagemaker_session.upload_data(
            path=os.path.join(resource_path, "data"),
            key_prefix="scriptmode/mnist")

        tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
        print("Started hyperparameter tuning job with name: " +
              tuning_job_name)
        tuner.fit(inputs, job_name=tuning_job_name)
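tuner.fit() returns as soon as the tuning job is created; a sketch of how the finished job's results could then be inspected through the tuner's analytics helper:

tuner.wait()  # block until the tuning job finishes
print("Best training job: " + tuner.best_training_job())
# HyperparameterTuningJobAnalytics yields one row per training job.
df = tuner.analytics().dataframe()
print(df[["TrainingJobName", "FinalObjectiveValue"]].head())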
Example #3
def test_tuning_tf_script_mode(sagemaker_session):
    resource_path = os.path.join(DATA_DIR, 'tensorflow_mnist')
    script_path = os.path.join(resource_path, 'mnist.py')

    estimator = TensorFlow(entry_point=script_path,
                           role='SageMakerRole',
                           train_instance_count=1,
                           train_instance_type='ml.m4.xlarge',
                           script_mode=True,
                           sagemaker_session=sagemaker_session,
                           py_version=PYTHON_VERSION,
                           framework_version=TensorFlow.LATEST_VERSION)

    hyperparameter_ranges = {'epochs': IntegerParameter(1, 2)}
    objective_metric_name = 'accuracy'
    metric_definitions = [{
        'Name': objective_metric_name,
        'Regex': 'accuracy = ([0-9\\.]+)'
    }]

    tuner = HyperparameterTuner(estimator,
                                objective_metric_name,
                                hyperparameter_ranges,
                                metric_definitions,
                                max_jobs=2,
                                max_parallel_jobs=2)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        inputs = estimator.sagemaker_session.upload_data(
            path=os.path.join(resource_path, 'data'),
            key_prefix='scriptmode/mnist')

        tuning_job_name = unique_name_from_base('tune-tf-script-mode',
                                                max_length=32)
        tuner.fit(inputs, job_name=tuning_job_name)

        print('Started hyperparameter tuning job with name: ' +
              tuning_job_name)

        time.sleep(15)
        tuner.wait()
Example #4
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script, role='SageMakerRole',
                        train_instance_count=1, py_version=PYTHON_VERSION,
                        train_instance_type='ml.c4.xlarge', sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        metric_definitions = [{'Name': 'evaluation-accuracy', 'Regex': r'Overall test accuracy: (\d+)'}]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        training_data = estimator.sagemaker_session.upload_data(path=os.path.join(mnist_dir, 'training'),
                                                                key_prefix='integ-test-data/pytorch_mnist/training')
        tuner.fit({'training': training_data})

        tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name: ' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(tuning_job_name, sagemaker_session=sagemaker_session)
    best_training_job = attached_tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #5
def test_tuning(sagemaker_session, ecr_image, instance_type,
                framework_version):
    resource_path = os.path.join(os.path.dirname(__file__), '..', '..',
                                 'resources')
    script = os.path.join(resource_path, 'mnist', 'mnist.py')

    estimator = TensorFlow(entry_point=script,
                           role='SageMakerRole',
                           train_instance_type=instance_type,
                           train_instance_count=1,
                           sagemaker_session=sagemaker_session,
                           image_name=ecr_image,
                           framework_version=framework_version,
                           script_mode=True)

    hyperparameter_ranges = {'epochs': IntegerParameter(1, 2)}
    objective_metric_name = 'accuracy'
    metric_definitions = [{
        'Name': objective_metric_name,
        'Regex': 'accuracy = ([0-9\\.]+)'
    }]

    tuner = HyperparameterTuner(estimator,
                                objective_metric_name,
                                hyperparameter_ranges,
                                metric_definitions,
                                max_jobs=2,
                                max_parallel_jobs=2)

    with timeout(minutes=20):
        inputs = estimator.sagemaker_session.upload_data(
            path=os.path.join(resource_path, 'mnist', 'data'),
            key_prefix='scriptmode/mnist')

        tuning_job_name = unique_name_from_base('test-tf-sm-tuning',
                                                max_length=32)
        tuner.fit(inputs, job_name=tuning_job_name)
        tuner.wait()
Example #6
def test_model_dir_with_training_job_name(sagemaker_session, ecr_image, instance_type, framework_version):
    resource_path = os.path.join(os.path.dirname(__file__), '../..', 'resources')
    script = os.path.join(resource_path, 'tuning_model_dir', 'entry.py')

    estimator = TensorFlow(entry_point=script,
                           role='SageMakerRole',
                           train_instance_type=instance_type,
                           train_instance_count=1,
                           image_name=ecr_image,
                           framework_version=framework_version,
                           py_version='py3',
                           sagemaker_session=sagemaker_session)

    tuner = HyperparameterTuner(estimator=estimator,
                                objective_metric_name='accuracy',
                                hyperparameter_ranges={'arbitrary_value': IntegerParameter(0, 1)},
                                metric_definitions=[{'Name': 'accuracy', 'Regex': 'accuracy=([01])'}],
                                max_jobs=1,
                                max_parallel_jobs=1)

    # User script has logic to check for the correct model_dir
    tuner.fit(job_name=unique_name_from_base('test-tf-model-dir', max_length=32))
    tuner.wait()
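The entry.py script itself is not shown here; a hypothetical sketch of the model_dir check it might perform, assuming the training toolkit's standard SM_TRAINING_ENV environment variable:

# Hypothetical entry.py sketch (not the actual test resource): verify that the
# model_dir passed by the tuner embeds the current training job name, then emit
# the metric that the tuner's Regex 'accuracy=([01])' scrapes from the logs.
import argparse
import json
import os

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str)
parser.add_argument("--arbitrary_value", type=int, default=0)
args, _ = parser.parse_known_args()

job_name = json.loads(os.environ["SM_TRAINING_ENV"])["job_name"]
assert job_name in args.model_dir
print("accuracy=1")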
Example #7
def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):
    data_path = os.path.join(DATA_DIR, "marketplace", "training")
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region), region=region, account=account
    )

    mktplace = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    train_input = mktplace.sagemaker_session.upload_data(
        path=data_path, key_prefix="integ-test-data/marketplace/train"
    )

    mktplace.set_hyperparameters(max_leaf_nodes=10)

    hyperparameter_ranges = {"max_leaf_nodes": IntegerParameter(1, 100000)}

    tuner = HyperparameterTuner(
        estimator=mktplace,
        base_tuning_job_name="byo",
        objective_metric_name="validation:accuracy",
        hyperparameter_ranges=hyperparameter_ranges,
        max_jobs=2,
        max_parallel_jobs=2,
    )

    tuner.fit({"training": train_input}, include_cls_metadata=False)
    time.sleep(15)
    tuner.wait()
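A sketch of inspecting the finished job through the low-level client, using the name recorded on the tuner (the same describe call appears in another example further down):

# Inspect the completed tuning job via the boto3 SageMaker client (a sketch).
desc = sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(
    HyperParameterTuningJobName=tuner.latest_tuning_job.name)
print(desc["HyperParameterTuningJobStatus"])
print("Best training job: " + tuner.best_training_job())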
Example #8
def test_tuning(sagemaker_session, image_uri, instance_type, framework_version):
    resource_path = os.path.join(os.path.dirname(__file__), "..", "..", "resources")
    script = os.path.join(resource_path, "mnist", "mnist.py")

    estimator = TensorFlow(
        entry_point=script,
        role="SageMakerRole",
        train_instance_type=instance_type,
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
        image_name=image_uri,
        framework_version=framework_version,
        script_mode=True,
    )

    hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
    objective_metric_name = "accuracy"
    metric_definitions = [{"Name": objective_metric_name, "Regex": "accuracy = ([0-9\\.]+)"}]

    tuner = HyperparameterTuner(
        estimator,
        objective_metric_name,
        hyperparameter_ranges,
        metric_definitions,
        max_jobs=2,
        max_parallel_jobs=2,
    )

    with timeout(minutes=20):
        inputs = estimator.sagemaker_session.upload_data(
            path=os.path.join(resource_path, "mnist", "data"), key_prefix="scriptmode/mnist"
        )

        tuning_job_name = unique_name_from_base("test-tf-sm-tuning", max_length=32)
        tuner.fit(inputs, job_name=tuning_job_name)
        tuner.wait()
Example #9
def test_tuning_step_with_multi_algo_tuner(pipeline_session, entry_point):
    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=sagemaker.get_execution_role(),
        framework_version="1.5.0",
        py_version="py3",
        instance_count=1,
        instance_type="ml.m5.xlarge",
        sagemaker_session=pipeline_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
        hyperparameters={
            "static-hp": "hp1",
            "train_size": "1280"
        },
    )

    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": pytorch_estimator,
            "estimator-2": pytorch_estimator,
        },
        objective_metric_name_dict={
            "estimator-1": "test:acc",
            "estimator-2": "test:acc",
        },
        hyperparameter_ranges_dict={
            "estimator-1": {
                "batch-size": IntegerParameter(64, 128)
            },
            "estimator-2": {
                "batch-size": IntegerParameter(256, 512)
            },
        },
        metric_definitions_dict={
            "estimator-1": [{
                "Name": "test:acc",
                "Regex": "Overall test accuracy: (.*?);"
            }],
            "estimator-2": [{
                "Name": "test:acc",
                "Regex": "Overall test accuracy: (.*?);"
            }],
        },
    )
    input_path = f"s3://{pipeline_session.default_bucket()}/training-data"
    inputs = {
        "estimator-1": TrainingInput(s3_data=input_path),
        "estimator-2": TrainingInput(s3_data=input_path),
    }
    step_args = tuner.fit(
        inputs=inputs,
        include_cls_metadata={
            "estimator-1": False,
            "estimator-2": False,
        },
    )

    step = TuningStep(
        name="MyTuningStep",
        step_args=step_args,
    )

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": step_args,
    }
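The test stops at asserting the rendered definition; actually running it would mean upserting and starting the pipeline, as in this sketch, where role_arn is an assumed IAM role ARN not defined in the test above:

# A sketch of executing the pipeline (role_arn is an assumed IAM role ARN).
pipeline.upsert(role_arn=role_arn)
execution = pipeline.start()
execution.wait()
print(execution.list_steps())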
Example #10
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name
                          ) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, 'train',
                                                          key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            base_tuning_job_name='byo',
            objective_metric_name='test:binary_classification_accuracy',
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit({
            'train': s3_train_data,
            'test': s3_train_data
        },
                  include_cls_metadata=False)

        print('Started hyperparameter tuning job with name: ' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1,
                                 'ml.m4.xlarge',
                                 endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
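_fm_serializer is a helper not shown in this snippet; a plausible definition, building the JSON payload the Factorization Machines container expects from a numpy array:

import json

# Plausible definition of the _fm_serializer helper used above: wrap each row
# in the {"instances": [{"features": [...]}]} payload Factorization Machines expects.
def _fm_serializer(data):
    js = {'instances': []}
    for row in data:
        js['instances'].append({'features': row.tolist()})
    return json.dumps(js)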
Example #11
def test_tuning_kmeans(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10,
                        sagemaker_session=sagemaker_session,
                        base_job_name='tk',
                        output_path='s3://{}/'.format(
                            sagemaker_session.default_bucket()))

        # set kmeans specific hp
        kmeans.init_method = 'random'
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'extra_center_factor': IntegerParameter(1, 10),
            'mini_batch_size': IntegerParameter(10, 100),
            'epochs': IntegerParameter(1, 2),
            'init_method': CategoricalParameter(['kmeans++', 'random'])
        }
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(
            estimator=kmeans,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Minimize',
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name: ' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
Example #12
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
        objective_metric_name = "test:pwll"

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"
    assert attached_tuner.estimator.alpha0 == 1.0
    assert attached_tuner.estimator.num_topics == 1

    best_training_job = attached_tuner.best_training_job()

    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = attached_tuner.deploy(1, cpu_instance_type)
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None
Example #13
train_data_location = 's3://sagemaker-sample-data-{}/mxnet/mnist/train'.format(
    region)
test_data_location = 's3://sagemaker-sample-data-{}/mxnet/mnist/test'.format(
    region)

estimator = MXNet(entry_point='mnist.py',
                  role=role,
                  train_instance_count=1,
                  train_instance_type='ml.m4.xlarge',
                  sagemaker_session=sagemaker.Session(),
                  base_job_name='DEMO-hpo-mxnet',
                  hyperparameters={'batch_size': 100})

hyperparameter_ranges = {
    'optimizer': CategoricalParameter(['sgd', 'Adam']),
    'learning_rate': ContinuousParameter(0.01, 0.2),
    'num_epoch': IntegerParameter(10, 50)
}

objective_metric_name = 'Validation-accuracy'
metric_definitions = [{
    'Name': 'Validation-accuracy',
    'Regex': 'Validation-accuracy=([0-9\\.]+)'
}]

tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=9,
                            max_parallel_jobs=3)
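The snippet ends before the job is launched; a sketch of starting this tuner with the data locations defined above (channel names assume the mnist.py script reads 'train' and 'test' channels):

# Launching the tuner (a sketch; 'train'/'test' channel names are assumptions).
tuner.fit({'train': train_data_location, 'test': test_data_location})
tuner.wait()
print('Best training job: ' + tuner.best_training_job())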
Example #14
            'Key': 'Name',
            'Value': 'DeepAR-Training'
        }, {
            'Key': 'input data',
            'Value': 'train-400.json'
        }, {
            'Key': 'instance type / workers / parallel / count',
            'Value': 'ml.m5.xlarge / 4 / 1 / 1'
        }, {
            'Key': 'hypers choice',
            'Value': 'likelihood: deterministic-L1 / student-T'
        }])  # tags must not contain full-width or half-width commas

    # hyperparameter_ranges = {'likelihood': CategoricalParameter(["deterministic-L1", 'student-T'])}
    hyperparameter_ranges = {
        'embedding_dimension': IntegerParameter(32, 64),
        'num_cells': IntegerParameter(32, 64),
        'num_layers': IntegerParameter(2, 3)
    }
    objective_metric_name = 'test:mean_wQuantileLoss'
    tuner = HyperparameterTuner(
        estimator_hyper,
        objective_metric_name,
        hyperparameter_ranges,
        strategy='Bayesian',
        objective_type='Minimize',
        max_jobs=2,
        max_parallel_jobs=1,
        tags=[{
            'Key': 'App',
            'Value': 'Sagemaker-DeepAR'
        }])
Example #15
    }, {
        'Name': 'validation:accuracy',
        'Regex': 'val_accuracy: ([0-9\\.]+)'
    }]
    sm_estimator = sagemaker_estimator(sagemaker_role, code_entry, code_dir,
                                       instance_type, instance_count,
                                       hyperparameters, metric_definitions)

    # sagemaker training job
    training_job_name = "tf-mnist-training-{}".format(
        strftime("%d-%H-%M-%S", gmtime()))
    sagemaker_training(sm_estimator, train_s3, training_job_name)

    # sagemaker tuning job
    hyperparameter_ranges = {
        'epochs':
        IntegerParameter(50, 200),
        'learning_rate':
        ContinuousParameter(0.0001, 0.1, scaling_type="Logarithmic"),
        'batch_size':
        IntegerParameter(32, 256),
        'drop_rate':
        ContinuousParameter(0.0, 1.0)
    }

    tuning_job_name = "tf-mnist-tuning-{}".format(
        strftime("%d-%H-%M-%S", gmtime()))
    max_jobs = 4
    max_parallel_jobs = 2
    #sagemaker_hyperparam_tuning(sm_estimator, train_s3, hyperparameter_ranges, metric_definitions, tuning_job_name, max_jobs, max_parallel_jobs)
Example #16
hyper_parameters = {'learning_rate': 0.01, 'batch_size': 20, 'n_inputs': 3, 'n_outputs': 1, 'n_units': 3,
                    'time_series_name': 'passengers'}

tf_estimator = TensorFlow(entry_point='sagemaker_estimator_adapter.py',
                          source_dir=source_dir,
                          base_job_name='vanilla-lstm-estimator', role=role,
                          training_steps=1000,
                          evaluation_steps=1,
                          hyperparameters=hyper_parameters,
                          train_instance_count=1, train_instance_type='ml.m5.large',
                          framework_version='1.11.0', py_version='py2',
                          checkpoint_path=('%s/vanilla_lstm/checkpoints' % BUCKET))

hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.008, 0.2),
                         'n_inputs': IntegerParameter(3, 10),
                         'n_units': IntegerParameter(3, 10)}

objective_metric_name = 'mae'
objective_type = 'Minimize'
metric_definitions = [{'Name': 'mae',
                       'Regex': 'mae = ([0-9\\.]+)'}]


tuner = HyperparameterTuner(tf_estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=40,
                            max_parallel_jobs=3,
                            objective_type=objective_type)
Example #17
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole',
                  train_instance_type='ml.c4.xlarge',
                  num_topics=10,
                  sagemaker_session=sagemaker_session)

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'alpha0': ContinuousParameter(1, 10),
            'num_topics': IntegerParameter(1, 2)
        }
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type='Auto')

        tuning_job_name = unique_name_from_base('test-lda', max_length=32)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

        latest_tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name: ' +
              latest_tuning_job_name)

        time.sleep(15)
        tuner.wait()

    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client \
        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=latest_tuning_job_name)
    assert desc['HyperParameterTuningJobConfig'][
        'TrainingJobEarlyStoppingType'] == 'Auto'

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
Example #18
    fm.fit({'train': train_data, 'test': test_data})

    best_model = fm.model_data
else:
    fm.set_hyperparameters(feature_dim=nbFeatures,
                           predictor_type='binary_classifier',
                           mini_batch_size=1000,
                           num_factors=64,
                           epochs=100)

    my_tuner = HyperparameterTuner(
        estimator=fm,
        objective_metric_name='test:binary_classification_accuracy',
        hyperparameter_ranges={
            'epochs': IntegerParameter(1, 200),
            'mini_batch_size': IntegerParameter(10, 10000),
            'factors_wd': ContinuousParameter(1e-8, 512)
        },
        max_jobs=4,
        max_parallel_jobs=4)

    my_tuner.fit({
        'train': train_data,
        'test': test_data
    },
                 include_cls_metadata=False)

    my_tuner.wait()

    #sm_session = sagemaker.Session()
Example #19
    def tuning(self):

        s3_bucket, id, secret = s3_aws_engine(name=self.aws_env)

        s3_path = ModelTune._aws_s3_path(s3_bucket)

        boto_sess = ModelTune._boto_session(id, secret)

        logger.info('Getting algorithm image URI...')

        container = get_image_uri(boto_sess.region_name,
                                  'xgboost',
                                  repo_version='0.90-1')

        logger.info('Creating sagemaker session...')

        sage_sess = sagemaker.Session(boto_sess)

        s3_input_train, s3_input_val = self.fetch_data(s3_path)

        logger.info(
            'Creating sagemaker estimator to train using the supplied {} model...'
            .format(self.model_name))

        if self.model_name == 'clf':
            train_instance_type = 'ml.m5.4xlarge'
        else:
            train_instance_type = 'ml.m5.2xlarge'

        est = Estimator(container,
                        role=self.role,
                        train_instance_count=1,
                        train_instance_type=train_instance_type,
                        output_path=s3_path + 'tuning_' + self.model_name +
                        '/',
                        sagemaker_session=sage_sess,
                        base_job_name=self.model_name + '-tuning-job')

        logger.info('Setting hyper-parameters...')

        hyperparameter_ranges = {
            'num_round': IntegerParameter(1, 4000),
            'eta': ContinuousParameter(0, 0.5),
            'max_depth': IntegerParameter(1, 10),
            'min_child_weight': ContinuousParameter(0, 120),
            'subsample': ContinuousParameter(0.5, 1),
            'colsample_bytree': ContinuousParameter(0.5, 1),
            'gamma': ContinuousParameter(0, 5),
            'lambda': ContinuousParameter(0, 1000),
            'alpha': ContinuousParameter(0, 1000)
        }

        if self.model_name == 'clf':
            est.set_hyperparameters(
                objective='reg:logistic',
                scale_pos_weight=self._get_imb_ratio()['imb_ratio'])
            objective_metric_name = 'validation:f1'
            objective_type = 'Maximize'
        else:
            est.set_hyperparameters(objective='reg:linear')
            objective_metric_name = 'validation:rmse'
            objective_type = 'Minimize'

        if est.hyperparam_dict is None:
            raise ValueError('Hyper-parameters are missing')
        else:
            logger.info(est.hyperparam_dict)

        tuner = HyperparameterTuner(
            estimator=est,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type=objective_type,
            max_jobs=100,
            max_parallel_jobs=10)

        sw = Stopwatch(start=True)

        tuner.fit({'train': s3_input_train, 'validation': s3_input_val})

        self.post_tune(sage_sess, tuner)

        logger.info('Elapsed time of tuning: {}'.format(
            sw.elapsed.human_str()))
Example #20
TAGS = [{"Key": "pysdk-test", "Value": "multi-algo-tuner"}]

ESTIMATOR_FM = "fm-one"
ESTIMATOR_KNN = "knn-two"

# TODO: change to use one of the new standard metrics for 1P algorithm
OBJECTIVE_METRIC_NAME_FM = "test:rmse"
OBJECTIVE_METRIC_NAME_KNN = "test:mse"

HYPER_PARAMETER_RANGES_FM = {
    "factors_wd": ContinuousParameter(1, 30),
    "factors_lr": ContinuousParameter(40, 50),
}
HYPER_PARAMETER_RANGES_KNN = {
    "k": IntegerParameter(3, 400),
    "sample_size": IntegerParameter(40, 550),
}

MAX_JOBS = 2
MAX_PARALLEL_JOBS = 2


@pytest.fixture(scope="module")
def data_set():
    return datasets.one_p_mnist()


@pytest.fixture(scope="function")
def estimator_fm(sagemaker_session, cpu_instance_type):
    fm_image = image_uris.retrieve("factorization-machines",
                                   sagemaker_session.boto_region_name)
Example #21
def test_attach_tuning_pytorch(
    sagemaker_session,
    cpu_instance_type,
    pytorch_inference_latest_version,
    pytorch_inference_latest_py_version,
):
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    mnist_script = os.path.join(mnist_dir, "mnist.py")

    estimator = PyTorch(
        entry_point=mnist_script,
        role="SageMakerRole",
        instance_count=1,
        framework_version=pytorch_inference_latest_version,
        py_version=pytorch_inference_latest_py_version,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = "evaluation-accuracy"
        metric_definitions = [{
            "Name": "evaluation-accuracy",
            "Regex": r"Overall test accuracy: (\d+)"
        }]
        hyperparameter_ranges = {"batch-size": IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "training"),
            key_prefix="integ-test-data/pytorch_mnist/training",
        )

        tuning_job_name = unique_name_from_base("pytorch", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(
            tuning_job_name))
        tuner.fit({"training": training_data}, job_name=tuning_job_name)

    endpoint_name = tuning_job_name
    model_name = "model-name-1"
    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = attached_tuner.deploy(1,
                                          cpu_instance_type,
                                          endpoint_name=endpoint_name,
                                          model_name=model_name)
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
        _assert_model_name_match(sagemaker_session.sagemaker_client,
                                 endpoint_name, model_name)
Example #22
print("Using inputs: ", inputs)

estimator = PyTorch(entry_point="cifar10.py",
                    source_dir=os.getcwd() + "/source",
                    role=role,
                    framework_version='1.0.0.dev',
                    train_instance_count=1,
                    train_instance_type='ml.c5.xlarge',
                    hyperparameters={
                        'epochs': 50,
                        'momentum': 0.9
                    })

hyperparameter_ranges = {
    'lr': ContinuousParameter(0.0001, 0.001),
    'hidden_nodes': IntegerParameter(20, 100),
    'batch_size': CategoricalParameter([128, 256, 512]),
    'conv1_channels': CategoricalParameter([32, 64, 128]),
    'conv2_channels': CategoricalParameter([64, 128, 256, 512]),
}

objective_metric_name = 'average test accuracy'
objective_type = 'Maximize'
metric_definitions = [{
    'Name': 'average test accuracy',
    'Regex': 'Test Accuracy: ([0-9\\.]+)'
}]

tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            objective_type=objective_type)
Example #23
REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {'validated': ContinuousParameter(0, 5),
                         'elizabeth': IntegerParameter(0, 5),
                         'blank': CategoricalParameter([0, 5])}
METRIC_DEFINTIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },
        'Strategy': 'Bayesian',
        'ParameterRanges': {
Example #24
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name
                          ) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuner.fit(
            {
                "train": s3_train_data,
                "test": s3_train_data
            },
            include_cls_metadata=False,
            job_name=unique_name_from_base("byo", 32),
        )

        print("Started hyperparameter tuning job with name:" +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1,
                                 "ml.m4.xlarge",
                                 endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
Example #25
    preds = np.fromstring(predictions[1:], sep=',')
    confusion_matrix = pd.crosstab(index=labels, columns=np.round(preds), rownames=['True'], colnames=['predictions']).astype(int)
    plt.figure(figsize = (5,5))
    sns.heatmap(confusion_matrix, annot=True, fmt='.2f', cmap="YlGnBu").set_title('Confusion Matrix') 

predict_xgboost(xgb_predictor, test_features, test_labels)


# In[84]:


from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

hyperparameter_ranges_xgb = {'eta': ContinuousParameter(0.01, 0.2),
                         'max_depth': IntegerParameter(3, 9),
                         'gamma': IntegerParameter(0, 5),
                         'min_child_weight': IntegerParameter(2, 6),
                         'subsample': ContinuousParameter(0.5, 0.9),
                         'colsample_bytree': ContinuousParameter(0.5, 0.9)}

objective_metric_name_xgb = 'validation:auc'

tuner_xgb = HyperparameterTuner(xgb,
                            objective_metric_name_xgb,
                            hyperparameter_ranges_xgb,
                            max_jobs=10,
                            max_parallel_jobs=1)

tuner_xgb.fit({'train': s3_input_train, 'validation': s3_input_validation})
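A sketch of what could follow the fit() call above: wait for completion, then deploy the best model straight from the tuner (the endpoint instance type below is an assumption, not taken from the notebook):

# Wait for tuning to finish, then deploy the best model (a sketch).
tuner_xgb.wait()
xgb_best_predictor = tuner_xgb.deploy(initial_instance_count=1,
                                      instance_type='ml.m4.xlarge')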
Example #26
def hyperparameter_ranges():
    return {'extra_center_factor': IntegerParameter(1, 10),
            'mini_batch_size': IntegerParameter(10, 100),
            'epochs': IntegerParameter(1, 2),
            'init_method': CategoricalParameter(['kmeans++', 'random'])}
Example #27
def get_pipeline(
    region,
    sagemaker_project_arn=None,
    role=None,
    default_bucket=None,
    model_package_group_name="restatePackageGroup",  # Choose any name
    pipeline_name="restate-p-XXXXXXXXX",  # You can find your pipeline name in the Studio UI (project -> Pipelines -> name)
    base_job_prefix="restate",  # Choose any name
):
    """Gets a SageMaker ML Pipeline instance working with on RE data.
    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline.
        default_bucket: the bucket to use for storing the artifacts
    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)

    # Parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.2xlarge"
    )
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge"
    )
    model_approval_status = ParameterString(
        name="ModelApprovalStatus",
        default_value="PendingManualApproval",  # ModelApprovalStatus can be set to a default of "Approved" if you don't want manual approval.
    )
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"",  # Change this to point to the s3 location of your raw input data.
    )

    data_sources = []
    # Sagemaker session
    sess = sagemaker_session

    # You can configure this with your own bucket name, e.g.
    # bucket = "my-bucket"
    bucket = sess.default_bucket()

    data_sources.append(
        ProcessingInput(
            input_name="restate-california",
            dataset_definition=DatasetDefinition(
                local_path="/opt/ml/processing/restate-california",
                data_distribution_type="FullyReplicated",
                # You can override below to point to other database or use different queries
                athena_dataset_definition=AthenaDatasetDefinition(
                    catalog="AwsDataCatalog",
                    database="restate",
                    query_string="SELECT * FROM restate.california_10",
                    output_s3_uri=f"s3://{bucket}/athena/",
                    output_format="PARQUET",
                ),
            ),
        )
    )

    print(f"Data Wrangler export storage bucket: {bucket}")

    # unique flow export ID
    flow_export_id = f"{time.strftime('%d-%H-%M-%S', time.gmtime())}-{str(uuid.uuid4())[:8]}"
    flow_export_name = f"flow-{flow_export_id}"

    # Output name is auto-generated from the select node's ID + output name from the flow file.
    output_name = "99ae1ec3-dd5f-453c-bfae-721dac423cd7.default"

    s3_output_prefix = f"export-{flow_export_name}/output"
    s3_output_path = f"s3://{bucket}/{s3_output_prefix}"
    print(f"Flow S3 export result path: {s3_output_path}")

    processing_job_output = ProcessingOutput(
        output_name=output_name,
        source="/opt/ml/processing/output",
        destination=s3_output_path,
        s3_upload_mode="EndOfJob",
    )

    # name of the flow file which should exist in the current notebook working directory
    flow_file_name = "sagemaker-pipeline/restate-athena-california.flow"

    # Load .flow file from current notebook working directory
    #!echo "Loading flow file from current notebook working directory: $PWD"

    with open(flow_file_name) as f:
        flow = json.load(f)

    # Upload flow to S3
    s3_client = boto3.client("s3")
    s3_client.upload_file(
        flow_file_name,
        bucket,
        f"data_wrangler_flows/{flow_export_name}.flow",
        ExtraArgs={"ServerSideEncryption": "aws:kms"},
    )

    flow_s3_uri = f"s3://{bucket}/data_wrangler_flows/{flow_export_name}.flow"

    print(f"Data Wrangler flow {flow_file_name} uploaded to {flow_s3_uri}")

    ## Input - Flow: restate-athena-russia.flow
    flow_input = ProcessingInput(
        source=flow_s3_uri,
        destination="/opt/ml/processing/flow",
        input_name="flow",
        s3_data_type="S3Prefix",
        s3_input_mode="File",
        s3_data_distribution_type="FullyReplicated",
    )

    # IAM role for executing the processing job.
    iam_role = role

    # Unique processing job name. Give a unique name every time you re-execute processing jobs
    processing_job_name = f"data-wrangler-flow-processing-{flow_export_id}"

    # Data Wrangler Container URL.
    container_uri = sagemaker.image_uris.retrieve(
        framework="data-wrangler",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
    )

    # Processing Job Instance count and instance type.
    instance_count = 2
    instance_type = "ml.m5.4xlarge"

    # Size in GB of the EBS volume to use for storing data during processing
    volume_size_in_gb = 30

    # Content type for each output. Data Wrangler supports CSV as default and Parquet.
    output_content_type = "CSV"

    # Network Isolation mode; default is off
    enable_network_isolation = False

    # List of tags to be passed to the processing job
    user_tags = []

    # Output configuration used as processing job container arguments
    output_config = {output_name: {"content_type": output_content_type}}

    # KMS key for per object encryption; default is None
    kms_key = None

    processor = Processor(
        role=iam_role,
        image_uri=container_uri,
        instance_count=instance_count,
        instance_type=instance_type,
        volume_size_in_gb=volume_size_in_gb,
        network_config=NetworkConfig(enable_network_isolation=enable_network_isolation),
        sagemaker_session=sess,
        output_kms_key=kms_key,
        tags=user_tags,
    )

    data_wrangler_step = ProcessingStep(
        name="DataWranglerProcess",
        processor=processor,
        inputs=[flow_input] + data_sources,
        outputs=[processing_job_output],
        job_arguments=[f"--output-config '{json.dumps(output_config)}'"],
    )

    # Processing step for feature engineering
    # this processor does not have awswrangler installed
    sklearn_processor = SKLearnProcessor(
        framework_version="0.23-1",
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-restate-preprocess",  # choose any name
        sagemaker_session=sagemaker_session,
        role=role,
    )

    step_process = ProcessingStep(
        name="Preprocess",  # choose any name
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(
                source=data_wrangler_step.properties.ProcessingOutputConfig.Outputs[
                    output_name
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/data/raw-data-dir",
            )
        ],
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        code=os.path.join(BASE_DIR, "preprocess.py"),
        job_arguments=[
            "--input-data",
            data_wrangler_step.properties.ProcessingOutputConfig.Outputs[
                output_name
            ].S3Output.S3Uri,
        ],
    )

    # Training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/restateTrain"
    model_bucket_key = f"{sagemaker_session.default_bucket()}/{base_job_prefix}/restateTrain"
    cache_config = CacheConfig(enable_caching=True, expire_after="30d")

    xgb_image_uri = sagemaker.image_uris.retrieve(
        framework="xgboost",  # we are using the Sagemaker built in xgboost algorithm
        region=region,
        version="1.0-1",
        py_version="py3",
        instance_type=training_instance_type,
    )
    xgb_train = Estimator(
        image_uri=xgb_image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/restate-xgb-train",
        sagemaker_session=sagemaker_session,
        role=role,
    )
    xgb_train.set_hyperparameters(
        #    #objective="binary:logistic",
        #    objective="reg:linear",
        num_round=50,
        #    max_depth=5,
        #    eta=0.2,
        #    gamma=4,
        #    min_child_weight=6,
        #    subsample=0.7,
        #    silent=0,
    )

    xgb_train.set_hyperparameters(grow_policy="lossguide")

    xgb_objective_metric_name = "validation:mse"
    xgb_hyperparameter_ranges = {
        "max_depth": IntegerParameter(2, 10, scaling_type="Linear"),
    }

    xgb_tuner_log = HyperparameterTuner(
        xgb_train,
        xgb_objective_metric_name,
        xgb_hyperparameter_ranges,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    xgb_step_tuning = TuningStep(
        name="XGBHPTune",
        tuner=xgb_tuner_log,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    # dtree_image_uri = '625467769535.dkr.ecr.ap-southeast-1.amazonaws.com/sagemaker-decision-tree:latest'
    dtree_image_uri = sagemaker_session.sagemaker_client.describe_image_version(
        ImageName="restate-dtree"
    )["ContainerImage"]

    dtree_train = Estimator(
        image_uri=dtree_image_uri,
        role=role,
        instance_count=1,
        instance_type=training_instance_type,
        base_job_name=f"{base_job_prefix}/restate-dtree-train",
        output_path=model_path,
        sagemaker_session=sagemaker_session,
    )

    dtree_objective_metric_name = "validation:mse"
    dtree_metric_definitions = [{"Name": "validation:mse", "Regex": r"mse:(\S+)"}]

    dtree_hyperparameter_ranges = {
        "max_depth": IntegerParameter(10, 50, scaling_type="Linear"),
        "max_leaf_nodes": IntegerParameter(2, 12, scaling_type="Linear"),
    }

    dtree_tuner_log = HyperparameterTuner(
        dtree_train,
        dtree_objective_metric_name,
        dtree_hyperparameter_ranges,
        dtree_metric_definitions,
        max_jobs=3,
        max_parallel_jobs=3,
        strategy="Random",
        objective_type="Minimize",
    )

    dtree_step_tuning = TuningStep(
        name="DTreeHPTune",
        tuner=dtree_tuner_log,
        inputs={
            "training": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "train"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs[
                    "validation"
                ].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
        cache_config=cache_config,
    )

    dtree_script_eval = ScriptProcessor(
        image_uri=dtree_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-dtree-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    dtree_evaluation_report = PropertyFile(
        name="EvaluationReportDTree",
        output_name="dtree_evaluation",
        path="dtree_evaluation.json",
    )

    dtree_step_eval = ProcessingStep(
        name="DTreeEval",
        processor=dtree_script_eval,
        inputs=[
            ProcessingInput(
                # source=dtree_step_train.properties.ModelArtifacts.S3ModelArtifacts,
                source=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(
                output_name="dtree_evaluation", source="/opt/ml/processing/evaluation"
            ),
        ],
        code=os.path.join(BASE_DIR, "dtree_evaluate.py"),
        property_files=[dtree_evaluation_report],
    )

    xgb_script_eval = ScriptProcessor(
        image_uri=xgb_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-xgb-eval",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    xgb_evaluation_report = PropertyFile(
        name="EvaluationReportXGBoost",
        output_name="xgb_evaluation",
        path="xgb_evaluation.json",
    )

    xgb_step_eval = ProcessingStep(
        name="XGBEval",
        processor=xgb_script_eval,
        inputs=[
            ProcessingInput(
                source=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs[
                    "test"
                ].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="xgb_evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code=os.path.join(BASE_DIR, "xgb_evaluate.py"),
        property_files=[xgb_evaluation_report],
    )

    xgb_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/xgb_evaluation.json".format(
                xgb_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    dtree_model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/dtree_evaluation.json".format(
                dtree_step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"][
                    "S3Uri"
                ]
            ),
            content_type="application/json",
        )
    )

    xgb_eval_metrics = JsonGet(
        step=xgb_step_eval,
        property_file=xgb_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )

    dtree_eval_metrics = JsonGet(
        step=dtree_step_eval,
        property_file=dtree_evaluation_report,
        json_path="regression_metrics.r2s.value",  # This should follow the structure of your report_dict defined in the evaluate.py file.
    )
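
    # For JsonGet to resolve "regression_metrics.r2s.value", each evaluate
    # script must write its report with that nesting. A sketch (the metric
    # value is a placeholder):
    #
    #   report_dict = {"regression_metrics": {"r2s": {"value": 0.87}}}
    #   with open("/opt/ml/processing/evaluation/xgb_evaluation.json", "w") as f:
    #       json.dump(report_dict, f)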

    # Register model step that will be conditionally executed
    dtree_step_register = RegisterModel(
        name="DTreeReg",
        estimator=dtree_train,
        model_data=dtree_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=dtree_model_metrics,
    )

    # Register model step that will be conditionally executed
    xgb_step_register = RegisterModel(
        name="XGBReg",
        estimator=xgb_train,
        model_data=xgb_step_tuning.get_top_model_s3_uri(top_k=0, s3_bucket=model_bucket_key),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=xgb_model_metrics,
    )

    # Condition for evaluating model quality and branching execution: register
    # the decision-tree model if its R2 is greater than or equal to the
    # XGBoost model's R2, otherwise register the XGBoost model.
    cond_gte = ConditionGreaterThanOrEqualTo(  # You can change the condition here
        left=JsonGet(
            step=dtree_step_eval,
            property_file=dtree_evaluation_report,
            json_path="regression_metrics.r2s.value",
        ),
        right=JsonGet(
            step=xgb_step_eval,
            property_file=xgb_evaluation_report,
            json_path="regression_metrics.r2s.value",
        ),
    )

    step_cond = ConditionStep(
        name="AccuracyCond",
        conditions=[cond_gte],
        if_steps=[dtree_step_register],
        else_steps=[xgb_step_register],
    )
    create_date = time.strftime("%Y-%m-%d-%H-%M-%S")

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data
        ],
        pipeline_experiment_config=PipelineExperimentConfig(
            pipeline_name + "-" + create_date, "restate-{}".format(create_date)
        ),
        steps=[
            data_wrangler_step,
            step_process,
            dtree_step_tuning,
            xgb_step_tuning,
            dtree_step_eval,
            xgb_step_eval,
            step_cond,
        ],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
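# A minimal usage sketch for the pipeline factory above (hedged: the factory's
# name and arguments are defined earlier in this module, so they are
# assumptions here):
#
#   pipeline = get_pipeline(...)  # however this factory is exposed
#   pipeline.upsert(role_arn=role)  # create or update the pipeline definition
#   execution = pipeline.start()
#   execution.wait()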
def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_uri = image_uris.retrieve("factorization-machines",
                                    sagemaker_session.boto_region_name)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("byo", 32)
        print("Started hyperparameter tuning job with name {}:".format(
            tuning_job_name))
        tuner.fit(
            {
                "train": s3_train_data,
                "test": s3_train_data
            },
            include_cls_metadata=False,
            job_name=tuning_job_name,
        )

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(
            1,
            cpu_instance_type,
            endpoint_name=best_training_job,
            serializer=_FactorizationMachineSerializer(),
            deserializer=JSONDeserializer(),
        )

        result = predictor.predict(datasets.one_p_mnist()[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
# First, make sure to import the relevant objects used to construct the tuner
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner

# TODO: Create the hyperparameter tuner object

xgb_hyperparameter_tuner = HyperparameterTuner(
    estimator=xgb,  # The estimator object to use as the basis for the training jobs.
    objective_metric_name='validation:rmse',  # The metric used to compare trained models.
    objective_type='Minimize',  # Whether we wish to minimize or maximize the metric.
    max_jobs=10,  # The total number of models to train.
    max_parallel_jobs=3,  # The number of models to train in parallel.
    hyperparameter_ranges={
        'max_depth': IntegerParameter(3, 12),
        'eta': ContinuousParameter(0.05, 0.5),
        'min_child_weight': IntegerParameter(2, 8),
        'subsample': ContinuousParameter(0.5, 0.9),
        'gamma': ContinuousParameter(0, 10),
    },
)
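
# A sketch of launching the tuner defined above; s3_input_train and
# s3_input_validation are hypothetical names for the training inputs prepared
# earlier in the notebook.
xgb_hyperparameter_tuner.fit({'train': s3_input_train, 'validation': s3_input_validation})
xgb_hyperparameter_tuner.wait()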
"""
QUIZ PART 3
"""

# ### (TODO) Testing the model
#
# Now that we've run our hyperparameter tuning job, it's time to see how well
# the best-performing model actually performs. To do this we will use
# SageMaker's Batch Transform functionality. Batch Transform is a convenient
# way to perform inference on a large dataset in a way that is not real time.
# That is, we don't need the predictions back immediately; SageMaker runs
# inference on the whole dataset offline and writes the results to S3.
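# A sketch of that Batch Transform flow (hypothetical names: test_location is
# the S3 prefix holding the CSV test data; attach() rebuilds the best estimator
# from the finished tuning job):
from sagemaker.estimator import Estimator

best_estimator = Estimator.attach(xgb_hyperparameter_tuner.best_training_job())
transformer = best_estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')
transformer.transform(test_location, content_type='text/csv', split_type='Line')
transformer.wait()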
def test_integer_parameter():
    int_param = IntegerParameter(1, 2)
    assert isinstance(int_param, _ParameterRange)
    assert int_param.__name__ == 'Integer'
def test_tuning_single_algo(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    min_batch_size = ParameterInteger(name="MinBatchSize", default_value=64)
    max_batch_size = ParameterInteger(name="MaxBatchSize", default_value=128)
    hyperparameter_ranges = {
        "batch-size": IntegerParameter(min_batch_size, max_batch_size),
    }

    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=[{
            "Name": "test:acc",
            "Regex": "Overall test accuracy: (.*?);"
        }],
        max_jobs=2,
        max_parallel_jobs=2,
    )

    step_tune = TuningStep(
        name="my-tuning-step",
        tuner=tuner,
        inputs=inputs,
    )

    best_model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_tune.get_top_model_s3_uri(
            top_k=0,
            s3_bucket=sagemaker_session.default_bucket(),
        ),
        sagemaker_session=sagemaker_session,
        role=role,
    )
    model_inputs = CreateModelInput(
        instance_type="ml.m5.large",
        accelerator_type="ml.eia1.medium",
    )
    step_best_model = CreateModelStep(
        name="1st-model",
        model=best_model,
        inputs=model_inputs,
    )

    second_best_model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_tune.get_top_model_s3_uri(
            top_k=1,
            s3_bucket=sagemaker_session.default_bucket(),
        ),
        sagemaker_session=sagemaker_session,
        role=role,
        entry_point=entry_point,
        source_dir=base_dir,
    )

    step_second_best_model = CreateModelStep(
        name="2nd-best-model",
        model=second_best_model,
        inputs=model_inputs,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            instance_count, instance_type, min_batch_size, max_batch_size
        ],
        steps=[step_tune, step_best_model, step_second_best_model],
        sagemaker_session=sagemaker_session,
    )
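
    # Because MinBatchSize/MaxBatchSize are pipeline parameters, the tuning
    # range can be overridden per execution without redefining the pipeline,
    # e.g. pipeline.start(parameters={"MinBatchSize": 32, "MaxBatchSize": 256}).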

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={})
            assert re.match(
                rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 3
            for step in execution_steps:
                assert step["StepStatus"] == "Succeeded"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass