Example #1
def test_tuning_mxnet(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'tuning.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        estimator = MXNet(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge',
                          sagemaker_session=sagemaker_session,
                          base_job_name='tune-mxnet')

        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.01, 0.2)}
        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [{'Name': 'Validation-accuracy', 'Regex': 'Validation-accuracy=([0-9\\.]+)'}]
        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=4, max_parallel_jobs=2)

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/mxnet_mnist/test')
        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28))
        predictor.predict(data)
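All of these examples depend on `timeout` and `timeout_and_delete_endpoint_by_name` from the SageMaker SDK's integration-test utilities, which are not shown on this page. A minimal sketch of what they do, assuming the third-party `stopit` package that the SDK's test suite uses (the real helpers are more robust, e.g. they retry endpoint deletion):

from contextlib import contextmanager

import stopit  # third-party; raises TimeoutException when the limit is hit


@contextmanager
def timeout(minutes=0, seconds=0):
    # Abort the enclosed block if it runs longer than the limit.
    limit = 60 * minutes + seconds
    with stopit.ThreadingTimeout(limit, swallow_exc=False) as t:
        yield t


@contextmanager
def timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session,
                                        minutes=45, seconds=0):
    # Like timeout(), but always clean up the endpoint afterwards.
    with stopit.ThreadingTimeout(60 * minutes + seconds, swallow_exc=False) as t:
        try:
            yield t
        finally:
            sagemaker_session.delete_endpoint(endpoint_name)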
Example #2
def test_tuning_kmeans(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10, sagemaker_session=sagemaker_session, base_job_name='tk',
                        output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

        # set kmeans specific hp
        kmeans.init_method = 'random'
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hp you want to optimize over
        hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                                 'mini_batch_size': IntegerParameter(10, 100),
                                 'epochs': IntegerParameter(1, 2),
                                 'init_method': CategoricalParameter(['kmeans++', 'random'])}
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(estimator=kmeans, objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges, objective_type='Minimize', max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
Example #3
def test_tuning_chainer(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [
            {'Name': 'Validation-accuracy', 'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #4
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole', train_instance_type='ml.c4.xlarge', num_topics=10,
                  sagemaker_session=sagemaker_session, base_job_name='test-lda')

        record_set = prepare_record_set_from_local_files(data_path, lda.data_location,
                                                         len(all_records), feature_num, sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(data_path, lda.data_location,
                                                              len(all_records), feature_num, sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hp you want to optimize over
        hyperparameter_ranges = {'alpha0': ContinuousParameter(1, 10),
                                 'num_topics': IntegerParameter(1, 2)}
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(estimator=lda, objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges, objective_type='Maximize', max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([record_set, test_record_set], mini_batch_size=1)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
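`prepare_record_set_from_local_files` is another test helper that is not defined in this snippet. A minimal sketch, assuming it uploads the local directory under the estimator's data location and wraps the resulting S3 prefix in a RecordSet:

import uuid
from urllib.parse import urlparse

from sagemaker.amazon.amazon_estimator import RecordSet


def prepare_record_set_from_local_files(dir_path, destination, num_records,
                                        feature_dim, sagemaker_session):
    # Upload the local protobuf files and describe them as a RecordSet
    # that the Amazon algorithm estimators can consume.
    key_prefix = urlparse(destination).path.lstrip('/') + 'testfiles-' + str(uuid.uuid4())
    uploaded = sagemaker_session.upload_data(path=dir_path, key_prefix=key_prefix)
    return RecordSet(uploaded, num_records=num_records, feature_dim=feature_dim,
                     s3_data_type='S3Prefix')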
Example #5
def test_tuning_tf(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(entry_point=script_path,
                               role='SageMakerRole',
                               training_steps=1,
                               evaluation_steps=1,
                               hyperparameters={'input_tensor_name': 'inputs'},
                               train_instance_count=1,
                               train_instance_type='ml.c4.xlarge',
                               sagemaker_session=sagemaker_session,
                               base_job_name='tune-tf')

        inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.05, 0.2)}

        objective_metric_name = 'loss'
        metric_definitions = [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    objective_type='Minimize', max_jobs=2, max_parallel_jobs=2)

        tuner.fit(inputs)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        features = [6.4, 3.2, 4.5, 1.5]
        dict_result = predictor.predict({'inputs': features})
        print('predict result: {}'.format(dict_result))
        list_result = predictor.predict(features)
        print('predict result: {}'.format(list_result))

        assert dict_result == list_result
Example #6
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script, role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge', sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        metric_definitions = [{'Name': 'evaluation-accuracy', 'Regex': r'Overall test accuracy: (\d+)'}]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        training_data = estimator.sagemaker_session.upload_data(path=os.path.join(mnist_dir, 'training'),
                                                                key_prefix='integ-test-data/pytorch_mnist/training')
        tuner.fit({'training': training_data})

        tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name:' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(tuning_job_name, sagemaker_session=sagemaker_session)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #7
def test_attach_tuning_pytorch(sagemaker_session, cpu_instance_type):
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    mnist_script = os.path.join(mnist_dir, "mnist.py")

    estimator = PyTorch(
        entry_point=mnist_script,
        role="SageMakerRole",
        train_instance_count=1,
        py_version=PYTHON_VERSION,
        train_instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = "evaluation-accuracy"
        metric_definitions = [{
            "Name": "evaluation-accuracy",
            "Regex": r"Overall test accuracy: (\d+)"
        }]
        hyperparameter_ranges = {"batch-size": IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "training"),
            key_prefix="integ-test-data/pytorch_mnist/training",
        )

        tuning_job_name = unique_name_from_base("pytorch", max_length=32)
        tuner.fit({"training": training_data}, job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    endpoint_name = tuning_job_name
    model_name = "model-name-1"
    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        predictor = attached_tuner.deploy(1,
                                          cpu_instance_type,
                                          endpoint_name=endpoint_name,
                                          model_name=model_name)
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
        _assert_model_name_match(sagemaker_session.sagemaker_client,
                                 endpoint_name, model_name)
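`_assert_model_name_match` is a small helper that is not shown above. A sketch consistent with its call site, assuming deploy() names the endpoint config after the endpoint:

def _assert_model_name_match(sagemaker_client, endpoint_config_name, model_name):
    # The endpoint config created by deploy() should reference the
    # explicitly supplied model name in its first production variant.
    config = sagemaker_client.describe_endpoint_config(
        EndpointConfigName=endpoint_config_name)
    assert model_name == config['ProductionVariants'][0]['ModelName']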
Example #8
def test_tuning_chainer(sagemaker_session, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "chainer_mnist")

        estimator = Chainer(
            entry_point=script_path,
            role="SageMakerRole",
            py_version=PYTHON_VERSION,
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/chainer_mnist/train")
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/chainer_mnist/test")

        hyperparameter_ranges = {"alpha": ContinuousParameter(0.001, 0.005)}

        objective_metric_name = "Validation-accuracy"
        metric_definitions = [{
            "Name": "Validation-accuracy",
            "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
        }]

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        tuner.fit({"train": train_input, "test": test_input},
                  job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #9
File: train.py  Project: sbuttler/examples
                    })

hyperparameter_ranges = {
    'lr': ContinuousParameter(0.0001, 0.001),
    'hidden_nodes': IntegerParameter(20, 100),
    'batch_size': CategoricalParameter([128, 256, 512]),
    'conv1_channels': CategoricalParameter([32, 64, 128]),
    'conv2_channels': CategoricalParameter([64, 128, 256, 512]),
}

objective_metric_name = 'average test accuracy'
objective_type = 'Maximize'
metric_definitions = [{
    'Name': 'average test accuracy',
    'Regex': 'Test Accuracy: ([0-9\\.]+)'
}]

tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=args.max_jobs,
                            max_parallel_jobs=args.max_parallel_jobs,
                            objective_type=objective_type)

tuner.fit({'training': inputs})

if args.wait:
    print("Waiting for sweep to finish")
    tuner.wait()
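This snippet reads `args.max_jobs`, `args.max_parallel_jobs`, and `args.wait` from a command-line namespace defined earlier in train.py (not shown, like the `estimator` and `inputs` it uses). A minimal argparse sketch consistent with those references, with illustrative defaults:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--max-jobs', dest='max_jobs', type=int, default=4)
parser.add_argument('--max-parallel-jobs', dest='max_parallel_jobs', type=int, default=2)
parser.add_argument('--wait', action='store_true',
                    help='block until the tuning sweep finishes')
args = parser.parse_args()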
Example #10
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(estimator=estimator, base_tuning_job_name='byo',
                                    objective_metric_name='test:binary_classification_accuracy',
                                    hyperparameter_ranges=hyperparameter_ranges,
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': s3_train_data, 'test': s3_train_data}, include_cls_metadata=False)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.m4.xlarge', endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
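The `_fm_serializer` assigned to the predictor above is not defined in this snippet. A minimal sketch, assuming the Factorization Machines JSON inference format ({"instances": [{"features": [...]}]}); `json_deserializer` itself ships with the SDK in sagemaker.predictor:

import json


def _fm_serializer(data):
    # Wrap each input row in the JSON request structure the
    # Factorization Machines endpoint expects.
    js = {'instances': []}
    for row in data:
        js['instances'].append({'features': row.tolist()})
    return json.dumps(js)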
Example #11
def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = get_image_uri(sagemaker_session.boto_session.region_name, "factorization-machines")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=os.path.join(prefix, "train", key)
        )

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(
            num_factors=10, feature_dim=784, mini_batch_size=100, predictor_type="binary_classifier"
        )

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuner.fit(
            {"train": s3_train_data, "test": s3_train_data},
            include_cls_metadata=False,
            job_name=unique_name_from_base("byo", 32),
        )

        print("Started hyperparameter tuning job with name:" + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type, endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
Example #12
        'min_child_weight': IntegerParameter(2, 8),
        'subsample': ContinuousParameter(0.5, 0.9),
        'gamma': ContinuousParameter(0, 10),
    })

# s3_input is a wrapper around the location of our train and validation data, ensuring that
# SageMaker knows our data is in CSV format.
s3_input_train = sagemaker.s3_input(s3_data=train_location, content_type='csv')
s3_input_validation = sagemaker.s3_input(s3_data=val_location,
                                         content_type='csv')

xgb_hyperparameter_tuner.fit({
    'train': s3_input_train,
    'validation': s3_input_validation
})
xgb_hyperparameter_tuner.wait()

# Once the hyperparameter tuner has finished, we can retrieve information about the best performing model.
xgb_hyperparameter_tuner.best_training_job()

# In addition, since we'd like to set up a batch transform job to test the best model, we can
# construct a new estimator object from the results of the best training job. The `xgb_attached`
# object below can then be used as though we had constructed an estimator with the best performing
# hyperparameters and fit it to our training data.
xgb_attached = sagemaker.estimator.Estimator.attach(
    xgb_hyperparameter_tuner.best_training_job())

## Step 5: Test the model
xgb_transformer = xgb_attached.transformer(instance_count=1,
                                           instance_type='ml.m4.xlarge')

xgb_transformer.transform(test_location,
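The transform() call above is cut off mid-line. A typical continuation for CSV input, assuming the `test_location` prefix from earlier in the notebook, would tell the transformer how to parse and split the data and then wait for the job:

xgb_transformer.transform(test_location,
                          content_type='text/csv',
                          split_type='Line')
xgb_transformer.wait()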
Example #13
def test_tuning_kmeans(sagemaker_session):
    with timeout(minutes=20):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole',
                        train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10,
                        sagemaker_session=sagemaker_session,
                        base_job_name='tk',
                        output_path='s3://{}/'.format(
                            sagemaker_session.default_bucket()))

        # set kmeans specific hp
        kmeans.init_method = 'random'
        kmeans.max_iterations = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'extra_center_factor': IntegerParameter(1, 10),
            'mini_batch_size': IntegerParameter(10, 100),
            'epochs': IntegerParameter(1, 2),
            'init_method': CategoricalParameter(['kmeans++', 'random'])
        }
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(
            estimator=kmeans,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Minimize',
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
Example #14
def test_tuning_chainer(sagemaker_session):
    with timeout(minutes=15):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [{
            'Name': 'Validation-accuracy',
            'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'
        }]

        tuner = HyperparameterTuner(estimator,
                                    objective_metric_name,
                                    hyperparameter_ranges,
                                    metric_definitions,
                                    max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #15
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole',
                  train_instance_type='ml.c4.xlarge',
                  num_topics=10,
                  sagemaker_session=sagemaker_session)

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            'alpha0': ContinuousParameter(1, 10),
            'num_topics': IntegerParameter(1, 2)
        }
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type='Auto')

        tuning_job_name = unique_name_from_base('test-lda', max_length=32)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

        latest_tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name:' +
              latest_tuning_job_name)

        time.sleep(15)
        tuner.wait()

    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client \
        .describe_hyper_parameter_tuning_job(HyperParameterTuningJobName=latest_tuning_job_name)
    assert desc['HyperParameterTuningJobConfig'][
        'TrainingJobEarlyStoppingType'] == 'Auto'

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
Example #16
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(estimator=estimator, base_tuning_job_name='byo',
                                    objective_metric_name='test:binary_classification_accuracy',
                                    hyperparameter_ranges=hyperparameter_ranges,
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': s3_train_data, 'test': s3_train_data}, include_cls_metadata=False)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.m4.xlarge', endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
Example #17
        objective_metric_name='test:binary_classification_accuracy',
        hyperparameter_ranges={
            'epochs': IntegerParameter(1, 200),
            'mini_batch_size': IntegerParameter(10, 10000),
            'factors_wd': ContinuousParameter(1e-8, 512)
        },
        max_jobs=4,
        max_parallel_jobs=4)

    my_tuner.fit({
        'train': train_data,
        'test': test_data
    },
                 include_cls_metadata=False)

    my_tuner.wait()

    #sm_session = sagemaker.Session()
    #best_log = sm_session.logs_for_job(my_tuner.best_training_job())
    #print(best_log)
    best_model = '{}/{}/output/model.tar.gz'.format(
        output_prefix, my_tuner.best_training_job())

print('Best model: {}'.format(best_model))
#
# Save config files to be used later for qa and prod sagemaker endpoint configurations
# and for prediction tests
#
config_data_qa = {
    "Parameters": {
        "BucketName": bucket,
Example #18
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            train_instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
        objective_metric_name = "test:pwll"

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

        latest_tuning_job_name = tuner.latest_tuning_job.name

        print("Started hyperparameter tuning job with name:" +
              latest_tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"
    assert attached_tuner.estimator.alpha0 == 1.0
    assert attached_tuner.estimator.num_topics == 1

    best_training_job = attached_tuner.best_training_job()

    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None
Example #19
class AwsEstimator(AwsBase, ABC):
    """
    The general implementation of an AWS Estimator model:
    https://sagemaker.readthedocs.io/en/stable/estimators.html
    https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html

    This is an abstract class. Please subclass it and set the following class parameters:
        * container_name
        * default_hyperparameters
        * name
    """

    default_hyperparameters = NotImplemented
    default_hyperparameter_tuning = NotImplemented
    default_tuning_job_config = {
        "max_jobs": 3,
        "max_parallel_jobs": 3,
        "objective_metric_name": "validation:rmse",
        "objective_type": "Minimize",
    }
    container_name = NotImplemented
    name = "aws-estimator"

    def __init__(
        self,
        data: DataLoader,
        aws_executor: Sagemaker,
        output_path: Optional[str] = None,
        local_save_folder: Optional[str] = None,
    ) -> None:
        """
        Initializes an AWS Estimator with data and an executor.
        This does not yet do any training or data uploading.
        """
        LOGGER.info(f"Initializing AWS Estimator {self.name} model")

        super().__init__(data, aws_executor, output_path, local_save_folder)

        self._model: Estimator = None
        self._transformer: Transformer = None
        self._predictor: RealTimePredictor = None
        self._tuner: HyperparameterTuner = None

    def train(self, hyperparameters: Dict = {}) -> None:
        """
        Trains the model, with the data provided

        Arguments:
            hyperparameters: The hyperparameters to provide to the Estimator model.
                See https://docs.aws.amazon.com/sagemaker/latest/dg/algos.html
                And specific implementations
        """
        LOGGER.info("Starting to train model.")
        self._model = self._get_model(hyperparameters)

        # Get the data and upload to S3
        Y_train = self.data.train_data.loc[:, self.data.output_column]
        X_train = self.data.train_data.loc[:, self.data.feature_columns]
        s3_input_train = self._prepare_data("train", X_train, Y_train)

        Y_validation = self.data.validation_data.loc[:,
                                                     self.data.output_column]
        X_validation = self.data.validation_data.loc[:,
                                                     self.data.feature_columns]
        s3_input_validation = self._prepare_data("validation", X_validation,
                                                 Y_validation)

        LOGGER.info("Starting to fit model")
        self._model.fit({
            "train": s3_input_train,
            "validation": s3_input_validation
        })
        LOGGER.info("Done with fitting model")

    def _get_model(self, hyperparameters: Dict = {}) -> Estimator:
        """
        Initializes the model. This can be used to train later or attach an existing model

        Arguments:
            hyperparameters: The hyperparameters for the Estimator model

        Returns:
            model: The initialized model
        """
        container = get_image_uri(self.executor.boto_session.region_name,
                                  self.container_name)
        model = Estimator(
            container,
            **self.executor.default_model_kwargs,
            output_path=self.output_path,
        )
        used_hyperparameters = dict(self.default_hyperparameters)  # copy so the class-level default is not mutated
        used_hyperparameters["feature_dim"] = len(self.data.feature_columns)
        used_hyperparameters.update(hyperparameters)

        model.set_hyperparameters(**used_hyperparameters)
        return model

    def load_model(self, model_name: str = "") -> None:
        """
        Load the already trained model to not have to train again.

        Arguments:
            model_name: The name of the training job, as provided by AWS
        """
        LOGGER.info(f"Loading already trained model {model_name}")
        self._model = Estimator.attach(training_job_name=model_name,
                                       sagemaker_session=self.executor.session)

    def execute_prediction(self,
                           data: DataFrame,
                           name: str = "test") -> DataFrame:
        """
        Predict based on an already trained model.
        Loads the existing model if it exists.
        Also adds the output data to the cache.

        Arguments:
            data: The data to predict on. If not provided, defaults to the local data.
            name: The name under which the predictions are saved on S3.

        Returns:
            The predicted dataframe
        """
        # Upload to S3
        s3_location_test = self._prepare_data(name, data, s3_input_type=False)

        # Start the job
        LOGGER.info("Creating the transformer job")
        transformer = self._get_transformer()
        transformer.transform(s3_location_test,
                              content_type="text/csv",
                              split_type="Line")
        LOGGER.info("Waiting for the transformer job")
        transformer.wait()

        # Download the data
        LOGGER.info("Loading the results of the transformer job")
        Y_test = self._load_results(name)

        return Y_test

    def _get_transformer(self) -> Transformer:
        """
        Returns a transformer based on the current model
        """
        assert (self._model is not None
                ), "Cannot create a transformer if the model is not yet set."
        if self._tuner is not None:
            LOGGER.info("Loading best model from tuning job")
            self.load_model(self._tuner.best_training_job())
        return self._model.transformer(
            **self.executor.default_transformer_kwargs,
            output_path=f"{self.output_path}",
        )

    def tune(
        self,
        tuning_job_parameters: Dict[str, Any] = {},
        hyperparameters: Dict[str, Any] = None,
        hyperparameter_tuning: Dict[str, Any] = None,
    ):
        """
        Tunes the current Estimator with the provided hyperparameters
        """
        LOGGER.info("Starting the hyperparameter tuning.")

        if hyperparameters is None:
            hyperparameters = dict(self.default_hyperparameters)  # copy so the class-level default is not mutated
        hyperparameters["feature_dim"] = len(self.data.feature_columns)
        if hyperparameter_tuning is None:
            hyperparameter_tuning = self.default_hyperparameter_tuning
        used_tuning_job_parameters = {
            **self.default_tuning_job_config,
            **tuning_job_parameters,
        }

        if self._model is None:
            self._model = self._get_model(hyperparameters)

        self._tuner = HyperparameterTuner(
            estimator=self._model,
            **used_tuning_job_parameters,
            hyperparameter_ranges=hyperparameter_tuning,
        )

        # Get the data and upload to S3
        Y_train = self.data.train_data.loc[:, self.data.output_column]
        X_train = self.data.train_data.loc[:, self.data.feature_columns]
        s3_input_train = self._prepare_data("train", X_train, Y_train)

        Y_validation = self.data.validation_data.loc[:,
                                                     self.data.output_column]
        X_validation = self.data.validation_data.loc[:,
                                                     self.data.feature_columns]
        s3_input_validation = self._prepare_data("validation", X_validation,
                                                 Y_validation)

        LOGGER.info("Starting to tune hyperparameters")
        self._tuner.fit({
            "train": s3_input_train,
            "validation": s3_input_validation
        })
        self._tuner.wait()
        LOGGER.info("Done with the tuning job")