Example #1
def test_tuning_mxnet(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'tuning.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        estimator = MXNet(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge',
                          sagemaker_session=sagemaker_session,
                          base_job_name='tune-mxnet')

        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.01, 0.2)}
        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [{'Name': 'Validation-accuracy', 'Regex': 'Validation-accuracy=([0-9\\.]+)'}]
        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=4, max_parallel_jobs=2)

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/mxnet_mnist/test')
        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28))
        predictor.predict(data)
Example #2
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    mnist_script = os.path.join(mnist_dir, "mnist.py")

    estimator = PyTorch(
        entry_point=mnist_script,
        role="SageMakerRole",
        train_instance_count=1,
        py_version=PYTHON_VERSION,
        train_instance_type="ml.c4.xlarge",
        sagemaker_session=sagemaker_session,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = "evaluation-accuracy"
        metric_definitions = [{
            "Name": "evaluation-accuracy",
            "Regex": r"Overall test accuracy: (\d+)"
        }]
        hyperparameter_ranges = {"batch-size": IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "training"),
            key_prefix="integ-test-data/pytorch_mnist/training",
        )

        tuning_job_name = unique_name_from_base("pytorch", max_length=32)
        tuner.fit({"training": training_data}, job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = attached_tuner.deploy(1, "ml.c4.xlarge")
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #3
def test_tuning_mxnet(sagemaker_session):
    with timeout(minutes=15):
        script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'tuning.py')
        data_path = os.path.join(DATA_DIR, 'mxnet_mnist')

        estimator = MXNet(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge',
                          sagemaker_session=sagemaker_session,
                          base_job_name='tune-mxnet')

        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.01, 0.2)}
        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [{'Name': 'Validation-accuracy', 'Regex': 'Validation-accuracy=([0-9\\.]+)'}]
        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=4, max_parallel_jobs=2)

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/mxnet_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/mxnet_mnist/test')
        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28))
        predictor.predict(data)
Example #4
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole',
                  train_instance_type='ml.c4.xlarge',
                  num_topics=10,
                  sagemaker_session=sagemaker_session,
                  base_job_name='test-lda')

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hyperparameters to optimize over
        hyperparameter_ranges = {
            'alpha0': ContinuousParameter(1, 10),
            'num_topics': IntegerParameter(1, 2)
        }
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type='Maximize',
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit([record_set, test_record_set], mini_batch_size=1)

        print('Started hyperparameter tuning job with name:' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
Example #5
def test_tuning_mxnet(
    sagemaker_session,
    mxnet_training_latest_version,
    mxnet_training_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "mxnet_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "mxnet_mnist")

        estimator = MXNet(
            entry_point=script_path,
            role="SageMakerRole",
            py_version=mxnet_training_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            framework_version=mxnet_training_latest_version,
            sagemaker_session=sagemaker_session,
        )

        hyperparameter_ranges = {
            "learning-rate": ContinuousParameter(0.01, 0.2)
        }
        objective_metric_name = "Validation-accuracy"
        metric_definitions = [{
            "Name": "Validation-accuracy",
            "Regex": "Validation-accuracy=([0-9\\.]+)"
        }]
        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=4,
            max_parallel_jobs=2,
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/mxnet_mnist/train")
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/mxnet_mnist/test")

        tuning_job_name = unique_name_from_base("tune-mxnet", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit({
            "train": train_input,
            "test": test_input
        },
                  job_name=tuning_job_name)

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        data = np.zeros(shape=(1, 1, 28, 28))
        predictor.predict(data)
Example #6
def test_tuning_kmeans_fsx(efs_fsx_setup, sagemaker_session,
                           cpu_instance_type):
    subnets = [efs_fsx_setup.subnet_id]
    security_group_ids = efs_fsx_setup.security_group_ids
    role = efs_fsx_setup.role_name
    kmeans = KMeans(
        role=role,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=cpu_instance_type,
        k=K,
        sagemaker_session=sagemaker_session,
        subnets=subnets,
        security_group_ids=security_group_ids,
    )

    hyperparameter_ranges = {
        "extra_center_factor": IntegerParameter(4, 10),
        "mini_batch_size": IntegerParameter(10, 100),
        "epochs": IntegerParameter(1, 2),
        "init_method": CategoricalParameter(["kmeans++", "random"]),
    }

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        tuner = HyperparameterTuner(
            estimator=kmeans,
            objective_metric_name=OBJECTIVE_METRIC_NAME,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Minimize",
            max_jobs=MAX_JOBS,
            max_parallel_jobs=MAX_PARALLEL_JOBS,
        )

        file_system_fsx_id = efs_fsx_setup.file_system_fsx_id
        train_records = FileSystemRecordSet(
            file_system_id=file_system_fsx_id,
            file_system_type="FSxLustre",
            directory_path=FSX_DIR_PATH,
            num_records=NUM_RECORDS,
            feature_dim=FEATURE_DIM,
        )

        test_records = FileSystemRecordSet(
            file_system_id=file_system_fsx_id,
            file_system_type="FSxLustre",
            directory_path=FSX_DIR_PATH,
            num_records=NUM_RECORDS,
            feature_dim=FEATURE_DIM,
            channel="test",
        )

        job_name = unique_name_from_base("tune-kmeans-fsx")
        tuner.fit([train_records, test_records], job_name=job_name)
        tuner.wait()
        best_training_job = tuner.best_training_job()
        assert best_training_job
Example #7
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script,
                        role='SageMakerRole',
                        train_instance_count=1,
                        py_version=PYTHON_VERSION,
                        train_instance_type='ml.c4.xlarge',
                        sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        metric_definitions = [{
            'Name': 'evaluation-accuracy',
            'Regex': r'Overall test accuracy: (\d+)'
        }]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator,
                                    objective_metric_name,
                                    hyperparameter_ranges,
                                    metric_definitions,
                                    max_jobs=2,
                                    max_parallel_jobs=2,
                                    early_stopping_type='Auto')

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, 'training'),
            key_prefix='integ-test-data/pytorch_mnist/training')

        tuning_job_name = unique_name_from_base('pytorch', max_length=32)
        tuner.fit({'training': training_data}, job_name=tuning_job_name)

        print('Started hyperparameter tuning job with name:' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == 'Auto'

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #8
def test_tuning_tf(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "iris", "iris-dnn-classifier.py")

        estimator = TensorFlow(
            entry_point=script_path,
            role="SageMakerRole",
            training_steps=1,
            evaluation_steps=1,
            hyperparameters={"input_tensor_name": "inputs"},
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        inputs = sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix="integ-test-data/tf_iris")
        hyperparameter_ranges = {
            "learning_rate": ContinuousParameter(0.05, 0.2)
        }

        objective_metric_name = "loss"
        metric_definitions = [{"Name": "loss", "Regex": "loss = ([0-9\\.]+)"}]

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            objective_type="Minimize",
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("tune-tf", max_length=32)
        tuner.fit(inputs, job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, "ml.c4.xlarge")

        features = [6.4, 3.2, 4.5, 1.5]
        dict_result = predictor.predict({"inputs": features})
        print("predict result: {}".format(dict_result))
        list_result = predictor.predict(features)
        print("predict result: {}".format(list_result))

        assert dict_result == list_result
Example #9
def test_tuning_kmeans(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10, sagemaker_session=sagemaker_session, base_job_name='tk',
                        output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

        # set KMeans-specific hyperparameters
        kmeans.init_method = 'random'
        kmeans.max_iterators = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hyperparameters to optimize over
        hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                                 'mini_batch_size': IntegerParameter(10, 100),
                                 'epochs': IntegerParameter(1, 2),
                                 'init_method': CategoricalParameter(['kmeans++', 'random'])}
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(estimator=kmeans, objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges, objective_type='Minimize', max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
Example #10
def test_tuning_kmeans(sagemaker_session):
    with timeout(minutes=20):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        # Load the data into memory as numpy arrays
        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        kmeans = KMeans(role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge',
                        k=10, sagemaker_session=sagemaker_session, base_job_name='tk',
                        output_path='s3://{}/'.format(sagemaker_session.default_bucket()))

        # set KMeans-specific hyperparameters
        kmeans.init_method = 'random'
        kmeans.max_iterators = 1
        kmeans.tol = 1
        kmeans.num_trials = 1
        kmeans.local_init_method = 'kmeans++'
        kmeans.half_life_time_size = 1
        kmeans.epochs = 1

        records = kmeans.record_set(train_set[0][:100])
        test_records = kmeans.record_set(train_set[0][:100], channel='test')

        # specify which hyperparameters to optimize over
        hyperparameter_ranges = {'extra_center_factor': IntegerParameter(1, 10),
                                 'mini_batch_size': IntegerParameter(10, 100),
                                 'epochs': IntegerParameter(1, 2),
                                 'init_method': CategoricalParameter(['kmeans++', 'random'])}
        objective_metric_name = 'test:msd'

        tuner = HyperparameterTuner(estimator=kmeans, objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges, objective_type='Minimize', max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([records, test_records])

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        result = predictor.predict(train_set[0][:10])

        assert len(result) == 10
        for record in result:
            assert record.label['closest_cluster'] is not None
            assert record.label['distance_to_cluster'] is not None
Example #11
def test_tuning_chainer(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            py_version=PYTHON_VERSION,
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [
            {'Name': 'Validation-accuracy',
             'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges,
                                    metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        tuning_job_name = unique_name_from_base('chainer', max_length=32)
        tuner.fit({'train': train_input, 'test': test_input}, job_name=tuning_job_name)

        print('Started hyperparameter tuning job with name:' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #12
def test_tuning_tf(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(entry_point=script_path,
                               role='SageMakerRole',
                               training_steps=1,
                               evaluation_steps=1,
                               hyperparameters={'input_tensor_name': 'inputs'},
                               train_instance_count=1,
                               train_instance_type='ml.c4.xlarge',
                               sagemaker_session=sagemaker_session,
                               base_job_name='tune-tf')

        inputs = sagemaker_session.upload_data(
            path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        hyperparameter_ranges = {
            'learning_rate': ContinuousParameter(0.05, 0.2)
        }

        objective_metric_name = 'loss'
        metric_definitions = [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}]

        tuner = HyperparameterTuner(estimator,
                                    objective_metric_name,
                                    hyperparameter_ranges,
                                    metric_definitions,
                                    objective_type='Minimize',
                                    max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit(inputs)

        print('Started hyperparameter tuning job with name:' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        features = [6.4, 3.2, 4.5, 1.5]
        dict_result = predictor.predict({'inputs': features})
        print('predict result: {}'.format(dict_result))
        list_result = predictor.predict(features)
        print('predict result: {}'.format(list_result))

        assert dict_result == list_result
Example #13
def test_tuning_tf_lustre(
    efs_fsx_setup,
    sagemaker_session,
    cpu_instance_type,
    tensorflow_training_latest_version,
    tensorflow_training_latest_py_version,
):
    role = efs_fsx_setup["role_name"]
    subnets = [efs_fsx_setup["subnet_id"]]
    security_group_ids = efs_fsx_setup["security_group_ids"]

    estimator = TensorFlow(
        entry_point=SCRIPT,
        role=role,
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=tensorflow_training_latest_version,
        py_version=tensorflow_training_latest_py_version,
        subnets=subnets,
        security_group_ids=security_group_ids,
    )

    hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
    objective_metric_name = "accuracy"
    metric_definitions = [{
        "Name": objective_metric_name,
        "Regex": "accuracy = ([0-9\\.]+)"
    }]
    tuner = HyperparameterTuner(
        estimator,
        objective_metric_name,
        hyperparameter_ranges,
        metric_definitions,
        max_jobs=MAX_JOBS,
        max_parallel_jobs=MAX_PARALLEL_JOBS,
    )

    file_system_fsx_id = efs_fsx_setup["file_system_fsx_id"]
    file_system_input = FileSystemInput(file_system_id=file_system_fsx_id,
                                        file_system_type="FSxLustre",
                                        directory_path=FSX_DIR_PATH)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        tuning_job_name = unique_name_from_base(
            "test-tuning-tf-script-mode-lustre", max_length=32)
        tuner.fit(file_system_input, job_name=tuning_job_name)
        time.sleep(15)
        tuner.wait()
    best_training_job = tuner.best_training_job()
    assert best_training_job
Example #14
def test_tuning_chainer(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'chainer_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'chainer_mnist')

        estimator = Chainer(entry_point=script_path,
                            role='SageMakerRole',
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            sagemaker_session=sagemaker_session,
                            hyperparameters={'epochs': 1})

        train_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                              key_prefix='integ-test-data/chainer_mnist/train')
        test_input = estimator.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                             key_prefix='integ-test-data/chainer_mnist/test')

        hyperparameter_ranges = {'alpha': ContinuousParameter(0.001, 0.005)}

        objective_metric_name = 'Validation-accuracy'
        metric_definitions = [
            {'Name': 'Validation-accuracy', 'Regex': r'\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': train_input, 'test': test_input})

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype='float32')
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #15
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'lda')
        data_filename = 'nips-train_1.pbr'

        with open(os.path.join(data_path, data_filename), 'rb') as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(all_records[0].features['values'].float32_tensor.shape[0])

        lda = LDA(role='SageMakerRole', train_instance_type='ml.c4.xlarge', num_topics=10,
                  sagemaker_session=sagemaker_session, base_job_name='test-lda')

        record_set = prepare_record_set_from_local_files(data_path, lda.data_location,
                                                         len(all_records), feature_num, sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(data_path, lda.data_location,
                                                              len(all_records), feature_num, sagemaker_session)
        test_record_set.channel = 'test'

        # specify which hyperparameters to optimize over
        hyperparameter_ranges = {'alpha0': ContinuousParameter(1, 10),
                                 'num_topics': IntegerParameter(1, 2)}
        objective_metric_name = 'test:pwll'

        tuner = HyperparameterTuner(estimator=lda, objective_metric_name=objective_metric_name,
                                    hyperparameter_ranges=hyperparameter_ranges, objective_type='Maximize', max_jobs=2,
                                    max_parallel_jobs=2)

        tuner.fit([record_set, test_record_set], mini_batch_size=1)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label['topic_mixture'] is not None
Example #16
def test_tuning_tf(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'iris', 'iris-dnn-classifier.py')

        estimator = TensorFlow(entry_point=script_path,
                               role='SageMakerRole',
                               training_steps=1,
                               evaluation_steps=1,
                               hyperparameters={'input_tensor_name': 'inputs'},
                               train_instance_count=1,
                               train_instance_type='ml.c4.xlarge',
                               sagemaker_session=sagemaker_session,
                               base_job_name='tune-tf')

        inputs = sagemaker_session.upload_data(path=DATA_PATH, key_prefix='integ-test-data/tf_iris')
        hyperparameter_ranges = {'learning_rate': ContinuousParameter(0.05, 0.2)}

        objective_metric_name = 'loss'
        metric_definitions = [{'Name': 'loss', 'Regex': 'loss = ([0-9\\.]+)'}]

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    objective_type='Minimize', max_jobs=2, max_parallel_jobs=2)

        tuner.fit(inputs)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.c4.xlarge')

        features = [6.4, 3.2, 4.5, 1.5]
        dict_result = predictor.predict({'inputs': features})
        print('predict result: {}'.format(dict_result))
        list_result = predictor.predict(features)
        print('predict result: {}'.format(list_result))

        assert dict_result == list_result
Example #17
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script, role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge', sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        metric_definitions = [{'Name': 'evaluation-accuracy', 'Regex': r'Overall test accuracy: (\d+)'}]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        training_data = estimator.sagemaker_session.upload_data(path=os.path.join(mnist_dir, 'training'),
                                                                key_prefix='integ-test-data/pytorch_mnist/training')
        tuner.fit({'training': training_data})

        tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name:' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(tuning_job_name, sagemaker_session=sagemaker_session)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
Example #18
def test_tuning_byo_estimator(sagemaker_session, cpu_instance_type):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_uri = image_uris.retrieve("factorization-machines",
                                    sagemaker_session.boto_region_name)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("byo", 32)
        print("Started hyperparameter tuning job with name {}:".format(
            tuning_job_name))
        tuner.fit(
            {
                "train": s3_train_data,
                "test": s3_train_data
            },
            include_cls_metadata=False,
            job_name=tuning_job_name,
        )

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(
            1,
            cpu_instance_type,
            endpoint_name=best_training_job,
            serializer=_FactorizationMachineSerializer(),
            deserializer=JSONDeserializer(),
        )

        result = predictor.predict(datasets.one_p_mnist()[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
Example #19
def test_tuning_chainer(sagemaker_session, chainer_latest_version,
                        chainer_latest_py_version, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "chainer_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "chainer_mnist")

        estimator = Chainer(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=chainer_latest_version,
            py_version=chainer_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/chainer_mnist/train")
        test_input = estimator.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/chainer_mnist/test")

        hyperparameter_ranges = {"alpha": ContinuousParameter(0.001, 0.005)}

        objective_metric_name = "Validation-accuracy"
        metric_definitions = [{
            "Name": "Validation-accuracy",
            "Regex": r"\[J1\s+\d\.\d+\s+\d\.\d+\s+\d\.\d+\s+(\d\.\d+)",
        }]

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuning_job_name = unique_name_from_base("chainer", max_length=32)
        print("Started hyperparameter tuning job with name: {}".format(
            tuning_job_name))
        tuner.fit({
            "train": train_input,
            "test": test_input
        },
                  job_name=tuning_job_name)

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)

        batch_size = 100
        data = np.zeros((batch_size, 784), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 1, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size

        data = np.zeros((batch_size, 28, 28), dtype="float32")
        output = predictor.predict(data)
        assert len(output) == batch_size
Example #20
        objective_metric_name='test:binary_classification_accuracy',
        hyperparameter_ranges={
            'epochs': IntegerParameter(1, 200),
            'mini_batch_size': IntegerParameter(10, 10000),
            'factors_wd': ContinuousParameter(1e-8, 512)},
        max_jobs=4,
        max_parallel_jobs=4)

    my_tuner.fit({'train': train_data, 'test': test_data}, include_cls_metadata=False)

    my_tuner.wait()

    #sm_session = sagemaker.Session()
    #best_log = sm_session.logs_for_job(my_tuner.best_training_job())
    #print(best_log)
    best_model = '{}/{}/output/model.tar.gz'.format(output_prefix, my_tuner.best_training_job())

print('Best model: {}'.format(best_model))
#
# Save config files to be used later for qa and prod sagemaker endpoint configurations
# and for prediction tests
#
config_data_qa = {
    "Parameters": {
        "BucketName": bucket,
        "CommitID": commit_id,
        "Environment": "qa",
        "ParentStackName": stack_name,
        "ModelData": best_model,
        "ContainerImage": containers[boto3.Session().region_name],
Example #21
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name
                          ) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {
            "encoding": "latin1"
        }

        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = "test_byo_estimator"
        key = "recordio-pb-data"
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, "train",
                                                          key))

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type="binary_classifier")

        hyperparameter_ranges = {"mini_batch_size": IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            objective_metric_name="test:binary_classification_accuracy",
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2,
        )

        tuner.fit(
            {
                "train": s3_train_data,
                "test": s3_train_data
            },
            include_cls_metadata=False,
            job_name=unique_name_from_base("byo", 32),
        )

        print("Started hyperparameter tuning job with name:" +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1,
                                 "ml.m4.xlarge",
                                 endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None
Example #22
                            'epochs': 6,
                            'backend': 'gloo'
                        })

    hyperparameter_ranges = {
        'lr': ContinuousParameter(0.001, 0.1),
        'batch-size': CategoricalParameter([32, 64, 128, 256, 512])
    }

    objective_metric_name = 'average test loss'
    objective_type = 'Minimize'
    metric_definitions = [{
        'Name': 'average test loss',
        'Regex': 'Test set: Average loss: ([0-9\\.]+)'
    }]

    tuner = HyperparameterTuner(estimator,
                                objective_metric_name,
                                hyperparameter_ranges,
                                metric_definitions,
                                max_jobs=3,
                                max_parallel_jobs=3,
                                objective_type=objective_type,
                                base_tuning_job_name="fashion-mnist")

    tuner.fit({'training': inputs})

    tuner.wait()

    print(tuner.best_training_job())
Example #23
def test_tuning_lda(sagemaker_session):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            train_instance_type="ml.c4.xlarge",
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"

        # specify which hyperparameters to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
        objective_metric_name = "test:pwll"

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

        latest_tuning_job_name = tuner.latest_tuning_job.name

        print("Started hyperparameter tuning job with name:" +
              latest_tuning_job_name)

        time.sleep(15)
        tuner.wait()

    desc = tuner.latest_tuning_job.sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job(
        HyperParameterTuningJobName=latest_tuning_job_name)
    assert desc["HyperParameterTuningJobConfig"][
        "TrainingJobEarlyStoppingType"] == "Auto"

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, "ml.c4.xlarge")
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None
Example #24
        max_jobs=4,
        max_parallel_jobs=4)

    my_tuner.fit({
        'train': train_data,
        'test': test_data
    },
                 include_cls_metadata=False)

    my_tuner.wait()

    #sm_session = sagemaker.Session()
    #best_log = sm_session.logs_for_job(my_tuner.best_training_job())
    #print(best_log)
    best_model = '{}/{}/output/model.tar.gz'.format(
        output_prefix, my_tuner.best_training_job())

print('Best model: {}'.format(best_model))
#
# Save config files to be used later for qa and prod sagemaker endpoint configurations
# and for prediction tests
#
config_data_qa = {
    "Parameters": {
        "BucketName": bucket,
        "CommitID": commit_id,
        "Environment": "qa",
        "ParentStackName": stack_name,
        "ModelData": best_model,
        "ContainerImage": containers[boto3.Session().region_name],
        "Timestamp": current_timestamp
Example #25
            'Value': 'ml.m5.xlarge / 4 / 1 / 1'
        }, {
            'Key': 'hypers choice',
            'Value': 'likelihood: deterministic-L1 / student-T'
        }])
    data_channels = {
        "train": "s3://{}/train/train-400.json".format(s3_data_path),
        "test": "s3://{}/test/test-400.json".format(s3_data_path)
    }
    tuner.fit(inputs=data_channels, wait=True)
    # tuner.fit(inputs=data_channels, wait=False)
    # boto3.client('sagemaker').describe_hyper_parameter_tuning_job(
    #                             HyperParameterTuningJobName=tuner.latest_tuning_job.job_name)

    # deploy an endpoint using the trained model
    # Use tuner.best_training_job() here, not tuner.latest_tuning_job.job_name:
    # the latter returns the hyperparameter tuning job's name rather than the
    # training job name (it differs by a few trailing characters), which raises
    # "botocore.exceptions.ClientError: An error occurred (ValidationException)
    # when calling the DescribeTrainingJob operation: Requested resource not found."
    job_name = tuner.best_training_job()
    endpoint_name = sagemaker_session.endpoint_from_job(
        job_name=job_name,
        initial_instance_count=1,
        instance_type='ml.m5.xlarge',
        image_uri=image_name,
        role=role,
        wait=True,
        name=job_name
    )  # name must match job_name; otherwise this raises "ValueError: Shape of
    # passed values is (28, 9), indices imply (24, 9)" (the numbers in
    # parentheses may vary).

else:
Example #26
# perform inference on a large number of samples. An example of this in industry
# might be generating an end-of-month report. This method of inference is also
# useful to us, as it means we can perform inference on our entire test set.
#
# Remember that in order to create a transformer object to perform the batch
# transform job, we need a trained estimator object. We can do that using the
# `attach()` method, creating an estimator object which is attached to the best
# trained job.

# In[ ]:

# TODO: Create a new estimator object attached to the best training job found
# during hyperparameter tuning

xgb_attached = sagemaker.estimator.Estimator.attach(
    xgb_hyperparameter_tuner.best_training_job())

# Now that we have an estimator object attached to the correct training job, we
# can proceed as we normally would and create a transformer object.

# In[ ]:

# TODO: Create a transformer object from the attached estimator. Using an instance
# count of 1 and an instance type of ml.m4.xlarge should be more than enough.

xgb_transformer = xgb_attached.transformer(instance_count=1,
                                           instance_type='ml.m4.xlarge')

# Next we actually perform the transform job. When doing so we need to make sure
# to specify the type of data we are sending so that it is serialized correctly
# in the background. In our case we are providing our model with csv data so we
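
# The text above is cut off, but the step it introduces is the transform call
# itself. A minimal sketch of that call, assuming the test set was already
# uploaded to S3 at `test_location` (a hypothetical name): we declare CSV
# content and split the input line by line so each row becomes one request.

# In[ ]:

xgb_transformer.transform(test_location, content_type='text/csv', split_type='Line')
xgb_transformer.wait()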
Example #27
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name
                          ) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {
            'encoding': 'latin1'
        }

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(
                                                          prefix, 'train',
                                                          key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole',
                              train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session,
                              base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(
            estimator=estimator,
            base_tuning_job_name='byo',
            objective_metric_name='test:binary_classification_accuracy',
            hyperparameter_ranges=hyperparameter_ranges,
            max_jobs=2,
            max_parallel_jobs=2)

        tuner.fit({
            'train': s3_train_data,
            'test': s3_train_data
        },
                  include_cls_metadata=False)

        print('Started hyperparameter tuning job with name:' +
              tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1,
                                 'ml.m4.xlarge',
                                 endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None
Example #28
def test_tuning_byo_estimator(sagemaker_session):
    """Use Factorization Machines algorithm as an example here.

    First we need to prepare data for training. We take standard data set, convert it to the
    format that the algorithm can process and upload it to S3.
    Then we create the Estimator and set hyperparamets as required by the algorithm.
    Next, we can call fit() with path to the S3.
    Later the trained model is deployed and prediction is called against the endpoint.
    Default predictor is updated with json serializer and deserializer.
    """
    image_name = registry(sagemaker_session.boto_session.region_name) + '/factorization-machines:1'
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        s3_train_data = sagemaker_session.upload_data(path=training_data_path,
                                                      key_prefix=os.path.join(prefix, 'train', key))

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        hyperparameter_ranges = {'mini_batch_size': IntegerParameter(100, 200)}

        tuner = HyperparameterTuner(estimator=estimator, base_tuning_job_name='byo',
                                    objective_metric_name='test:binary_classification_accuracy',
                                    hyperparameter_ranges=hyperparameter_ranges,
                                    max_jobs=2, max_parallel_jobs=2)

        tuner.fit({'train': s3_train_data, 'test': s3_train_data}, include_cls_metadata=False)

        print('Started hyperparameter tuning job with name:' + tuner.latest_tuning_job.name)

        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = tuner.deploy(1, 'ml.m4.xlarge', endpoint_name=best_training_job)
        predictor.serializer = _fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None