def test_attach_tuning_job_with_estimator_from_kwarg(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)
    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session,
                                       estimator_cls='sagemaker.estimator.Estimator')
    assert isinstance(tuner.estimator, Estimator)
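All of the unit tests in this listing share the same module-level scaffolding: a mocked `sagemaker_session` pytest fixture plus constants such as `JOB_NAME`, `TUNING_JOB_DETAILS`, `OBJECTIVE_METRIC_NAME`, and `METRIC_DEFINITIONS`. A minimal sketch of that scaffolding, with the constants reduced to illustrative stand-ins (the real `TUNING_JOB_DETAILS` is a complete DescribeHyperParameterTuningJob response dict and is elided here):

import copy
from unittest.mock import Mock

import pytest
from sagemaker.estimator import Estimator
from sagemaker.tuner import HyperparameterTuner

# Illustrative stand-ins; the real test module defines richer values.
JOB_NAME = "tuning-job"
OBJECTIVE_METRIC_NAME = "mock-metric"
METRIC_DEFINITIONS = [{"Name": "mock-metric", "Regex": "mock-metric: ([0-9.]+)"}]

@pytest.fixture()
def sagemaker_session():
    # A bare Mock is enough: each test overrides
    # describe_hyper_parameter_tuning_job with its own canned response.
    session = Mock(name="sagemaker_session")
    session.sagemaker_client = Mock(name="sagemaker_client")
    return session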
def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(
        name="describe_tuning_job", return_value=job_details
    )
    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)

    assert tuner.latest_tuning_job.name == JOB_NAME
    assert tuner.objective_metric_name == OBJECTIVE_METRIC_NAME
    assert tuner.max_jobs == 1
    assert tuner.max_parallel_jobs == 1
    assert tuner.metric_definitions == METRIC_DEFINITIONS
    assert tuner.strategy == "Bayesian"
    assert tuner.objective_type == "Minimize"
    assert tuner.early_stopping_type == "Off"

    assert isinstance(tuner.estimator, PCA)
    assert tuner.estimator.role == ROLE
    assert tuner.estimator.train_instance_count == 1
    assert tuner.estimator.train_max_run == 24 * 60 * 60
    assert tuner.estimator.input_mode == "File"
    assert tuner.estimator.output_path == BUCKET_NAME
    assert tuner.estimator.output_kms_key == ""

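    # attach() should strip SageMaker-internal hyperparameters while keeping user-set ones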
    assert "_tuning_objective_metric" not in tuner.estimator.hyperparameters()
    assert tuner.estimator.hyperparameters()["num_components"] == "10"
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    mnist_script = os.path.join(mnist_dir, "mnist.py")

    estimator = PyTorch(
        entry_point=mnist_script,
        role="SageMakerRole",
        train_instance_count=1,
        py_version=PYTHON_VERSION,
        train_instance_type="ml.c4.xlarge",
        sagemaker_session=sagemaker_session,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = "evaluation-accuracy"
        metric_definitions = [{
            "Name": "evaluation-accuracy",
            "Regex": r"Overall test accuracy: (\d+)"
        }]
        hyperparameter_ranges = {"batch-size": IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(
            estimator,
            objective_metric_name,
            hyperparameter_ranges,
            metric_definitions,
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        training_data = estimator.sagemaker_session.upload_data(
            path=os.path.join(mnist_dir, "training"),
            key_prefix="integ-test-data/pytorch_mnist/training",
        )

        tuning_job_name = unique_name_from_base("pytorch", max_length=32)
        tuner.fit({"training": training_data}, job_name=tuning_job_name)

        print("Started hyperparameter tuning job with name:" + tuning_job_name)

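        # give the job a moment to start, then block until tuning completes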
        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"

    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = attached_tuner.deploy(1, "ml.c4.xlarge")
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def test_attach_tuning_job_with_estimator_from_image(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    job_details['TrainingJobDefinition']['AlgorithmSpecification']['TrainingImage'] = '1111.amazonaws.com/pca:1'
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    assert isinstance(tuner.estimator, PCA)
def test_attach_with_no_specified_estimator(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    del job_details['TrainingJobDefinition']['StaticHyperParameters']['sagemaker_estimator_module']
    del job_details['TrainingJobDefinition']['StaticHyperParameters']['sagemaker_estimator_class_name']
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    assert isinstance(tuner.estimator, Estimator)
def test_identical_dataset_and_algorithm_tuner(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    parent_tuner = tuner.identical_dataset_and_algorithm_tuner(additional_parents={"p1", "p2"})
    assert parent_tuner.warm_start_config.type == WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM
    assert parent_tuner.warm_start_config.parents == {tuner.latest_tuning_job.name, "p1", "p2"}
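For reference, the warm-start request that such a parent tuner carries can also be built directly; a minimal sketch using `WarmStartConfig.to_input_req()`, the same helper exercised by the warm-start attach test below (the commented dict shape is approximate):

from sagemaker.tuner import WarmStartConfig, WarmStartTypes

config = WarmStartConfig(
    warm_start_type=WarmStartTypes.IDENTICAL_DATA_AND_ALGORITHM,
    parents={"parent-tuning-job"},
)
# Renders the API-shaped request, roughly:
# {'WarmStartType': 'IdenticalDataAndAlgorithm',
#  'ParentHyperParameterTuningJobs': [{'HyperParameterTuningJobName': 'parent-tuning-job'}]}
print(config.to_input_req())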
def test_attach_tuning_job_with_estimator_from_hyperparameters_with_early_stopping(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    job_details['HyperParameterTuningJobConfig']['TrainingJobEarlyStoppingType'] = 'Auto'
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)
    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)

    assert tuner.latest_tuning_job.name == JOB_NAME
    assert tuner.early_stopping_type == 'Auto'

    assert isinstance(tuner.estimator, PCA)
def test_transfer_learning_tuner(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    parent_tuner = tuner.transfer_learning_tuner(additional_parents={"p1", "p2"})

    assert parent_tuner.warm_start_config.type == WarmStartTypes.TRANSFER_LEARNING
    assert parent_tuner.warm_start_config.parents == {tuner.latest_tuning_job.name, "p1", "p2"}
    assert parent_tuner.estimator == tuner.estimator
def test_attach_with_warm_start_config(sagemaker_session):
    warm_start_config = WarmStartConfig(warm_start_type=WarmStartTypes.TRANSFER_LEARNING, parents={"p1", "p2"})
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    job_details["WarmStartConfig"] = warm_start_config.to_input_req()

    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)

    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)
    assert tuner.warm_start_config.type == warm_start_config.type
    assert tuner.warm_start_config.parents == warm_start_config.parents
def _attach_tuner(sagemaker_session, tuning_job_name):
    print("Attaching hyperparameter tuning job {} to a new tuner instance".
          format(tuning_job_name))
    return HyperparameterTuner.attach(
        tuning_job_name,
        sagemaker_session=sagemaker_session,
        estimator_cls={
            ESTIMATOR_FM: "sagemaker.estimator.Estimator",
            ESTIMATOR_KNN: "sagemaker.estimator.Estimator",
        },
    )
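The dict form of `estimator_cls` here maps per-training-job estimator names to estimator class paths, which is how a tuning job that ran more than one algorithm can be re-attached; `ESTIMATOR_FM` and `ESTIMATOR_KNN` are constants defined elsewhere in the source module (presumably the names of the factorization-machines and k-NN training job definitions).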
def test_attach_tuning_pytorch(sagemaker_session):
    mnist_dir = os.path.join(DATA_DIR, 'pytorch_mnist')
    mnist_script = os.path.join(mnist_dir, 'mnist.py')

    estimator = PyTorch(entry_point=mnist_script, role='SageMakerRole', train_instance_count=1,
                        train_instance_type='ml.c4.xlarge', sagemaker_session=sagemaker_session)

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        objective_metric_name = 'evaluation-accuracy'
        metric_definitions = [{'Name': 'evaluation-accuracy', 'Regex': r'Overall test accuracy: (\d+)'}]
        hyperparameter_ranges = {'batch-size': IntegerParameter(50, 100)}

        tuner = HyperparameterTuner(estimator, objective_metric_name, hyperparameter_ranges, metric_definitions,
                                    max_jobs=2, max_parallel_jobs=2)

        training_data = estimator.sagemaker_session.upload_data(path=os.path.join(mnist_dir, 'training'),
                                                                key_prefix='integ-test-data/pytorch_mnist/training')
        tuner.fit({'training': training_data})

        tuning_job_name = tuner.latest_tuning_job.name

        print('Started hyperparameter tuning job with name: ' + tuning_job_name)

        time.sleep(15)
        tuner.wait()

    attached_tuner = HyperparameterTuner.attach(tuning_job_name, sagemaker_session=sagemaker_session)
    best_training_job = tuner.best_training_job()
    with timeout_and_delete_endpoint_by_name(best_training_job, sagemaker_session):
        predictor = attached_tuner.deploy(1, 'ml.c4.xlarge')
        data = np.zeros(shape=(1, 1, 28, 28), dtype=np.float32)
        predictor.predict(data)

        batch_size = 100
        data = np.random.rand(batch_size, 1, 28, 28).astype(np.float32)
        output = predictor.predict(data)

        assert output.shape == (batch_size, 10)
def test_attach_tuning_job_with_estimator_from_hyperparameters(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job = Mock(name='describe_tuning_job',
                                                                                  return_value=job_details)
    tuner = HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session)

    assert tuner.latest_tuning_job.name == JOB_NAME
    assert tuner.objective_metric_name == OBJECTIVE_METRIC_NAME
    assert tuner.max_jobs == 1
    assert tuner.max_parallel_jobs == 1
    assert tuner.metric_definitions == METRIC_DEFINITIONS
    assert tuner.strategy == 'Bayesian'
    assert tuner.objective_type == 'Minimize'

    assert isinstance(tuner.estimator, PCA)
    assert tuner.estimator.role == ROLE
    assert tuner.estimator.train_instance_count == 1
    assert tuner.estimator.train_max_run == 24 * 60 * 60
    assert tuner.estimator.input_mode == 'File'
    assert tuner.estimator.output_path == BUCKET_NAME
    assert tuner.estimator.output_kms_key == ''

    assert '_tuning_objective_metric' not in tuner.estimator.hyperparameters()
    assert tuner.estimator.hyperparameters()['num_components'] == '1'
def test_attach_tuning_job_with_job_details(sagemaker_session):
    job_details = copy.deepcopy(TUNING_JOB_DETAILS)
    HyperparameterTuner.attach(JOB_NAME, sagemaker_session=sagemaker_session, job_details=job_details)
    # passing job_details means attach() never needs to describe the job
    sagemaker_session.sagemaker_client.describe_hyper_parameter_tuning_job.assert_not_called()
def test_tuning_lda(sagemaker_session, cpu_instance_type):
    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # all records must have the same feature dimension
        feature_num = int(
            all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )

        record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set = prepare_record_set_from_local_files(
            data_path, lda.data_location, len(all_records), feature_num,
            sagemaker_session)
        test_record_set.channel = "test"

        # specify which hp you want to optimize over
        hyperparameter_ranges = {
            "alpha0": ContinuousParameter(1, 10),
            "num_topics": IntegerParameter(1, 2),
        }
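        # "test:pwll" is the per-word log-likelihood LDA reports on the "test" channel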
        objective_metric_name = "test:pwll"

        tuner = HyperparameterTuner(
            estimator=lda,
            objective_metric_name=objective_metric_name,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Maximize",
            max_jobs=2,
            max_parallel_jobs=2,
            early_stopping_type="Auto",
        )

        tuning_job_name = unique_name_from_base("test-lda", max_length=32)
        print("Started hyperparameter tuning job with name:" + tuning_job_name)
        tuner.fit([record_set, test_record_set],
                  mini_batch_size=1,
                  job_name=tuning_job_name)

    attached_tuner = HyperparameterTuner.attach(
        tuning_job_name, sagemaker_session=sagemaker_session)
    assert attached_tuner.early_stopping_type == "Auto"
    assert attached_tuner.estimator.alpha0 == 1.0
    assert attached_tuner.estimator.num_topics == 1

    best_training_job = attached_tuner.best_training_job()

    with timeout_and_delete_endpoint_by_name(best_training_job,
                                             sagemaker_session):
        predictor = tuner.deploy(1, cpu_instance_type)
        predict_input = np.random.rand(1, feature_num)
        result = predictor.predict(predict_input)

        assert len(result) == 1
        for record in result:
            assert record.label["topic_mixture"] is not None