Code example #1
def test_continuous_parameter_ranges():
    cont_param = ContinuousParameter(0.1, 1e-2)
    ranges = cont_param.as_tuning_range('some')
    assert len(ranges.keys()) == 3
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '0.1'
    assert ranges['MaxValue'] == '0.01'
Code example #2
def test_continuous_parameter_ranges():
    cont_param = ContinuousParameter(0.1, 1e-2)
    ranges = cont_param.as_tuning_range("some")
    assert len(ranges.keys()) == 4
    assert ranges["Name"] == "some"
    assert ranges["MinValue"] == "0.1"
    assert ranges["MaxValue"] == "0.01"
    assert ranges["ScalingType"] == "Auto"
Code example #3
class AwsLinearLearner(AwsEstimator):
    container_name: str = "linear-learner"
    name: str = "linear_learner"
    default_hyperparameter_tuning: Dict[str, Any] = {
        "learning_rate": ContinuousParameter(0.01, 0.2),
        "mini_batch_size": IntegerParameter(250, 5000),
        "use_bias": CategoricalParameter([True, False]),
    }
    default_tuning_job_config = {
        "max_jobs": 20,
        "max_parallel_jobs": 3,
        "objective_metric_name": "validation:objective_loss",
        "objective_type": "Minimize",
    }

    def _load_results(self, file_name: str) -> DataFrame:
        """
        Extension of the superclass results loader that strips the
        {"score": ...} wrapper from the first column.
        Arguments and return value are the same as in the superclass.
        """
        initial_df = super()._load_results(file_name)
        # Mutating the row yielded by iterrows() does not write back to the
        # DataFrame, so transform the first column directly instead.
        if len(initial_df.columns) > 0:
            initial_df.iloc[:, 0] = (
                initial_df.iloc[:, 0]
                .str.replace('{"score":', "", regex=False)
                .str.replace("}", "", regex=False)
            )
        return initial_df.astype("float32")
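
As a hedged usage sketch (not part of the original project), the class defaults above map directly onto the sagemaker.tuner.HyperparameterTuner constructor; the estimator instance here is an assumption:

from sagemaker.tuner import HyperparameterTuner

tuner = HyperparameterTuner(
    estimator=estimator,  # assumption: a linear-learner Estimator built elsewhere
    objective_metric_name="validation:objective_loss",
    objective_type="Minimize",
    hyperparameter_ranges=AwsLinearLearner.default_hyperparameter_tuning,
    max_jobs=20,
    max_parallel_jobs=3,
)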
Code example #4
File: test_sagemaker.py Project: toren77/sagify
def test_hyperparameter_optimization_happy_case():
    with patch(
            'boto3.Session'
    ):
        with patch(
                'sagemaker.Session'
        ) as mocked_sagemaker_session:
            sagemaker_session_instance = mocked_sagemaker_session.return_value

            with patch(
                    'sagemaker.get_execution_role',
                    return_value='arn_role'
            ):
                with patch(
                        'sagemaker.estimator.Estimator'
                ) as mocked_sagemaker_estimator:
                    with patch(
                            'sagify.sagemaker.sagemaker.SageMakerClient._construct_image_location',
                            return_value='image-full-name'
                    ):
                        with patch(
                                'sagemaker.tuner.HyperparameterTuner'
                        ) as mocked_sagemaker_tuner:
                            sage_maker_client = sagemaker.SageMakerClient('sagemaker', 'us-east-1')
                            sage_maker_client.hyperparameter_optimization(
                                image_name='image',
                                input_s3_data_location='s3://bucket/input',
                                instance_count=1,
                                instance_type='m1.xlarge',
                                volume_size=30,
                                max_run=60,
                                max_jobs=3,
                                max_parallel_jobs=2,
                                output_path='s3://bucket/output',
                                objective_type='Maximize',
                                objective_metric_name='Precision',
                                hyperparams_ranges_dict={
                                    'lr': ContinuousParameter(0.001, 0.1),
                                    'batch-size': CategoricalParameter([32, 64, 128, 256, 512])
                                },
                                base_job_name="Some-job-name-prefix",
                                job_name="some job name"
                            )
                            mocked_sagemaker_estimator.assert_called_with(
                                image_name='image-full-name',
                                role='arn_role',
                                train_instance_count=1,
                                train_instance_type='m1.xlarge',
                                train_volume_size=30,
                                train_max_run=60,
                                input_mode='File',
                                output_path='s3://bucket/output',
                                sagemaker_session=sagemaker_session_instance
                            )

                            mocked_sagemaker_tuner_instance = mocked_sagemaker_tuner.return_value
                            assert mocked_sagemaker_tuner_instance.fit.call_count == 1
                            mocked_sagemaker_tuner_instance.fit.assert_called_with(
                                's3://bucket/input', job_name='some job name'
                            )
Code example #5
def test_estimator_metric_definitions():
    estimator = DummyEstimator(model="hello",
                               job="world",
                               hyperparameters=dict())

    hyperparameter_ranges = {"sample": ContinuousParameter(1.0, 2.0)}
    tuner = estimator.get_sagemaker_tuner(
        hyperparameter_ranges=hyperparameter_ranges)

    assert tuner.metric_definitions == estimator.get_tuner_metric_definitions()
Code example #6
def test_estimator_get_sagemaker_tuner_default_values():
    estimator = DummyEstimator(model="hello",
                               job="world",
                               hyperparameters=dict())

    hyperparameter_ranges = {"sample": ContinuousParameter(1.0, 2.0)}
    tuner = estimator.get_sagemaker_tuner(
        hyperparameter_ranges=hyperparameter_ranges)

    assert tuner.objective_type == "Minimize"
    assert tuner.max_jobs == 1
    assert tuner.max_parallel_jobs == 1
Code example #7
    def _prepare_parameter_ranges(cls, parameter_ranges):
        ranges = {}

        for parameter in parameter_ranges['CategoricalParameterRanges']:
            ranges[parameter['Name']] = CategoricalParameter(parameter['Values'])

        for parameter in parameter_ranges['ContinuousParameterRanges']:
            ranges[parameter['Name']] = ContinuousParameter(float(parameter['MinValue']), float(parameter['MaxValue']))

        for parameter in parameter_ranges['IntegerParameterRanges']:
            ranges[parameter['Name']] = IntegerParameter(int(parameter['MinValue']), int(parameter['MaxValue']))

        return ranges
Code example #8
    def _prepare_parameter_ranges(cls, parameter_ranges):
        ranges = {}

        for parameter in parameter_ranges["CategoricalParameterRanges"]:
            ranges[parameter["Name"]] = CategoricalParameter(
                parameter["Values"])

        for parameter in parameter_ranges["ContinuousParameterRanges"]:
            ranges[parameter["Name"]] = ContinuousParameter(
                float(parameter["MinValue"]), float(parameter["MaxValue"]))

        for parameter in parameter_ranges["IntegerParameterRanges"]:
            ranges[parameter["Name"]] = IntegerParameter(
                int(parameter["MinValue"]), int(parameter["MaxValue"]))

        return ranges
Code example #9
    def test_sagemaker_transform_step_successfully(self, m_default_bucket):

        m_default_bucket.return_value = "sagemaker-bucket-name"

        with DataJobStack(scope=self.app, id="some-stack", stage="stg") as djs:
            transformer = Transformer(
                model_name="some-model",
                instance_count=1,
                instance_type="ml.t2.medium",
                sagemaker_session=self.sagemaker_session,
            )

            transform_step = TransformStep(
                datajob_stack=djs,
                name="transform-job",
                transformer=transformer,
                data="s3://some-bucket/some-data.csv",
            )

            estimator = SKLearn(
                entry_point=str(
                    pathlib.Path(current_dir, "resources", "train.py")),
                train_instance_type="ml.m5.xlarge",
                role=self.role,
                framework_version="0.20.0",
                py_version="py3",
                sagemaker_session=self.sagemaker_session,
            )

            tuner = HyperparameterTuner(
                estimator=estimator,
                hyperparameter_ranges={
                    "alpha": ContinuousParameter(0.0001, 0.05)
                },
                objective_metric_name="rmse",
            )

            tuner_step = TuningStep(
                datajob_stack=djs,
                name="tuning-step",
                tuner=tuner,
                data="s3://some-bucket/some-data.csv",
            )

            with StepfunctionsWorkflow(djs, "sequential") as sfn_workflow:
                transform_step >> tuner_step
Code example #10
def _read_hyperparams_ranges_config(hyperparams_config_file_path):
    if not os.path.isfile(hyperparams_config_file_path):
        raise ValueError("The given hyperparams file {} doens't exist".format(
            hyperparams_config_file_path))

    with open(hyperparams_config_file_path) as _in_file:
        hyperparams_config_dict = json.load(_in_file)

    if 'ParameterRanges' not in hyperparams_config_dict:
        raise ValueError("ParameterRanges not in the hyperparams file")

    parameter_ranges_dict = hyperparams_config_dict['ParameterRanges']

    if not parameter_ranges_dict:
        raise ValueError("Empty ParameterRanges in the hyperparams file")

    # 'or' (not 'and') is needed here, and it short-circuits so the nested
    # lookup is safe when 'ObjectiveMetric' is absent.
    if 'ObjectiveMetric' not in hyperparams_config_dict or 'Name' not in hyperparams_config_dict[
            'ObjectiveMetric']:
        raise ValueError("ObjectiveMetric not in the hyperparams file")

    objective_name = hyperparams_config_dict['ObjectiveMetric']['Name']
    objective_type = hyperparams_config_dict['ObjectiveMetric']['Type']

    hyperparameter_ranges = {}

    categorical_param_ranges_dict = parameter_ranges_dict[
        'CategoricalParameterRanges']
    for _dict in categorical_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = CategoricalParameter(
            _dict['Values'])

    integer_param_ranges_dict = parameter_ranges_dict['IntegerParameterRanges']
    for _dict in integer_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = IntegerParameter(
            _dict['MinValue'], _dict['MaxValue'])

    continuous_param_ranges_dict = parameter_ranges_dict[
        'ContinuousParameterRanges']
    for _dict in continuous_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = ContinuousParameter(
            _dict['MinValue'], _dict['MaxValue'])

    return objective_name, objective_type, hyperparameter_ranges
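
A hedged illustration of the config file this reader expects, written as the equivalent Python dict: the field names come from the parsing code above, while the parameter names and values are made up.

example_config = {
    "ParameterRanges": {
        "CategoricalParameterRanges": [
            {"Name": "optimizer", "Values": ["sgd", "adam"]}
        ],
        "IntegerParameterRanges": [
            {"Name": "epochs", "MinValue": 1, "MaxValue": 10}
        ],
        "ContinuousParameterRanges": [
            {"Name": "lr", "MinValue": 0.001, "MaxValue": 0.1}
        ],
    },
    "ObjectiveMetric": {"Name": "Precision", "Type": "Maximize"},
}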
Code example #11
def test_continuous_parameter():
    cont_param = ContinuousParameter(0.1, 1e-2)
    assert isinstance(cont_param, ParameterRange)
    assert cont_param.__name__ == "Continuous"
Code example #12
REGION = "us-west-2"
BUCKET_NAME = "Some-Bucket"
ROLE = "myrole"
IMAGE_NAME = "image"

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = "ml.c4.xlarge"
NUM_COMPONENTS = 5

SCRIPT_NAME = "my_script.py"
FRAMEWORK_VERSION = "1.0.0"

INPUTS = "s3://mybucket/train"
OBJECTIVE_METRIC_NAME = "mock_metric"
HYPERPARAMETER_RANGES = {
    "validated": ContinuousParameter(0, 5),
    "elizabeth": IntegerParameter(0, 5),
    "blank": CategoricalParameter([0, 5]),
}
METRIC_DEFINITIONS = "mock_metric_definitions"

TUNING_JOB_DETAILS = {
    "HyperParameterTuningJobConfig": {
        "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1},
        "HyperParameterTuningJobObjective": {
            "MetricName": OBJECTIVE_METRIC_NAME,
            "Type": "Minimize",
        },
        "Strategy": "Bayesian",
        "ParameterRanges": {
            "CategoricalParameterRanges": [],
Code example #13
JOB_NAME = 'tuning_job'
REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {'validated': ContinuousParameter(0, 5),
                         'elizabeth': IntegerParameter(0, 5),
                         'blank': CategoricalParameter([0, 5])}
METRIC_DEFINITIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },
        'Strategy': 'Bayesian',
Code example #14
def test_continuous_parameter_scaling_type():
    cont_param = ContinuousParameter(0.1, 2, scaling_type='ReverseLogarithmic')
    cont_range = cont_param.as_tuning_range('range')
    assert cont_range['ScalingType'] == 'ReverseLogarithmic'
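
For context (a hedged note, not part of the original test): the SageMaker hyperparameter tuning API accepts "Auto", "Linear", "Logarithmic" and "ReverseLogarithmic" as scaling types, so a log-scaled learning-rate range would look like:

lr_range = ContinuousParameter(1e-4, 1e-1, scaling_type="Logarithmic")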
Code example #15
# In[ ]:

from sagemaker.parameter import (
    CategoricalParameter,
    ContinuousParameter,
    IntegerParameter,
    ParameterRange,
)
from sagemaker.amazon.hyperparameter import Hyperparameter
from sagemaker.tuner import HyperparameterTuner
import sagemaker

hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.0001,
                                         0.1,
                                         scaling_type='Logarithmic'),
    'use_bias': CategoricalParameter([True, False])
}

# Next, you'll specify the objective metric that you'd like to tune and its definition, which includes the regular expression (regex) needed to extract that metric from the Amazon CloudWatch logs of the training job.
#
# Because you are using the built-in linear learner algorithm, it emits two predefined metrics that you have used before: **test:mse** and **test:absolute_loss**. You will elect to monitor **test:mse**. In this case, you only need to specify the metric name and do not need to provide a regex. If you bring your own algorithm, it emits its own metrics; in that case, you would need to add a metric definition object that describes their format through a regex, so that Amazon SageMaker knows how to extract them from your CloudWatch logs (a sketch follows this example).

# In[ ]:

objective_metric_name = 'test:mse'
objective_type = 'Minimize'

# Now, create a HyperparameterTuner object, to which you will pass the following:
# - The Linear_model estimator created previously
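
The commentary above mentions metric definitions for bring-your-own algorithms. A hedged sketch of how such a definition and the tuner fit together (linear_model and the log-line format inside the regex are assumptions, not part of the original notebook):

tuner = HyperparameterTuner(
    estimator=linear_model,  # assumption: the linear learner estimator built earlier
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    # Only needed for custom algorithms; the log format here is hypothetical.
    metric_definitions=[{"Name": "test:mse", "Regex": "test mse: ([0-9\\.]+)"}],
    max_jobs=10,
    max_parallel_jobs=2,
)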
Code example #16
def test_continuous_parameter_scaling_type():
    cont_param = ContinuousParameter(0.1, 2, scaling_type="ReverseLogarithmic")
    cont_range = cont_param.as_tuning_range("range")
    assert cont_range["ScalingType"] == "ReverseLogarithmic"
Code example #17
REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {
    'validated': ContinuousParameter(0, 5),
    'elizabeth': IntegerParameter(0, 5),
    'blank': CategoricalParameter([0, 5])
}
METRIC_DEFINITIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },