Python Estimator.Estimator Exemples, sagemaker.estimator.Estimator.Estimator Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_estimator.py Projet : masry707/sagemaker-python-sdk

def test_generic_deploy_vpc_config_override(sagemaker_session):
    """Check the ``vpc_config`` that ``deploy`` forwards to ``create_model``.

    Four deployments are issued in sequence: with no VPC settings on the
    estimator, with subnets/security groups set on the estimator, with an
    explicit override dict, and with an explicit ``None`` override.
    """
    estimator_vpc = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    override_vpc = {'Subnets': ['foo', 'bar'], 'SecurityGroupIds': ['baz']}

    def forwarded_vpc_config(call_index):
        # Keyword arguments of the call_index-th recorded create_model call.
        recorded_call = sagemaker_session.create_model.call_args_list[call_index]
        return recorded_call[1]['vpc_config']

    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          sagemaker_session=sagemaker_session)
    estimator.fit({'train': 's3://bucket/training-prefix'})

    # 1) Nothing configured anywhere.
    estimator.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert forwarded_vpc_config(0) is None

    # 2) VPC settings taken from the estimator attributes.
    estimator.subnets = estimator_vpc['Subnets']
    estimator.security_group_ids = estimator_vpc['SecurityGroupIds']
    estimator.deploy(INSTANCE_COUNT, INSTANCE_TYPE)
    assert forwarded_vpc_config(1) == estimator_vpc

    # 3) An explicit override replaces the estimator settings.
    estimator.deploy(INSTANCE_COUNT, INSTANCE_TYPE,
                     vpc_config_override=override_vpc)
    assert forwarded_vpc_config(2) == override_vpc

    # 4) An explicit None override clears the VPC config.
    estimator.deploy(INSTANCE_COUNT, INSTANCE_TYPE, vpc_config_override=None)
    assert forwarded_vpc_config(3) is None

Exemple #2

0

Afficher le fichier

Fichier : test_estimator.py Projet : jnclt/sagemaker-python-sdk

def test_generic_to_deploy(sagemaker_session):
    """Fit and deploy a generic ``Estimator`` and inspect the session calls.

    Verifies the keyword arguments passed to ``train``, the positional
    arguments passed to ``create_model`` and the returned predictor.
    """
    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          output_path=OUTPUT_PATH,
                          sagemaker_session=sagemaker_session)
    estimator.set_hyperparameters(**HYPERPARAMS)
    estimator.fit({'train': 's3://bucket/training-prefix'})
    predictor = estimator.deploy(INSTANCE_COUNT, INSTANCE_TYPE)

    # --- training call: keyword arguments only ---
    sagemaker_session.train.assert_called_once()
    assert len(sagemaker_session.train.call_args[0]) == 0
    train_kwargs = sagemaker_session.train.call_args[1]
    assert train_kwargs['job_name'].startswith(IMAGE_NAME)

    # job_name and role are dropped before comparing with the expected call.
    for excluded_key in ('job_name', 'role'):
        train_kwargs.pop(excluded_key)
    assert train_kwargs == HP_TRAIN_CALL

    # --- model creation call: positional arguments ---
    sagemaker_session.create_model.assert_called_once()
    model_args = sagemaker_session.create_model.call_args[0]
    assert model_args[0].startswith(IMAGE_NAME)
    assert model_args[1] == ROLE
    container_definition = model_args[2]
    assert container_definition['Image'] == IMAGE_NAME
    assert container_definition['ModelDataUrl'] == MODEL_DATA

    # --- returned predictor ---
    assert isinstance(predictor, RealTimePredictor)
    assert predictor.endpoint.startswith(IMAGE_NAME)
    assert predictor.sagemaker_session == sagemaker_session

Exemple #3

0

Afficher le fichier

Fichier : test_estimator.py Projet : masry707/sagemaker-python-sdk

def test_generic_create_model_vpc_config_override(sagemaker_session):
    """Check ``get_vpc_config`` / ``create_model`` with and without overrides.

    The same checks run twice: first with no VPC settings on the estimator,
    then after subnets and security groups are assigned to it. Finally an
    invalid override (a set instead of a dict) must raise ``ValueError``.
    """
    base_vpc = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    other_vpc = {'Subnets': ['foo', 'bar'], 'SecurityGroupIds': ['baz']}

    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          sagemaker_session=sagemaker_session)
    estimator.fit({'train': 's3://bucket/training-prefix'})

    # Estimator without VPC settings.
    assert estimator.get_vpc_config() is None
    model = estimator.create_model()
    assert model.vpc_config is None
    model = estimator.create_model(vpc_config_override=base_vpc)
    assert model.vpc_config == base_vpc
    model = estimator.create_model(vpc_config_override=None)
    assert model.vpc_config is None

    # Estimator with VPC settings assigned directly.
    estimator.subnets = base_vpc['Subnets']
    estimator.security_group_ids = base_vpc['SecurityGroupIds']
    assert estimator.get_vpc_config() == base_vpc
    model = estimator.create_model()
    assert model.vpc_config == base_vpc
    model = estimator.create_model(vpc_config_override=other_vpc)
    assert model.vpc_config == other_vpc
    model = estimator.create_model(vpc_config_override=None)
    assert model.vpc_config is None

    # A malformed override is rejected by both entry points.
    with pytest.raises(ValueError):
        estimator.get_vpc_config(vpc_config_override={'invalid'})
    with pytest.raises(ValueError):
        estimator.create_model(vpc_config_override={'invalid'})

Exemple #4

0

Afficher le fichier

Fichier : test_steps.py Projet : easyj2j/sagemaker-python-sdk

def test_training_step(sagemaker_session):
    """Verify the request dict and properties of a cached ``TrainingStep``."""
    profiler = ProfilerConfig(system_monitor_interval_millis=500)
    estimator = Estimator(
        image_uri=IMAGE_URI,
        role=ROLE,
        instance_count=1,
        instance_type="c4.4xlarge",
        profiler_config=profiler,
        rules=[],
        sagemaker_session=sagemaker_session,
    )
    step = TrainingStep(
        name="MyTrainingStep",
        estimator=estimator,
        inputs=TrainingInput(f"s3://{BUCKET}/train_manifest"),
        cache_config=CacheConfig(enable_caching=True, expire_after="PT1H"),
    )

    # Expected "Arguments" section, assembled piece by piece for readability.
    expected_channel = {
        "ChannelName": "training",
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": f"s3://{BUCKET}/train_manifest",
            }
        },
    }
    expected_arguments = {
        "AlgorithmSpecification": {
            "TrainingImage": IMAGE_URI,
            "TrainingInputMode": "File",
        },
        "InputDataConfig": [expected_channel],
        "OutputDataConfig": {"S3OutputPath": f"s3://{BUCKET}/"},
        "ResourceConfig": {
            "InstanceCount": 1,
            "InstanceType": "c4.4xlarge",
            "VolumeSizeInGB": 30,
        },
        "RoleArn": ROLE,
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
        "ProfilerConfig": {
            "ProfilingIntervalInMilliseconds": 500,
            "S3OutputPath": f"s3://{BUCKET}/",
        },
    }
    expected_request = {
        "Name": "MyTrainingStep",
        "Type": "Training",
        "Arguments": expected_arguments,
        "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
    }

    assert step.to_request() == expected_request
    assert step.properties.TrainingJobName.expr == {
        "Get": "Steps.MyTrainingStep.TrainingJobName"
    }

Exemple #5

0

Afficher le fichier

Fichier : test_estimator.py Projet : shotarok/sagemaker-python-sdk

def test_distributed_gpu_local_mode(LocalSession):
    """Building a 3-instance ``local_gpu`` Estimator must raise RuntimeError."""
    instance_count = 3
    with pytest.raises(RuntimeError):
        Estimator(IMAGE_NAME, ROLE, instance_count, 'local_gpu',
                  output_path='s3://bucket/prefix')

Exemple #6

0

Afficher le fichier

Fichier : test_model_training.py Projet : kimoyerr/penguin-sagemaker

def test_xgb_train_container_cpu(sagemaker_session, instance_type):
    """Train the XGBoost container on the penguins data and fetch the model.

    Uploads ``penguins.csv`` to S3, fits an ``Estimator`` on it, downloads the
    resulting model archive into ``resources/models_tar`` and finally checks
    that the model artifact exists in S3.

    Args:
        sagemaker_session: session used for the upload and the training job.
        instance_type: instance type passed as ``train_instance_type``.
    """
    training_data_path = os.path.join(test_dir, 'resources/data/')
    estimator = Estimator(role=ROLE,
                          sagemaker_session=sagemaker_session,
                          train_instance_count=1,
                          train_instance_type=instance_type,
                          image_name=XGB_IMAGE_NAME,
                          output_path=MODEL_SAVE_PATH,
                          hyperparameters={
                              "train-file": "penguins.csv",
                              "max-depth": 3,
                              "categorical-columns": 'island,sex'
                          })

    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(training_data_path, 'penguins.csv'),
        bucket=BUCKET_NAME,
        key_prefix='penguins/tmp')
    estimator.fit(
        inputs, job_name=unique_name_from_base('test-sagemaker-xgb-training'))

    # Start from an empty models folder. The folder is (re-)created even when
    # it did not exist before; previously it was only created after a removal,
    # so the download below had no target directory on a clean checkout.
    models_dir = os.path.join(test_dir, 'resources/models_tar')
    if os.path.exists(models_dir):
        shutil.rmtree(models_dir)
    os.mkdir(models_dir)

    # Download the model files
    obj_name = os.path.relpath(estimator.model_data, 's3://' + BUCKET_NAME)
    s3.Bucket(BUCKET_NAME).download_file(
        obj_name, os.path.join(models_dir, 'model.tar.gz'))

    _assert_s3_file_exists(sagemaker_session.boto_region_name,
                           estimator.model_data)

Exemple #7

0

Afficher le fichier

def main():
    """Train and serve the California housing model in SageMaker local mode.

    Downloads the data, trains with the ``sagemaker-tensorflow2-local`` image,
    deploys a local endpoint, runs inference against it and deletes the
    endpoint afterwards.
    """
    download_training_and_eval_data()

    image = 'sagemaker-tensorflow2-local'

    print('Starting model training.')
    california_housing_estimator = Estimator(
        image,
        DUMMY_IAM_ROLE,
        hyperparameters={'epochs': 10,
                         'batch_size': 64,
                         'learning_rate': 0.1},
        instance_count=1,
        instance_type="local")

    inputs = {'train': 'file://./data/train', 'test': 'file://./data/test'}
    california_housing_estimator.fit(inputs, logs=True)
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = california_housing_estimator.deploy(initial_instance_count=1, instance_type='local')

    try:
        do_inference_on_local_endpoint(predictor)
    finally:
        # Always tear the endpoint down, even when inference fails, so it
        # does not keep running (and costing money in cloud mode).
        print('About to delete the endpoint to stop paying (if in cloud mode).')
        # delete_endpoint() acts on the predictor's own endpoint; its only
        # parameter is the boolean delete_endpoint_config (default True), so
        # the endpoint name must not be passed here.
        predictor.delete_endpoint()

Exemple #8

0

Afficher le fichier

Fichier : run_benchmarks.py Projet : mvsusp/hvd-benchmark

def run_benchmark(instance_count,
                  subnet,
                  security_group,
                  aws_account,
                  base_image,
                  region='us-west-2',
                  role="SageMakerRole",
                  tag='tensorflow-hvd:latest',
                  build_image=False,
                  wait=True):
    """Optionally build the image, push it, and start a training job with it.

    Args:
        instance_count: number of ``ml.p3.16xlarge`` training instances.
        subnet: single subnet id passed to the estimator.
        security_group: single security group id passed to the estimator.
        aws_account: account id used in the S3 output bucket name.
        base_image: base image forwarded to ``build`` when building.
        region: region used in the output bucket and training data bucket.
        role: role passed to the estimator.
        tag: image tag that is (optionally) built and then pushed.
        build_image: build the image before pushing when true.
        wait: forwarded to ``Estimator.fit``.
    """
    if build_image:
        build(base_image=base_image,
              entrypoint='launcher.sh',
              source_dir='benchmarks',
              tag=tag)

    ecr_image_name = push(tag)

    # time_ns() keeps the output prefix distinct between runs.
    output_template = 's3://sagemaker-{}-{}/hvd-1-single/{}node-{}'
    output_path = output_template.format(region, aws_account, instance_count,
                                         time.time_ns())

    network_settings = {
        'subnets': [subnet],
        'security_group_ids': [security_group],
    }
    estimator = Estimator(ecr_image_name,
                          role=role,
                          base_job_name='hvd-bench',
                          hyperparameters={},
                          train_instance_count=instance_count,
                          train_instance_type='ml.p3.16xlarge',
                          output_path=output_path,
                          **network_settings)

    training_data = 's3://sagemaker-sample-data-%s/spark/mnist/train/' % region
    estimator.fit(training_data, wait=wait)

Exemple #9

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : jrdeco560/sagemaker-python-sdk

def test_byo_estimator(sagemaker_session, region):
    """Train, deploy and query a Factorization Machines model end to end.

    The dummy tensor data is uploaded to S3, an ``Estimator`` is created for
    the Factorization Machines image, hyperparameters are set and ``fit()`` is
    called on the uploaded data. The trained model is then deployed, the
    predictor is switched to the JSON serializer/deserializer, and a slice of
    the pickled MNIST training set is sent for prediction.
    """
    image_name = registry(region) + "/factorization-machines:1"
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        mnist_archive = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # The extra "encoding" argument is only passed outside Python 2.
        if sys.version_info.major == 2:
            load_options = {}
        else:
            load_options = {"encoding": "latin1"}

        with gzip.open(mnist_archive, "rb") as archive:
            train_set, _, _ = pickle.load(archive, **load_options)

        s3_key_prefix = os.path.join("test_byo_estimator", "train",
                                     "recordio-pb-data")
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=s3_key_prefix)

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )
        hyperparameters = {
            "num_factors": 10,
            "feature_dim": 784,
            "mini_batch_size": 100,
            "predictor_type": "binary_classifier",
        }
        estimator.set_hyperparameters(**hyperparameters)

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    # The endpoint reuses the training job name.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = estimator.create_model().deploy(1, "ml.m4.xlarge",
                                                    endpoint_name=job_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

Exemple #10

0

Afficher le fichier

def create_estimator(params, sagemaker_role):
    """Build the training ``Estimator`` from the given parameter mapping.

    Args:
        params: mapping read for the keys ``'train-image-uri'``,
            ``'hyperparameters'`` (with ``'batch-size'`` and ``'epoch'``),
            ``'experiment-name'``, ``'mlflow-server-uri'`` and
            ``'train-output-path'``.
        sagemaker_role: role passed to the estimator.

    Returns:
        The configured ``Estimator`` (training is not started here).
    """
    train_repository_uri = params['train-image-uri']
    instance_type = 'ml.p3.2xlarge'

    # Raw string: the pattern is unchanged, but backslashes such as "\(" are
    # no longer invalid string escapes (which newer Python versions warn on).
    metric_definitions = [{
        'Name': 'val:mAP',
        'Regex': r'Average Precision  \(AP\) \@\[ IoU=0.50:0.95 \| area=   all \| maxDets=100 \] = ([0-9\.]+)'
    }]
    estimator = Estimator(
        image_uri=train_repository_uri,
        role=sagemaker_role,
        metric_definitions=metric_definitions,
        instance_count=1,
        instance_type=instance_type,
        hyperparameters={
            'batch-size': params['hyperparameters']['batch-size'],
            'test-batch-size': 4,
            'lr': 0.01,
            'epochs': params['hyperparameters']['epoch'],
            'experiment-name': params['experiment-name'],
            'mlflow-server': params['mlflow-server-uri']
        },
        output_path=params['train-output-path'])

    return estimator

Exemple #11

0

Afficher le fichier

Fichier : test_estimator.py Projet : yyolk/sagemaker-python-sdk

def test_estimator_transformer_creation_with_optional_params(sagemaker_session):
    """Optional ``transformer()`` arguments must end up on the transformer."""
    job_prefix = 'foo'
    estimator = Estimator(
        image_name=IMAGE_NAME,
        role=ROLE,
        train_instance_count=INSTANCE_COUNT,
        train_instance_type=INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
        base_job_name=job_prefix,
    )
    estimator.latest_training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    sagemaker_session.create_model_from_job.return_value = JOB_NAME

    # Keyword name -> value; each is also the attribute checked afterwards.
    optional_params = {
        'strategy': 'MultiRecord',
        'assemble_with': 'Line',
        'output_path': OUTPUT_PATH,
        'output_kms_key': 'key',
        'accept': 'text/csv',
        'max_concurrent_transforms': 1,
        'max_payload': 6,
        'env': {'FOO': 'BAR'},
    }

    transformer = estimator.transformer(INSTANCE_COUNT, INSTANCE_TYPE,
                                        tags=TAGS, role=ROLE,
                                        **optional_params)

    sagemaker_session.create_model_from_job.assert_called_with(JOB_NAME, role=ROLE)
    for attribute, expected in optional_params.items():
        assert getattr(transformer, attribute) == expected
    assert transformer.base_transform_job_name == job_prefix
    assert transformer.tags == TAGS

Exemple #12

0

Afficher le fichier

Fichier : test_tuner.py Projet : preetkhaturia/sagemaker-python-sdk

def estimator(sagemaker_session):
    """Return an ``Estimator`` bound to the given session for the tuner tests."""
    keyword_args = {
        'output_path': 's3://bucket/prefix',
        'sagemaker_session': sagemaker_session,
    }
    return Estimator(IMAGE_NAME, ROLE, TRAIN_INSTANCE_COUNT,
                     TRAIN_INSTANCE_TYPE, **keyword_args)

Exemple #13

0

Afficher le fichier

def main():
    """Train the ``sagemaker-hdbscan-local`` image in local mode.

    Downloads the data, fits a local-mode ``Estimator`` on the local training
    directory and prints the location of the resulting model data.
    """
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    hyperparameters = {
        "min_cluster_size": 50,
    }
    local_estimator = Estimator('sagemaker-hdbscan-local',
                                DUMMY_IAM_ROLE,
                                instance_count=1,
                                instance_type="local",
                                hyperparameters=hyperparameters)

    # Local mode reads the training channel from a file:// location.
    training_channels = {'train': 'file://' + local_train}
    local_estimator.fit(training_channels)
    print('Completed model training')

    print(local_estimator.model_data)

Exemple #14

0

Afficher le fichier

Fichier : main.py Projet : comet-ml/comet-sagemaker

def main():
    """Upload the dataset and train the account's ECR container image.

    The image URI is assembled from the caller's account id, the boto3
    session region and the container name given on the command line.
    """
    args = get_args()

    sess = sagemaker.Session()
    role = get_execution_role()

    # Account id of the current caller, looked up through STS.
    sts_client = boto3.client('sts')
    account = sts_client.get_caller_identity()['Account']

    region = boto3.session.Session().region_name

    ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(
        account, region, args.container_name)

    inputs = sess.upload_data(path=args.data, key_prefix=DATASET_PREFIX)

    estimator = Estimator(
        role=role,
        hyperparameters={'train-steps': 1000},
        instance_count=1,
        instance_type='ml.m4.xlarge',
        image_uri=ecr_image,
    )
    estimator.fit(inputs)

Exemple #15

0

Afficher le fichier

Fichier : test_airflow_config.py Projet : benjaminp/sagemaker-python-sdk

def test_byo_airflow_config_uploads_data_source_to_s3_when_inputs_provided(
    sagemaker_session, cpu_instance_type
):
    """The Airflow training config must reference data that exists in S3."""
    with timeout(seconds=AIRFLOW_CONFIG_TIMEOUT_IN_SECONDS):
        local_data_dir = os.path.join(DATA_DIR, "dummy_tensor")

        # Timestamped prefix for the uploaded training data.
        s3_prefix = "test-airflow-config-{}".format(sagemaker_timestamp())
        inputs = sagemaker_session.upload_data(
            path=local_data_dir,
            key_prefix=os.path.join(s3_prefix, "train"),
        )

        fm_image = get_image_uri(
            sagemaker_session.boto_session.region_name, "factorization-machines"
        )
        estimator = Estimator(
            image_name=fm_image,
            role=ROLE,
            train_instance_count=SINGLE_INSTANCE_COUNT,
            train_instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        training_config = _build_airflow_workflow(
            estimator=estimator, instance_type=cpu_instance_type, inputs=inputs
        )

        first_channel = training_config["InputDataConfig"][0]
        s3_uri = first_channel["DataSource"]["S3DataSource"]["S3Uri"]
        _assert_that_s3_url_contains_data(sagemaker_session, s3_uri)

Exemple #16

0

Afficher le fichier

Fichier : test_step_collections.py Projet : rohangujarathi/sagemaker-python-sdk

def estimator(sagemaker_session):
    """Return a single-instance ``ml.c4.4xlarge`` Estimator for the session."""
    settings = {
        "image_uri": IMAGE_URI,
        "role": ROLE,
        "instance_count": 1,
        "instance_type": "ml.c4.4xlarge",
        "sagemaker_session": sagemaker_session,
    }
    return Estimator(**settings)

Exemple #17

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : jrdeco560/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    """Start training without waiting, then attach to the job and deploy.

    The training job is launched with ``wait=False`` under an explicit job
    name. A new ``Estimator`` is then attached to that job by name, deployed
    to a separately named endpoint and queried for predictions.
    """
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base("byo")
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=5):
        mnist_archive = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        # The extra "encoding" argument is only passed outside Python 2.
        if sys.version_info.major == 2:
            load_options = {}
        else:
            load_options = {"encoding": "latin1"}

        with gzip.open(mnist_archive, "rb") as archive:
            train_set, _, _ = pickle.load(archive, **load_options)

        s3_key_prefix = os.path.join("test_byo_estimator", "train",
                                     "recordio-pb-data")
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=s3_key_prefix)

        estimator = Estimator(
            image_name=image_name,
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type="ml.c4.xlarge",
            sagemaker_session=sagemaker_session,
        )
        hyperparameters = {
            "num_factors": 10,
            "feature_dim": 784,
            "mini_batch_size": 100,
            "predictor_type": "binary_classifier",
        }
        estimator.set_hyperparameters(**hyperparameters)

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, wait=False, job_name=job_name)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Re-create the estimator from the training job name alone.
        estimator = Estimator.attach(training_job_name=job_name,
                                     sagemaker_session=sagemaker_session)
        predictor = estimator.create_model().deploy(
            1, "ml.m4.xlarge", endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = "application/json"
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

        assert estimator.train_image() == image_name

Exemple #18

0

Afficher le fichier

def create_blaxing_text_model(
        region_name: str,
        sm_session: Session,
        sm_role: str,
        s3_input_url: str,
        s3_output_url: str):
    """
    Train a BlazingText estimator and return it.

    Args:
        - region_name: AWS region used to look up the BlazingText image.
        - sm_session: SageMaker Session object used by the estimator.
        - sm_role: SageMaker role ARN passed to the estimator.
        - s3_input_url: S3 location of the training data.
        - s3_output_url: S3 location for the model artifacts.

    Return:
        - bt_model: the fitted Estimator instance.
    """
    # Training image for the built-in algorithm.
    blazingtext_image = get_image_uri(region_name, "blazingtext", "latest")

    bt_model = Estimator(
        blazingtext_image,
        sm_role,
        train_instance_count=1,
        train_instance_type='ml.c4.2xlarge',
        train_volume_size=30,
        train_max_run=360000,
        input_mode='File',
        output_path=s3_output_url,
        sagemaker_session=sm_session,
    )

    hyperparameters = {
        'mode': "skipgram",
        'epochs': 5,
        'min_count': 5,
        'sampling_threshold': 0.0001,
        'learning_rate': 0.05,
        'window_size': 5,
        'vector_dim': 100,
        'negative_samples': 5,
        'subwords': True,
        'min_char': 3,
        'max_char': 6,
        'batch_size': 11,
        'evaluation': True,
    }
    bt_model.set_hyperparameters(**hyperparameters)

    # Single "train" channel reading plain text from the S3 prefix.
    train_channel = s3_input(
        s3_input_url,
        distribution='FullyReplicated',
        content_type='text/plain',
        s3_data_type='S3Prefix',
    )

    bt_model.fit(inputs={'train': train_channel}, logs=True)

    return bt_model

Exemple #19

0

Afficher le fichier

Fichier : test_estimator.py Projet : yyolk/sagemaker-python-sdk

def test_start_new_not_local_mode_error(sagemaker_session):
    """Starting a job with a ``file://`` input must raise ``ValueError``.

    The estimator here uses ``INSTANCE_TYPE`` rather than a local instance
    type, and the test expects the error message to point to S3 URIs.
    """
    training_job = _TrainingJob(sagemaker_session, JOB_NAME)
    inputs = 'file://mybucket/train'

    estimator = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                          output_path=OUTPUT_PATH, sagemaker_session=sagemaker_session)
    with pytest.raises(ValueError) as error:
        training_job.start_new(estimator, inputs)

    # The message check must sit outside the ``with`` block: statements after
    # the raising call inside it never run. ``error.value`` is the raised
    # exception; ``str(error)`` would describe the ExceptionInfo wrapper.
    assert 'File URIs are supported in local mode only. Please use a S3 URI instead.' in str(error.value)

Exemple #20

0

Afficher le fichier

Fichier : test_estimator.py Projet : kaanulvan/sagemaker-python-sdk

def test_prepare_for_training_with_name_based_on_image(sagemaker_session):
    """The current job name must contain the image name after preparation."""
    settings = {
        'image_name': 'some-image',
        'role': 'some_image',
        'train_instance_count': 1,
        'train_instance_type': 'ml.m4.xlarge',
        'sagemaker_session': sagemaker_session,
    }
    estimator = Estimator(**settings)

    estimator._prepare_for_training()
    assert 'some-image' in estimator._current_job_name

Exemple #21

0

Afficher le fichier

Fichier : test_byo_estimator.py Projet : lanlan555/sagemaker-python-sdk

def test_async_byo_estimator(sagemaker_session, region):
    """Start training without waiting, then attach to the job and deploy.

    The training job is launched with ``wait=False`` and its name is read
    from ``latest_training_job``. A new ``Estimator`` is attached to that
    job, deployed to a uniquely named endpoint and queried for predictions.
    """
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = unique_name_from_base('byo')
    training_data_path = os.path.join(DATA_DIR, 'dummy_tensor')
    training_job_name = ""

    with timeout(minutes=5):
        mnist_archive = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # The extra 'encoding' argument is only passed outside Python 2.
        if sys.version_info.major == 2:
            load_options = {}
        else:
            load_options = {'encoding': 'latin1'}

        with gzip.open(mnist_archive, 'rb') as archive:
            train_set, _, _ = pickle.load(archive, **load_options)

        s3_key_prefix = os.path.join('test_byo_estimator', 'train',
                                     'recordio-pb-data')
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=s3_key_prefix)

        estimator = Estimator(
            image_name=image_name,
            role='SageMakerRole',
            train_instance_count=1,
            train_instance_type='ml.c4.xlarge',
            sagemaker_session=sagemaker_session,
            base_job_name='test-byo',
        )
        hyperparameters = {
            'num_factors': 10,
            'feature_dim': 784,
            'mini_batch_size': 100,
            'predictor_type': 'binary_classifier',
        }
        estimator.set_hyperparameters(**hyperparameters)

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        # Remember the job name so the job can be re-attached below.
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        estimator = Estimator.attach(training_job_name=training_job_name,
                                     sagemaker_session=sagemaker_session)
        predictor = estimator.create_model().deploy(
            1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        assert estimator.train_image() == image_name

Exemple #22

0

Afficher le fichier

Fichier : test_job.py Projet : tlelson/sagemaker-python-sdk

def estimator(sagemaker_session):
    """Return an ``Estimator`` with volume size, max runtime and output path set."""
    keyword_args = {
        'train_volume_size': VOLUME_SIZE,
        'train_max_run': MAX_RUNTIME,
        'output_path': S3_OUTPUT_PATH,
        'sagemaker_session': sagemaker_session,
    }
    return Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE,
                     **keyword_args)

Exemple #23

0

Afficher le fichier

def test_byo_estimator(sagemaker_session, region, cpu_instance_type,
                       training_set):
    """Train, deploy and query a Factorization Machines model end to end.

    The dummy tensor data is uploaded to S3, an ``Estimator`` is created for
    the Factorization Machines image, hyperparameters are set and ``fit()`` is
    called on the uploaded data. The trained model is then deployed with a
    custom serializer and the JSON deserializer, and a slice of
    ``training_set`` is sent for prediction.
    """
    image_uri = image_uris.retrieve("factorization-machines", region)
    training_data_path = os.path.join(DATA_DIR, "dummy_tensor")
    job_name = unique_name_from_base("byo")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        s3_key_prefix = os.path.join("test_byo_estimator", "train",
                                     "recordio-pb-data")
        s3_train_data = sagemaker_session.upload_data(
            path=training_data_path, key_prefix=s3_key_prefix)

        estimator = Estimator(
            image_uri=image_uri,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )
        hyperparameters = {
            "num_factors": 10,
            "feature_dim": 784,
            "mini_batch_size": 100,
            "predictor_type": "binary_classifier",
        }
        estimator.set_hyperparameters(**hyperparameters)

        # training labels must be 'float32'
        estimator.fit({"train": s3_train_data}, job_name=job_name)

    # The endpoint reuses the training job name.
    with timeout_and_delete_endpoint_by_name(job_name, sagemaker_session):
        predictor = estimator.create_model().deploy(
            1,
            cpu_instance_type,
            endpoint_name=job_name,
            serializer=_FactorizationMachineSerializer(),
            deserializer=sagemaker.deserializers.JSONDeserializer(),
        )

        result = predictor.predict(training_set[0][:10])

        assert len(result["predictions"]) == 10
        for prediction in result["predictions"]:
            assert prediction["score"] is not None

Exemple #24

0

Afficher le fichier

Fichier : test_step_collections.py Projet : xiaoyi-cheng/sagemaker-python-sdk

def estimator(sagemaker_session):
    """Return an ``ml.c4.4xlarge`` Estimator with subnets and security groups."""
    settings = {
        "image_uri": IMAGE_URI,
        "role": ROLE,
        "instance_count": 1,
        "instance_type": "ml.c4.4xlarge",
        "sagemaker_session": sagemaker_session,
        "subnets": ["abc", "def"],
        "security_group_ids": ["123", "456"],
    }
    return Estimator(**settings)

Exemple #25

0

Afficher le fichier

Fichier : test_estimator.py Projet : jnclt/sagemaker-python-sdk

def test_local_mode(session_class, local_session_class):
    """Check that the instance type decides whether an Estimator gets a local session.

    Args:
        session_class: Stand-in for the regular session factory; its return
            value is replaced with a mock whose ``local_mode`` is ``False``.
        local_session_class: Stand-in for the local session factory; its
            return value is replaced with a mock whose ``local_mode`` is
            ``True``.

    NOTE(review): both arguments are presumably injected by ``mock.patch``
    decorators that are not visible in this snippet -- confirm against the
    original test module.
    """
    local_session = Mock()
    local_session.local_mode = True

    session = Mock()
    session.local_mode = False

    local_session_class.return_value = local_session
    session_class.return_value = session

    # Both local instance types must end up with the local-mode session.
    e = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, 'local')
    assert e.sagemaker_session.local_mode is True

    e2 = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, 'local_gpu')
    assert e2.sagemaker_session.local_mode is True

    # Any other instance type must end up with the regular session.
    e3 = Estimator(IMAGE_NAME, ROLE, INSTANCE_COUNT, INSTANCE_TYPE)
    assert e3.sagemaker_session.local_mode is False

Exemple #26

0

Afficher le fichier

Fichier : pipeline.py Projet : tkazusa/ML-CICD-pipeline

def create_estimator():
    """Build the training ``Estimator`` from module-level settings.

    Reads ``batch_size``, ``epoch`` and ``train_url`` from the module-level
    ``args`` object, and uses ``BUCKET`` and ``SAGEMAKER_ROLE`` for the output
    location and the execution role. Training runs on a single
    ``ml.p2.xlarge`` instance.

    Returns:
        The configured ``Estimator``.
    """
    training_params = dict(batch_size=args.batch_size, epochs=args.epoch)
    artifact_location = 's3://{}/output'.format(BUCKET)
    return Estimator(
        image_name=args.train_url,
        role=SAGEMAKER_ROLE,
        hyperparameters=training_params,
        train_instance_count=1,
        train_instance_type='ml.p2.xlarge',
        output_path=artifact_location,
    )

Exemple #27

0

Afficher le fichier

def test_async_byo_estimator(sagemaker_session, region):
    """Train a factorization-machines image asynchronously, then attach and deploy it.

    The test uploads a small slice of the MNIST pickle as a dense tensor to
    the session's default bucket, starts training with ``wait=False``,
    re-attaches to the job by name, deploys the resulting model and checks
    that ten predictions with non-null scores come back.

    Args:
        sagemaker_session: Session used for the bucket lookup, training,
            attaching and endpoint cleanup.
        region: Region passed to ``registry`` to build the image name.
    """
    image_name = registry(region) + "/factorization-machines:1"
    endpoint_name = name_from_base('byo')
    training_job_name = ""

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, 'one_p_mnist', 'mnist.pkl.gz')
        # Outside Python 2 the pickle is loaded with an explicit latin1 encoding.
        pickle_args = {} if sys.version_info.major == 2 else {'encoding': 'latin1'}

        with gzip.open(data_path, 'rb') as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        # take 100 examples for faster execution
        vectors = np.array([t.tolist() for t in train_set[0][:100]]).astype('float32')
        # Binary target: 1.0 where the original label is 0, otherwise 0.0.
        labels = np.where(np.array([t.tolist() for t in train_set[1][:100]]) == 0, 1.0, 0.0).astype('float32')

        buf = io.BytesIO()
        write_numpy_to_dense_tensor(buf, vectors, labels)
        # Rewind so the upload starts from the first byte written.
        buf.seek(0)

        bucket = sagemaker_session.default_bucket()
        prefix = 'test_byo_estimator'
        key = 'recordio-pb-data'
        # S3 keys are '/'-delimited regardless of the host OS. Build the key
        # once so the uploaded object and the training URI always match
        # (os.path.join would produce backslashes on Windows).
        s3_key = '/'.join((prefix, 'train', key))
        boto3.resource('s3').Bucket(bucket).Object(s3_key).upload_fileobj(buf)
        s3_train_data = 's3://{}/{}'.format(bucket, s3_key)

        estimator = Estimator(image_name=image_name,
                              role='SageMakerRole', train_instance_count=1,
                              train_instance_type='ml.c4.xlarge',
                              sagemaker_session=sagemaker_session, base_job_name='test-byo')

        estimator.set_hyperparameters(num_factors=10,
                                      feature_dim=784,
                                      mini_batch_size=100,
                                      predictor_type='binary_classifier')

        # training labels must be 'float32'
        estimator.fit({'train': s3_train_data}, wait=False)
        # Remember the job name so it can be re-attached to below.
        training_job_name = estimator.latest_training_job.name

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        # Rebuild the estimator from the job name instead of reusing the object above.
        estimator = Estimator.attach(training_job_name=training_job_name, sagemaker_session=sagemaker_session)
        model = estimator.create_model()
        predictor = model.deploy(1, 'ml.m4.xlarge', endpoint_name=endpoint_name)
        # The predictor is configured for JSON requests and responses;
        # fm_serializer is defined elsewhere in the test module.
        predictor.serializer = fm_serializer
        predictor.content_type = 'application/json'
        predictor.deserializer = sagemaker.predictor.json_deserializer

        result = predictor.predict(train_set[0][:10])

        assert len(result['predictions']) == 10
        for prediction in result['predictions']:
            assert prediction['score'] is not None

        # The attached estimator must report the image the job was started with.
        assert estimator.train_image() == image_name

Exemple #28

0

Afficher le fichier

Fichier : test_estimator.py Projet : masry707/sagemaker-python-sdk

def test_local_mode_file_output_path(local_session_class):
    """A ``file://`` output path is kept as given on a local-mode estimator.

    ``local_session_class`` is the stand-in local session factory; its
    return value is replaced with a mock that reports ``local_mode`` as
    ``True``.
    """
    local_session_class.return_value = Mock(local_mode=True)

    local_estimator = Estimator(
        IMAGE_NAME,
        ROLE,
        INSTANCE_COUNT,
        'local',
        output_path='file:///tmp/model/',
    )

    assert local_estimator.output_path == 'file:///tmp/model/'

Exemple #29

0

Afficher le fichier

Fichier : test_estimator.py Projet : masry707/sagemaker-python-sdk

def test_file_output_path_not_supported_outside_local_mode(session_class):
    """Building a non-local estimator with a ``file://`` output path raises ``RuntimeError``.

    ``session_class`` is the stand-in session factory; its return value is
    replaced with a mock that reports ``local_mode`` as ``False``.
    """
    session_class.return_value = Mock(local_mode=False)

    with pytest.raises(RuntimeError):
        Estimator(
            IMAGE_NAME,
            ROLE,
            INSTANCE_COUNT,
            INSTANCE_TYPE,
            output_path='file:///tmp/model',
        )

Exemple #30

0

Afficher le fichier

    def estimator(self, batch_n):
        """Return an ``Estimator`` for ``self.container`` configured as a regressor.

        The estimator runs on one ``ml.m5.large`` instance and writes its
        output under ``s3://<self.bucket>/<self.prefix>/output``.

        Args:
            batch_n: Value passed through as the ``mini_batch_size``
                hyperparameter.

        Returns:
            The estimator with its hyperparameters already set.
        """
        regressor = Estimator(
            self.container,
            role=self.role,
            instance_count=1,
            instance_type='ml.m5.large',
            output_path='s3://{}/{}/output'.format(self.bucket, self.prefix),
        )

        hyperparams = {'predictor_type': 'regressor', 'mini_batch_size': batch_n}
        regressor.set_hyperparameters(**hyperparams)

        return regressor