def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
        S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in the "values" feature
        on each Record. If the data is labeled, the label is expected to be encoded as a list of
        scalars in the "values" feature of the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a ``RecordSet`` object
        from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                default value will be used.
            wait (bool): Whether the call should wait until the job completes (default: True).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator generates a default job name,
                based on the training image name and current timestamp.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)

        self.latest_training_job = _TrainingJob.start_new(self, records)
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
# Example #2 (0)
    def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
        S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in the "values" feature
        on each Record. If the data is labeled, the label is expected to be encoded as a list of
        scalars in the "values" feature of the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a ``RecordSet`` object
        from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                default value will be used.
            wait (bool): Whether the call should wait until the job completes (default: True).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator generates a default job name,
                based on the training image name and current timestamp.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)

        self.latest_training_job = _TrainingJob.start_new(self, records)
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
# Example #3 (0)
    def fit(
        self,
        records,
        mini_batch_size=None,
        wait=True,
        logs=True,
        job_name=None,
        experiment_config=None,
    ):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This
        defines a collection of S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in
        the "values" feature on each Record. If the data is labeled, the label
        is expected to be encoded as a list of scalars in the "values" feature of
        the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a
        ``RecordSet`` object from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use
                when training. If ``None``, a default value will be used.
            wait (bool): Whether the call should wait until the job completes
                (default: True).
            logs (bool): Whether to show the logs produced by the job. Only
                meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator
                generates a default job name, based on the training image name
                and current timestamp.
            experiment_config (dict[str, str]): Experiment management configuration.
                Optionally, the dict can contain three keys:
                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
                The behavior of setting these keys is as follows:
                * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                automatically created and the job's Trial Component associated with the Trial.
                * If `TrialName` is supplied and the Trial already exists the job's Trial Component
                will be associated with the Trial.
                * If both `ExperimentName` and `TrialName` are not supplied the trial component
                will be unassociated.
                * `TrialComponentDisplayName` is used for display in Studio.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(
            records, job_name=job_name, mini_batch_size=mini_batch_size
        )

        self.latest_training_job = _TrainingJob.start_new(
            self, records, experiment_config=experiment_config
        )
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
def test_sagemaker_model_default_channel_name(sagemaker_session):
    """A framework constructed with ``model_uri`` should emit a 'model' input channel."""
    framework = DummyFramework(
        entry_point='my_script.py',
        role='DummyRole',
        train_instance_count=3,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
        model_uri='s3://model-bucket/prefix/model.tar.gz',
    )

    _TrainingJob.start_new(framework, {})

    sagemaker_session.train.assert_called_once()
    _, call_kwargs = sagemaker_session.train.call_args
    expected_channel = {
        'ChannelName': 'model',
        'InputMode': 'File',
        'ContentType': 'application/x-sagemaker-model',
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3DataDistributionType': 'FullyReplicated',
                'S3Uri': 's3://model-bucket/prefix/model.tar.gz'
            }
        }
    }
    assert call_kwargs['input_config'] == [expected_channel]
# Example #5 (0)
    def fit(self, inputs):
        """Launch a SageMaker training job for the wrapped estimator.

        Args:
            inputs: Training input passed through to ``_TrainingJob.start_new``.
        """
        from sagemaker.utils import base_name_from_image, name_from_base
        from sagemaker.estimator import _TrainingJob

        estimator = self._estimator

        # Training job names must be unique, so derive one from the base job
        # name (falling back to the training image name) plus a timestamp.
        base = estimator.base_job_name or base_name_from_image(estimator.train_image())
        estimator._current_job_name = name_from_base(base)

        # If no output location was configured, default to the session bucket.
        if estimator.output_path is None:
            estimator.output_path = 's3://{}/'.format(
                estimator.sagemaker_session.default_bucket())

        estimator.latest_training_job = _TrainingJob.start_new(estimator, inputs)
def test_framework_all_init_args(sagemaker_session):
    """All constructor arguments should flow through to the session's train call."""
    framework = DummyFramework(
        'my_script.py',
        role='DummyRole',
        train_instance_count=3,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
        train_volume_size=123,
        train_volume_kms_key='volumekms',
        train_max_run=456,
        input_mode='inputmode',
        output_path='outputpath',
        output_kms_key='outputkms',
        base_job_name='basejobname',
        tags=[{'foo': 'bar'}],
        subnets=['123', '456'],
        security_group_ids=['789', '012'],
    )

    _TrainingJob.start_new(framework, 's3://mydata')

    sagemaker_session.train.assert_called_once()
    _, call_kwargs = sagemaker_session.train.call_args
    expected = {
        'input_mode': 'inputmode',
        'tags': [{'foo': 'bar'}],
        'hyperparameters': {},
        'image': 'fakeimage',
        'input_config': [{
            'ChannelName': 'training',
            'DataSource': {
                'S3DataSource': {
                    'S3DataType': 'S3Prefix',
                    'S3DataDistributionType': 'FullyReplicated',
                    'S3Uri': 's3://mydata'
                }
            }
        }],
        'output_config': {
            'KmsKeyId': 'outputkms',
            'S3OutputPath': 'outputpath'
        },
        'vpc_config': {
            'Subnets': ['123', '456'],
            'SecurityGroupIds': ['789', '012']
        },
        'stop_condition': {'MaxRuntimeInSeconds': 456},
        'role': sagemaker_session.expand_role(),
        'job_name': None,
        'resource_config': {
            'VolumeSizeInGB': 123,
            'InstanceCount': 3,
            'VolumeKmsKeyId': 'volumekms',
            'InstanceType': 'ml.m4.xlarge'
        },
    }
    assert call_kwargs == expected