def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
        S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in the "values" feature
        on each Record. If the data is labeled, the label is expected to be encoded as a list of
        scalars in the "values" feature of the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a ``RecordSet`` object
        from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                default value will be used.
            wait (bool): Whether the call should wait until the job completes (default: True).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator generates a default job name,
                based on the training image name and current timestamp.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)

        self.latest_training_job = _TrainingJob.start_new(self, records)
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
# Example #2 (0)
    def fit(self, records, mini_batch_size=None, wait=True, logs=True, job_name=None):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This defines a collection of
        S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in the "values" feature
        on each Record. If the data is labeled, the label is expected to be encoded as a list of
        scalars in the "values" feature of the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a ``RecordSet`` object
        from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use when training. If ``None``, a
                default value will be used.
            wait (bool): Whether the call should wait until the job completes (default: True).
            logs (bool): Whether to show the logs produced by the job.
                Only meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator generates a default job name,
                based on the training image name and current timestamp.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(records, job_name=job_name, mini_batch_size=mini_batch_size)

        self.latest_training_job = _TrainingJob.start_new(self, records)
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
# Example #3 (0)
    def fit(
        self,
        records,
        mini_batch_size=None,
        wait=True,
        logs=True,
        job_name=None,
        experiment_config=None,
    ):
        """Fit this Estimator on serialized Record objects, stored in S3.

        ``records`` should be an instance of :class:`~RecordSet`. This
        defines a collection of S3 data files to train this ``Estimator`` on.

        Training data is expected to be encoded as dense or sparse vectors in
        the "values" feature on each Record. If the data is labeled, the label
        is expected to be encoded as a list of scalars in the "values" feature of
        the Record label.

        More information on the Amazon Record format is available at:
        https://docs.aws.amazon.com/sagemaker/latest/dg/cdf-training.html

        See :meth:`~AmazonAlgorithmEstimatorBase.record_set` to construct a
        ``RecordSet`` object from :class:`~numpy.ndarray` arrays.

        Args:
            records (:class:`~RecordSet`): The records to train this ``Estimator`` on
            mini_batch_size (int or None): The size of each mini-batch to use
                when training. If ``None``, a default value will be used.
            wait (bool): Whether the call should wait until the job completes
                (default: True).
            logs (bool): Whether to show the logs produced by the job. Only
                meaningful when wait is True (default: True).
            job_name (str): Training job name. If not specified, the estimator
                generates a default job name, based on the training image name
                and current timestamp.
            experiment_config (dict[str, str]): Experiment management configuration.
                Optionally, the dict can contain three keys:
                'ExperimentName', 'TrialName', and 'TrialComponentDisplayName'.
                The behavior of setting these keys is as follows:
                * If `ExperimentName` is supplied but `TrialName` is not a Trial will be
                automatically created and the job's Trial Component associated with the Trial.
                * If `TrialName` is supplied and the Trial already exists the job's Trial Component
                will be associated with the Trial.
                * If both `ExperimentName` and `TrialName` are not supplied the trial component
                will be unassociated.
                * `TrialComponentDisplayName` is used for display in Studio.
        """
        # Validate inputs and resolve the final job name / batch size before launch.
        self._prepare_for_training(
            records, job_name=job_name, mini_batch_size=mini_batch_size
        )

        self.latest_training_job = _TrainingJob.start_new(
            self, records, experiment_config=experiment_config
        )
        if wait:
            # Block until the job finishes, optionally streaming its logs.
            self.latest_training_job.wait(logs=logs)
def test_sagemaker_model_default_channel_name(sagemaker_session):
    """A framework constructed with ``model_uri`` should emit a 'model' input channel."""
    framework = DummyFramework(
        entry_point='my_script.py',
        role='DummyRole',
        train_instance_count=3,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
        model_uri='s3://model-bucket/prefix/model.tar.gz',
    )

    _TrainingJob.start_new(framework, {})

    sagemaker_session.train.assert_called_once()
    _, call_kwargs = sagemaker_session.train.call_args
    expected_channel = {
        'ChannelName': 'model',
        'InputMode': 'File',
        'ContentType': 'application/x-sagemaker-model',
        'DataSource': {
            'S3DataSource': {
                'S3DataType': 'S3Prefix',
                'S3DataDistributionType': 'FullyReplicated',
                'S3Uri': 's3://model-bucket/prefix/model.tar.gz'
            }
        }
    }
    assert call_kwargs['input_config'] == [expected_channel]
# Example #5 (0)
    def fit(self, inputs):
        """Launch a SageMaker training job for the wrapped estimator.

        Args:
            inputs: Training input passed through to ``_TrainingJob.start_new``.
        """
        from sagemaker.utils import base_name_from_image, name_from_base
        from sagemaker.estimator import _TrainingJob

        estimator = self._estimator

        # Training job names must be unique, so derive one from the base job
        # name (falling back to the training image name) plus a timestamp.
        base = estimator.base_job_name or base_name_from_image(estimator.train_image())
        estimator._current_job_name = name_from_base(base)

        # If no output location was configured, default to the session bucket.
        if estimator.output_path is None:
            estimator.output_path = 's3://{}/'.format(
                estimator.sagemaker_session.default_bucket())

        estimator.latest_training_job = _TrainingJob.start_new(estimator, inputs)
def test_framework_all_init_args(sagemaker_session):
    """All constructor arguments should flow through to the session's train call."""
    framework = DummyFramework(
        'my_script.py',
        role='DummyRole',
        train_instance_count=3,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
        train_volume_size=123,
        train_volume_kms_key='volumekms',
        train_max_run=456,
        input_mode='inputmode',
        output_path='outputpath',
        output_kms_key='outputkms',
        base_job_name='basejobname',
        tags=[{'foo': 'bar'}],
        subnets=['123', '456'],
        security_group_ids=['789', '012'],
    )

    _TrainingJob.start_new(framework, 's3://mydata')

    sagemaker_session.train.assert_called_once()
    _, call_kwargs = sagemaker_session.train.call_args
    expected = {
        'input_mode': 'inputmode',
        'tags': [{'foo': 'bar'}],
        'hyperparameters': {},
        'image': 'fakeimage',
        'input_config': [{
            'ChannelName': 'training',
            'DataSource': {
                'S3DataSource': {
                    'S3DataType': 'S3Prefix',
                    'S3DataDistributionType': 'FullyReplicated',
                    'S3Uri': 's3://mydata'
                }
            }
        }],
        'output_config': {
            'KmsKeyId': 'outputkms',
            'S3OutputPath': 'outputpath'
        },
        'vpc_config': {
            'Subnets': ['123', '456'],
            'SecurityGroupIds': ['789', '012']
        },
        'stop_condition': {'MaxRuntimeInSeconds': 456},
        'role': sagemaker_session.expand_role(),
        'job_name': None,
        'resource_config': {
            'VolumeSizeInGB': 123,
            'InstanceCount': 3,
            'VolumeKmsKeyId': 'volumekms',
            'InstanceType': 'ml.m4.xlarge'
        },
    }
    assert call_kwargs == expected