# Example #1
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        hp_dict = super(TensorFlow, self).hyperparameters()

        # Lazily fill in the default S3 checkpoint prefix the first time.
        if not self.checkpoint_path:
            self.checkpoint_path = self._default_s3_path('checkpoints')

        if self._script_mode_enabled():
            if not self.model_dir:
                self.model_dir = self._default_s3_path('model')
            extra = {'model_dir': self.model_dir}
            if 'parameter_server' in self.distributions:
                ps_config = self.distributions['parameter_server']
                extra[self.LAUNCH_PS_ENV_NAME] = ps_config.get('enabled', False)
        else:
            # Legacy (non-script) mode passes the training loop settings directly.
            extra = {
                'checkpoint_path': self.checkpoint_path,
                'training_steps': self.training_steps,
                'evaluation_steps': self.evaluation_steps,
                'sagemaker_requirements': self.requirements_file,
            }

        hp_dict.update(Framework._json_encode_hyperparameters(extra))
        return hp_dict
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        base = super(TensorFlow, self).hyperparameters()

        # Lazily fill in the default S3 checkpoint prefix the first time.
        if not self.checkpoint_path:
            self.checkpoint_path = self._default_s3_path('checkpoints')

        horovod_enabled = False
        if self._script_mode_enabled():
            extra = {}

            if 'parameter_server' in self.distributions:
                ps_config = self.distributions['parameter_server']
                extra[self.LAUNCH_PS_ENV_NAME] = ps_config.get('enabled', False)

            if 'mpi' in self.distributions:
                mpi_config = self.distributions['mpi']
                horovod_enabled = mpi_config.get('enabled', False)
                extra[self.LAUNCH_MPI_ENV_NAME] = horovod_enabled
                extra[self.MPI_NUM_PROCESSES_PER_HOST] = mpi_config.get('processes_per_host', 1)
                extra[self.MPI_CUSTOM_MPI_OPTIONS] = mpi_config.get('custom_mpi_options', '')

            # MPI changes where the model artifacts are staged by default.
            if not self.model_dir:
                self.model_dir = self._default_s3_path('model', mpi=horovod_enabled)
            extra['model_dir'] = self.model_dir
        else:
            # Legacy (non-script) mode passes the training loop settings directly.
            extra = {
                'checkpoint_path': self.checkpoint_path,
                'training_steps': self.training_steps,
                'evaluation_steps': self.evaluation_steps,
                'sagemaker_requirements': self.requirements_file,
            }

        base.update(Framework._json_encode_hyperparameters(extra))
        return base
# Example #3
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        result = super(TensorFlow, self).hyperparameters()
        extra = {}

        if "parameter_server" in self.distribution:
            ps_config = self.distribution["parameter_server"]
            extra[self.LAUNCH_PS_ENV_NAME] = ps_config.get("enabled", False)

        horovod_enabled = False
        if "mpi" in self.distribution:
            mpi_config = self.distribution["mpi"]
            horovod_enabled = mpi_config.get("enabled", False)
            extra[self.LAUNCH_MPI_ENV_NAME] = horovod_enabled

            # Only forwarded when the user set it explicitly (truthy).
            processes_per_host = mpi_config.get("processes_per_host")
            if processes_per_host:
                extra[self.MPI_NUM_PROCESSES_PER_HOST] = processes_per_host

            extra[self.MPI_CUSTOM_MPI_OPTIONS] = mpi_config.get("custom_mpi_options", "")

        # model_dir=False means the user opted out of a managed model_dir.
        if self.model_dir is not False:
            if not self.model_dir:
                self.model_dir = self._default_s3_path("model", mpi=horovod_enabled)
            extra["model_dir"] = self.model_dir

        result.update(Framework._json_encode_hyperparameters(extra))
        return result
 def hyperparameters(self):
     """Return hyperparameters used by your custom PyTorch code during model training."""
     hp_map = super(HuggingFace, self).hyperparameters()
     distribution_args = self._distribution_configuration(distribution=self.distribution)
     encoded = Framework._json_encode_hyperparameters(distribution_args)
     hp_map.update(encoded)
     return hp_map
    def hyperparameters(self):
        """Return hyperparameters used by your custom PyTorch code during model training."""
        result = super(HuggingFace, self).hyperparameters()

        # Distributed-training settings always contribute hyperparameters.
        distribution_args = self._distribution_configuration(distribution=self.distribution)
        result.update(Framework._json_encode_hyperparameters(distribution_args))

        # Training-compiler settings contribute only when a config was supplied.
        if self.compiler_config:
            compiler_args = self.compiler_config._to_hyperparameter_dict()
            result.update(Framework._json_encode_hyperparameters(compiler_args))

        return result
# Example #6
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        combined = super(RLEstimator, self).hyperparameters()

        rl_hyperparameters = {
            SAGEMAKER_OUTPUT_LOCATION: self.output_path,
            # TODO: can be applied to all other estimators
            SAGEMAKER_ESTIMATOR: SAGEMAKER_ESTIMATOR_VALUE,
        }
        combined.update(Framework._json_encode_hyperparameters(rl_hyperparameters))

        return combined
    def hyperparameters(self):
        """Return hyperparameters used by your custom Chainer code during training."""
        merged = super(Chainer, self).hyperparameters()

        mpi_settings = {
            Chainer._use_mpi: self.use_mpi,
            Chainer._num_processes: self.num_processes,
            Chainer._process_slots_per_host: self.process_slots_per_host,
            Chainer._additional_mpi_options: self.additional_mpi_options,
        }
        # Drop entries whose values are unset (falsy).
        set_values = {key: value for key, value in mpi_settings.items() if value}

        merged.update(Framework._json_encode_hyperparameters(set_values))
        return merged
    def hyperparameters(self):
        """Return hyperparameters used by your custom Chainer code during training."""
        result = super(Chainer, self).hyperparameters()

        candidates = (
            (Chainer._use_mpi, self.use_mpi),
            (Chainer._num_processes, self.num_processes),
            (Chainer._process_slots_per_host, self.process_slots_per_host),
            (Chainer._additional_mpi_options, self.additional_mpi_options),
        )
        # Only settings the caller actually set (truthy) are forwarded.
        mpi_hyperparameters = {name: value for name, value in candidates if value}

        result.update(Framework._json_encode_hyperparameters(mpi_hyperparameters))
        return result
# Example #9
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        result = super(TensorFlow, self).hyperparameters()
        extra = {}

        if "parameter_server" in self.distribution:
            ps_config = self.distribution["parameter_server"]
            extra[self.LAUNCH_PS_ENV_NAME] = ps_config.get("enabled", False)

        horovod_enabled = False
        if "mpi" in self.distribution:
            mpi_config = self.distribution["mpi"]
            horovod_enabled = mpi_config.get("enabled", False)
            extra[self.LAUNCH_MPI_ENV_NAME] = horovod_enabled

            # Only forwarded when the user set it explicitly (truthy).
            processes_per_host = mpi_config.get("processes_per_host")
            if processes_per_host:
                extra[self.MPI_NUM_PROCESSES_PER_HOST] = processes_per_host

            extra[self.MPI_CUSTOM_MPI_OPTIONS] = mpi_config.get("custom_mpi_options", "")

            mp_parameters = fw.get_mp_parameters(self.distribution)
            if mp_parameters:
                extra["mp_parameters"] = mp_parameters

        elif "modelparallel" in self.distribution.get("smdistributed", {}):
            # Model parallelism is launched through MPI, so it cannot stand alone.
            raise ValueError("Cannot use Model Parallelism without MPI enabled!")

        if "smdistributed" in self.distribution:
            # smdistributed strategy selected
            smdist_config = self.distribution["smdistributed"]
            ddp_enabled = smdist_config.get("dataparallel", {}).get("enabled", False)
            extra[self.LAUNCH_SM_DDP_ENV_NAME] = ddp_enabled
            extra[self.INSTANCE_TYPE] = self.instance_type

        # model_dir=False means the user opted out of a managed model_dir.
        if self.model_dir is not False:
            if not self.model_dir:
                self.model_dir = self._default_s3_path("model", mpi=horovod_enabled)
            extra["model_dir"] = self.model_dir

        result.update(Framework._json_encode_hyperparameters(extra))
        return result
# Example #10
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        combined = super(TensorFlow, self).hyperparameters()
        distribution_hps = self._distribution_configuration(self.distribution)

        # model_dir=False means the user opted out of a managed model_dir.
        if self.model_dir is not False:
            if not self.model_dir:
                mpi_on = distribution_hps.get(self.LAUNCH_MPI_ENV_NAME, False)
                self.model_dir = self._default_s3_path("model", mpi=mpi_on)
            distribution_hps["model_dir"] = self.model_dir

        combined.update(Framework._json_encode_hyperparameters(distribution_hps))
        return combined
# Example #11
def validate_estimator_customparams(estimator: Framework,
                                    mock_session: Session, descriptor,
                                    sagemaker_config: SageMakerExecutorConfig):
    """Assert that *estimator* was built from *descriptor* and *sagemaker_config*.

    Checks, in order: metric definitions, hyperparameters are non-empty, the
    script source location, the session, the docker image / framework version,
    the instance type and count, and the IAM/networking settings drawn from
    *sagemaker_config*. Raises AssertionError on the first mismatch.
    """
    assert estimator.metric_definitions == get_metric_definitions(descriptor)
    # Only checks that some hyperparameters exist, not their exact values.
    assert estimator.hyperparameters()
    assert estimator.source_dir == SOURCE_DIR
    assert estimator.entry_point == ScriptSourceDirectory.PYTHON_ENTRY_POINT
    assert estimator.sagemaker_session == mock_session
    assert estimator.image_name == descriptor.env.docker_image
    assert estimator.framework_version == descriptor.ml.framework_version
    assert estimator.train_instance_type == descriptor.hardware.instance_type
    assert estimator.train_instance_count == descriptor.hardware.distributed.num_instances
    # IAM role, output bucket, and VPC settings come from the executor config.
    assert estimator.role == sagemaker_config.sm_role
    assert estimator.output_path == f"s3://{sagemaker_config.s3_output_bucket}"
    assert estimator.security_group_ids == sagemaker_config.security_group_ids
    assert estimator.subnets == sagemaker_config.subnets
# Example #12
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        merged = super(TensorFlow, self).hyperparameters()

        # Lazily fill in the default S3 checkpoint prefix the first time.
        if not self.checkpoint_path:
            self.checkpoint_path = self._default_s3_path("checkpoints")

        if self._script_mode_enabled():
            extra = {}
            horovod_enabled = False

            if "parameter_server" in self.distributions:
                ps_config = self.distributions["parameter_server"]
                extra[self.LAUNCH_PS_ENV_NAME] = ps_config.get("enabled", False)

            if "mpi" in self.distributions:
                mpi_config = self.distributions["mpi"]
                horovod_enabled = mpi_config.get("enabled", False)
                extra[self.LAUNCH_MPI_ENV_NAME] = horovod_enabled

                # Only forwarded when the user set it explicitly (truthy).
                processes_per_host = mpi_config.get("processes_per_host")
                if processes_per_host:
                    extra[self.MPI_NUM_PROCESSES_PER_HOST] = processes_per_host

                extra[self.MPI_CUSTOM_MPI_OPTIONS] = mpi_config.get("custom_mpi_options", "")

            # MPI changes where the model artifacts are staged by default.
            if not self.model_dir:
                self.model_dir = self._default_s3_path("model", mpi=horovod_enabled)
            extra["model_dir"] = self.model_dir
        else:
            # Legacy (non-script) mode passes the training loop settings directly.
            extra = {
                "checkpoint_path": self.checkpoint_path,
                "training_steps": self.training_steps,
                "evaluation_steps": self.evaluation_steps,
                "sagemaker_requirements": self.requirements_file,
            }

        merged.update(Framework._json_encode_hyperparameters(extra))
        return merged
# Example #13
    def hyperparameters(self):
        """Return hyperparameters used by your custom MXNet code during model training.

        Extends the base Framework hyperparameters with a JSON-encoded
        ``checkpoint_path``, defaulting it on first use: local mode with local
        code shares checkpoints through a fixed container path, otherwise the
        path is rooted at the estimator's S3 output location.
        """
        hyperparameters = super(MXNet, self).hyperparameters()

        if not self.checkpoint_path:
            local_code = get_config_value('local.local_code', self.sagemaker_session.config)
            if self.sagemaker_session.local_mode and local_code:
                self.checkpoint_path = '/opt/ml/shared/checkpoints'
            else:
                # BUG FIX: the original read `os.path.join(self. , ...)` — a syntax
                # error with the attribute missing. The checkpoint prefix is rooted
                # at the estimator's output path, matching the sibling TensorFlow
                # implementations in this file.
                self.checkpoint_path = os.path.join(self.output_path,
                                                    self._current_job_name, 'checkpoints')

        additional_hyperparameters = {'checkpoint_path': self.checkpoint_path}

        hyperparameters.update(Framework._json_encode_hyperparameters(additional_hyperparameters))
        return hyperparameters
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        result = super(TensorFlow, self).hyperparameters()

        # Default the checkpoint prefix to <output_path>/<job_name>/checkpoints.
        if not self.checkpoint_path:
            self.checkpoint_path = os.path.join(
                self.output_path, self._current_job_name, 'checkpoints')

        tf_hyperparameters = {
            'checkpoint_path': self.checkpoint_path,
            'training_steps': self.training_steps,
            'evaluation_steps': self.evaluation_steps,
        }

        result.update(Framework._json_encode_hyperparameters(tf_hyperparameters))
        return result
    def hyperparameters(self):
        """Return hyperparameters used by your custom TensorFlow code during model training."""
        combined = super(TensorFlow, self).hyperparameters()

        if not self.checkpoint_path:
            local_code = get_config_value('local.local_code', self.sagemaker_session.config)
            if self.sagemaker_session.local_mode and local_code:
                # Local mode with local code shares checkpoints via a fixed container path.
                self.checkpoint_path = '/opt/ml/shared/checkpoints'
            else:
                self.checkpoint_path = os.path.join(
                    self.output_path, self._current_job_name, 'checkpoints')

        tf_hyperparameters = {
            'checkpoint_path': self.checkpoint_path,
            'training_steps': self.training_steps,
            'evaluation_steps': self.evaluation_steps,
            'sagemaker_requirements': self.requirements_file,
        }

        combined.update(Framework._json_encode_hyperparameters(tf_hyperparameters))
        return combined
# Example #16
def test_attach_no_job_name_framework(sagemaker_session):
    """Attaching without a training job name must raise a ValueError."""
    with pytest.raises(ValueError) as excinfo:
        Framework.attach(training_job_name=None, sagemaker_session=sagemaker_session)
    assert 'must specify training_job name' in str(excinfo)