Example #1
    def test_run_hyperparameter_tuning_job_with_fail_raises(
        self,
        create_hyperparameter_tuning_job_mock,
        get_hyperparameter_tuning_job_mock_with_fail,
        sync,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation": hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]
                ),
                "batch_size": hpt.DiscreteParameterSpec(
                    values=[16, 32], scale="linear"
                ),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
            labels=_TEST_LABELS,
        )

        with pytest.raises(RuntimeError):
            job.run(
                service_account=_TEST_SERVICE_ACCOUNT,
                network=_TEST_NETWORK,
                timeout=_TEST_TIMEOUT,
                restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
                sync=sync,
            )

            job.wait()

        expected_hyperparameter_tuning_job = _get_hyperparameter_tuning_job_proto()

        create_hyperparameter_tuning_job_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            hyperparameter_tuning_job=expected_hyperparameter_tuning_job,
        )

        assert job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_FAILED
from typing import NamedTuple


def tune_hyperparameters(
    project: str, location: str, container_uri: str, training_file_path: str,
    validation_file_path: str, staging_bucket: str, max_trial_count: int,
    parallel_trial_count: int
) -> NamedTuple('Outputs', [("best_accuracy", float), ("best_alpha", float),
                            ("best_max_iter", int)]):
    from google.cloud import aiplatform
    from google.cloud.aiplatform import hyperparameter_tuning as hpt

    aiplatform.init(project=project,
                    location=location,
                    staging_bucket=staging_bucket)

    worker_pool_specs = [{
        "machine_spec": {
            "machine_type": "n1-standard-4",
            "accelerator_type": "NVIDIA_TESLA_K80",
            "accelerator_count": 1,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri":
            container_uri,
            "args": [
                f"--training_dataset_path={training_file_path}",
                f"--validation_dataset_path={validation_file_path}", "--hptune"
            ],
        },
    }]

    custom_job = aiplatform.CustomJob(display_name='covertype_kfp_trial_job',
                                      worker_pool_specs=worker_pool_specs)

    hp_job = aiplatform.HyperparameterTuningJob(
        display_name='covertype_kfp_tuning_job',
        custom_job=custom_job,
        metric_spec={
            'accuracy': 'maximize',
        },
        parameter_spec={
            'alpha':
            hpt.DoubleParameterSpec(min=1.0e-4, max=1.0e-1, scale='linear'),
            'max_iter':
            hpt.DiscreteParameterSpec(values=[1, 2], scale='linear')
        },
        max_trial_count=max_trial_count,
        parallel_trial_count=parallel_trial_count,
    )

    hp_job.run()

    metrics = [
        trial.final_measurement.metrics[0].value for trial in hp_job.trials
    ]
    best_trial = hp_job.trials[metrics.index(max(metrics))]
    best_accuracy = float(best_trial.final_measurement.metrics[0].value)
    best_alpha = float(best_trial.parameters[0].value)
    best_max_iter = int(best_trial.parameters[1].value)

    return best_accuracy, best_alpha, best_max_iter
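
The component function above follows the self-contained style that KFP lightweight Python components expect (all imports live inside the function body). A minimal sketch of wrapping it into a reusable component, assuming KFP v1's create_component_from_func is available; the base image and package list below are illustrative choices, not values from the original example:

from kfp.components import create_component_from_func

# Hypothetical wrapper; adjust base_image/packages_to_install to your environment.
tune_hyperparameters_op = create_component_from_func(
    tune_hyperparameters,
    base_image="python:3.9",
    packages_to_install=["google-cloud-aiplatform"],
)
# tune_hyperparameters_op can then be invoked inside a @dsl.pipeline function
# with the same arguments as tune_hyperparameters.
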
    def test_create_custom_job_with_tensorboard(self,
                                                create_custom_job_v1beta1_mock,
                                                get_custom_job_mock, sync):

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(display_name=_TEST_DISPLAY_NAME,
                                   worker_pool_specs=_TEST_WORKER_POOL_SPEC)

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            tensorboard=_TEST_TENSORBOARD_NAME,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=sync,
        )

        job.wait()

        expected_custom_job = _get_custom_job_proto(version="v1beta1")

        create_custom_job_v1beta1_mock.assert_called_once_with(
            parent=_TEST_PARENT, custom_job=expected_custom_job)

        expected_custom_job = _get_custom_job_proto()

        assert job.job_spec == expected_custom_job.job_spec
        assert (job._gca_resource.state ==
                gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED)
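
For reference, outside of this mocked test the tensorboard argument of CustomJob.run takes the full Vertex AI TensorBoard resource name and is used together with a service account that can write to it. A rough sketch with placeholder project, bucket, and TensorBoard IDs (none of these values come from the test):

from google.cloud import aiplatform

aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-staging-bucket",
)

worker_pool_specs = [...]  # same structure as in the surrounding examples

job = aiplatform.CustomJob(
    display_name="tensorboard-enabled-job",
    worker_pool_specs=worker_pool_specs,
)

job.run(
    service_account="training-sa@my-project.iam.gserviceaccount.com",
    tensorboard="projects/my-project/locations/us-central1/tensorboards/1234567890",
)
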
    def test_run_custom_job_with_fail_raises(self, create_custom_job_mock,
                                             get_custom_job_mock_with_fail,
                                             sync):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(display_name=_TEST_DISPLAY_NAME,
                                   worker_pool_specs=_TEST_WORKER_POOL_SPEC)

        with pytest.raises(RuntimeError):
            job.run(
                service_account=_TEST_SERVICE_ACCOUNT,
                network=_TEST_NETWORK,
                timeout=_TEST_TIMEOUT,
                restart_job_on_worker_restart=
                _TEST_RESTART_JOB_ON_WORKER_RESTART,
                sync=sync,
            )

            job.wait()

        expected_custom_job = _get_custom_job_proto()

        create_custom_job_mock.assert_called_once_with(
            parent=_TEST_PARENT, custom_job=expected_custom_job)

        assert job.job_spec == expected_custom_job.job_spec
        assert job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_FAILED
Example #5
    def test_check_custom_job_availability(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
            labels=_TEST_LABELS,
        )

        assert not job._resource_is_available
        assert job.__repr__().startswith(
            "<google.cloud.aiplatform.jobs.CustomJob object")

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
        )

        job.wait_for_resource_creation()

        assert job._resource_is_available
        assert "resource name" in job.__repr__()

        job.wait()
Example #6
    def test_no_staging_bucket_raises(self):

        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

        with pytest.raises(RuntimeError):
            job = aiplatform.CustomJob(  # noqa: F841
                display_name=_TEST_DISPLAY_NAME,
                worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            )
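
The RuntimeError in this example disappears once a staging bucket is available to the job, either globally through aiplatform.init or directly on the CustomJob (the pipeline component further down passes staging_bucket=base_output_directory for the same reason). A sketch with placeholder names:

from google.cloud import aiplatform

worker_pool_specs = [{
    "machine_spec": {"machine_type": "n1-standard-4"},
    "replica_count": 1,
    "container_spec": {"image_uri": "python:3.9", "command": ["python3", "-c", "print('hello')"]},
}]

# Option 1: set the bucket globally.
aiplatform.init(project="my-project", location="us-central1", staging_bucket="gs://my-staging-bucket")
job = aiplatform.CustomJob(display_name="my-job", worker_pool_specs=worker_pool_specs)

# Option 2: keep init without a bucket and set it on the job itself.
aiplatform.init(project="my-project", location="us-central1")
job = aiplatform.CustomJob(
    display_name="my-job",
    worker_pool_specs=worker_pool_specs,
    staging_bucket="gs://my-staging-bucket",
)
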
Example #7
    def test_create_custom_job_with_enable_web_access(
        self,
        create_custom_job_mock_with_enable_web_access,
        get_custom_job_mock_with_enable_web_access,
        sync,
        caplog,
    ):
        caplog.set_level(logging.INFO)

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
            labels=_TEST_LABELS,
        )

        job.run(
            enable_web_access=_TEST_ENABLE_WEB_ACCESS,
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=sync,
            create_request_timeout=None,
        )

        job.wait_for_resource_creation()

        job.wait()

        assert "workerpool0-0" in caplog.text

        assert job.resource_name == _TEST_CUSTOM_JOB_NAME

        expected_custom_job = _get_custom_job_proto_with_enable_web_access()

        create_custom_job_mock_with_enable_web_access.assert_called_once_with(
            parent=_TEST_PARENT,
            custom_job=expected_custom_job,
            timeout=None,
        )

        assert job.job_spec == expected_custom_job.job_spec
        assert (job._gca_resource.state ==
                gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED)
        caplog.clear()
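
The "workerpool0-0" assertion checks the log line in which the SDK surfaces the interactive shell links for a job created with enable_web_access=True. On a real job the same links should be readable from the job object once it is running; a sketch, assuming the web_access_uris property described in the SDK documentation:

# Assumes `job` was run with enable_web_access=True and has reached the RUNNING state.
uris = job.web_access_uris  # e.g. {"workerpool0-0": "https://..."}
for replica, uri in uris.items():
    print(f"Interactive shell for {replica}: {uri}")
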
    def test_custom_job_get_state_raises_without_run(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(display_name=_TEST_DISPLAY_NAME,
                                   worker_pool_specs=_TEST_WORKER_POOL_SPEC)

        with pytest.raises(RuntimeError):
            print(job.state)
Example #9
    def test_run_custom_job_with_fail_raises(self, create_custom_job_mock,
                                             get_custom_job_mock_with_fail,
                                             sync):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
            labels=_TEST_LABELS,
        )

        with pytest.raises(RuntimeError) as e:
            job.wait_for_resource_creation()
        assert e.match(r"CustomJob resource is not scheduled to be created.")

        with pytest.raises(RuntimeError):
            job.run(
                service_account=_TEST_SERVICE_ACCOUNT,
                network=_TEST_NETWORK,
                timeout=_TEST_TIMEOUT,
                restart_job_on_worker_restart=
                _TEST_RESTART_JOB_ON_WORKER_RESTART,
                sync=sync,
                create_request_timeout=None,
            )

            job.wait()

        # shouldn't fail
        job.wait_for_resource_creation()
        assert job.resource_name == _TEST_CUSTOM_JOB_NAME

        expected_custom_job = _get_custom_job_proto()

        create_custom_job_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            custom_job=expected_custom_job,
            timeout=None,
        )

        assert job.job_spec == expected_custom_job.job_spec
        assert job.state == gca_job_state_compat.JobState.JOB_STATE_FAILED
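
Stripped of the mocks, the asynchronous lifecycle this test exercises looks roughly like the sketch below (placeholder display name and worker pool, error handling reduced to a comment):

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1", staging_bucket="gs://my-staging-bucket")

worker_pool_specs = [...]  # same structure as in the surrounding examples

job = aiplatform.CustomJob(display_name="my-job", worker_pool_specs=worker_pool_specs)

job.run(sync=False)               # returns immediately; creation happens in the background
job.wait_for_resource_creation()  # blocks until the CustomJob resource exists (raises if creation failed)
print(job.resource_name)          # safe to read once the resource exists

try:
    job.wait()                    # blocks until the job reaches a terminal state
except RuntimeError:
    # run()/wait() raise RuntimeError when the job ends in JOB_STATE_FAILED
    print(job.state)
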
Example #10
    def test_create_custom_job_without_base_output_dir(self):

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
        )

        assert job.job_spec.base_output_directory.output_uri_prefix.startswith(
            f"{_TEST_STAGING_BUCKET}/aiplatform-custom-job")
    def test_project_id_inference(self, shared_state):
        # Collection of resources generated by this test, to be deleted during teardown
        shared_state["resources"] = []

        aiplatform.init(
            project=e2e_base._PROJECT,
            location=e2e_base._LOCATION,
            staging_bucket=shared_state["staging_bucket_name"],
        )

        worker_pool_specs = [{
            "machine_spec": {
                "machine_type": "n1-standard-4"
            },
            "replica_count": 1,
            "container_spec": {
                "image_uri":
                "python:3.9",
                "command": [
                    "sh",
                    "-exc",
                    """python3 -m pip install git+https://github.com/googleapis/python-aiplatform@main
                            "$0" "$@"
                            """,
                    "python3",
                    "-c",
                    _SCRIPT,
                ],
                "args": [],
            },
        }]

        custom_job = aiplatform.CustomJob(
            display_name=self._make_display_name("custom"),
            worker_pool_specs=worker_pool_specs,
        )
        custom_job.run()

        shared_state["resources"].append(custom_job)

        assert custom_job.state == gca_job_state.JobState.JOB_STATE_SUCCEEDED
Example #12
    def test_create_custom_job(self, create_custom_job_mock,
                               get_custom_job_mock, sync):

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
            labels=_TEST_LABELS,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=sync,
        )

        job.wait_for_resource_creation()

        assert job.resource_name == _TEST_CUSTOM_JOB_NAME

        job.wait()

        expected_custom_job = _get_custom_job_proto()

        create_custom_job_mock.assert_called_once_with(
            parent=_TEST_PARENT, custom_job=expected_custom_job)

        assert job.job_spec == expected_custom_job.job_spec
        assert (job._gca_resource.state ==
                gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED)
        assert job.network == _TEST_NETWORK
    def test_hyperparameter_tuning_job_get_state_raises_without_run(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr":
                hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units":
                hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation":
                hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]),
                "batch_size":
                hpt.DiscreteParameterSpec(values=[16, 32, 64], scale="linear"),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
        )

        with pytest.raises(RuntimeError):
            print(job.state)
Example #14
    def test_run_custom_job_with_fail_at_creation(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = aiplatform.CustomJob(
            display_name=_TEST_DISPLAY_NAME,
            worker_pool_specs=_TEST_WORKER_POOL_SPEC,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=False,
        )

        with pytest.raises(RuntimeError) as e:
            job.wait_for_resource_creation()
        assert e.match("Mock fail")

        with pytest.raises(RuntimeError) as e:
            job.resource_name
        assert e.match(
            "CustomJob resource has not been created. Resource failed with: Mock fail"
        )

        with pytest.raises(RuntimeError) as e:
            job.network
        assert e.match(
            "CustomJob resource has not been created. Resource failed with: Mock fail"
        )
Example #15
    def test_create_hyperparameter_tuning_job_with_enable_web_access(
        self,
        create_hyperparameter_tuning_job_mock_with_enable_web_access,
        get_hyperparameter_tuning_job_mock_with_enable_web_access,
        sync,
        caplog,
    ):
        caplog.set_level(logging.INFO)

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr":
                hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units":
                hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation":
                hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]),
                "batch_size":
                hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
            labels=_TEST_LABELS,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            enable_web_access=test_custom_job._TEST_ENABLE_WEB_ACCESS,
            sync=sync,
            create_request_timeout=None,
        )

        job.wait()

        assert "workerpool0-0" in caplog.text

        expected_hyperparameter_tuning_job = (
            _get_hyperparameter_tuning_job_proto_with_enable_web_access())

        create_hyperparameter_tuning_job_mock_with_enable_web_access.assert_called_once_with(
            parent=_TEST_PARENT,
            hyperparameter_tuning_job=expected_hyperparameter_tuning_job,
            timeout=None,
        )

        assert job.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
        assert job.network == _TEST_NETWORK
        assert job.trials == []

        caplog.clear()
Example #16
from typing import NamedTuple


def hyperparameter_tuning_job_run_op(
    display_name: str,
    project: str,
    base_output_directory: str,
    worker_pool_specs: list,
    study_spec_metrics: dict,
    study_spec_parameters: list,
    max_trial_count: int,
    parallel_trial_count: int,
    max_failed_trial_count: int = 0,
    location: str = "us-central1",
    study_spec_algorithm: str = "ALGORITHM_UNSPECIFIED",
    study_spec_measurement_selection_type: str = "BEST_MEASUREMENT",
    encryption_spec_key_name: str = None,
    service_account: str = None,
    network: str = None,
) -> NamedTuple('Outputs', [
    ("trials", list),
]):
    """
    Creates a Google Cloud AI Platform HyperparameterTuningJob and waits for it to complete.

    For example usage, see https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/experimental/hyperparameter_tuning_job/hp_tuning_job_sample.ipynb.

    For more information on using hyperparameter tuning, please visit:
    https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning

    Args:
        display_name (str):
            Required. The user-defined name of the HyperparameterTuningJob.
            The name can be up to 128 characters long and can consist
            of any UTF-8 characters.
        project (str):
            Required. Project to run the HyperparameterTuningJob in.
        base_output_directory (str):
            Required. The Cloud Storage location to store the output of this
            HyperparameterTuningJob. The base_output_directory of each
            child CustomJob backing a Trial is set to a subdirectory
            named after the trial ID under its parent HyperparameterTuningJob's
            base_output_directory. The following Vertex AI environment
            variables will be passed to containers or python modules
            when this field is set:
            For CustomJob backing a Trial of HyperparameterTuningJob:
            * AIP_MODEL_DIR = `<base_output_directory>/<trial_id>/model/`
            * AIP_CHECKPOINT_DIR = `<base_output_directory>/<trial_id>/checkpoints/`
            * AIP_TENSORBOARD_LOG_DIR = `<base_output_directory>/<trial_id>/logs/`
        worker_pool_specs (List[Dict]):
            Required. The spec of the worker pools including machine type and Docker image.
            All worker pools except the first one are optional and can be skipped by providing
            an empty value.
        study_spec_metrics (Dict[str, str]):
            Required. Dictionary representing metrics to optimize. The dictionary key is the metric_id,
            which is reported by your training job, and the dictionary value is the
            optimization goal of the metric ('minimize' or 'maximize'). For example:
            metrics = {'loss': 'minimize', 'accuracy': 'maximize'}
        study_spec_parameters (list[str]):
            Required. List serialized from the parameter dictionary. The dictionary
            represents parameters to optimize. The dictionary key is the parameter_id,
            which is passed into your training job as a command-line keyword argument, and the
            dictionary value is the parameter specification of that parameter. For example:

            from google.cloud.aiplatform import hyperparameter_tuning as hpt
            from google_cloud_pipeline_components.experimental import hyperparameter_tuning_job
            parameters = hyperparameter_tuning_job.serialize_parameters({
                'lr': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'),
                'units': hpt.IntegerParameterSpec(min=4, max=128, scale='linear'),
                'activation': hpt.CategoricalParameterSpec(values=['relu', 'selu']),
                'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear')
            })

            Supported parameter specifications can be found in aiplatform.hyperparameter_tuning.
            These parameter specifications are currently supported:
            DoubleParameterSpec, IntegerParameterSpec, CategoricalParameterSpec, DiscreteParameterSpec
        max_trial_count (int):
            Required. The desired total number of Trials.
        parallel_trial_count (int):
            Required. The desired number of Trials to run in parallel.
        max_failed_trial_count (Optional[int]):
            The number of failed Trials that need to be
            seen before failing the HyperparameterTuningJob.
            If set to 0, Vertex AI decides how many Trials
            must fail before the whole job fails.
        location (Optional[str]):
            Location to run the HyperparameterTuningJob in, defaults
            to "us-central1"
        study_spec_algorithm (Optional[str]):
            The search algorithm specified for the Study.
            Accepts one of the following:
                * `ALGORITHM_UNSPECIFIED` - If you do not specify an algorithm,
                your job uses the default Vertex AI algorithm. The default
                algorithm applies Bayesian optimization to arrive at the optimal
                solution with a more effective search over the parameter space.
                * 'GRID_SEARCH' - A simple grid search within the feasible space.
                This option is particularly useful if you want to specify a
                quantity of trials that is greater than the number of points in
                the feasible space. In such cases, if you do not specify a grid
                search, the Vertex AI default algorithm may generate duplicate
                suggestions. To use grid search, all parameter specs must be
                of type `IntegerParameterSpec`, `CategoricalParameterSpec`,
                or `DiscreteParameterSpec`.
                * 'RANDOM_SEARCH' - A simple random search within the feasible
                space.
        study_spec_measurement_selection_type (Optional[str]):
            This indicates which measurement to use if/when the service
            automatically selects the final measurement from previously reported
            intermediate measurements.
            Accepts: 'BEST_MEASUREMENT', 'LAST_MEASUREMENT'
            Choose this based on two considerations:
            A) Do you expect your measurements to monotonically improve? If so,
            choose 'LAST_MEASUREMENT'. On the other hand, if you're in a situation
            where your system can "over-train" and you expect the performance to
            get better for a while but then start declining, choose
            'BEST_MEASUREMENT'. B) Are your measurements significantly noisy
            and/or irreproducible? If so, 'BEST_MEASUREMENT' will tend to be
            over-optimistic, and it may be better to choose 'LAST_MEASUREMENT'. If
            both or neither of (A) and (B) apply, it doesn't matter which
            selection type is chosen.
        encryption_spec_key_name (Optional[str]):
            Customer-managed encryption key options for a
            HyperparameterTuningJob. If this is set, then
            all resources created by the
            HyperparameterTuningJob will be encrypted with
            the provided encryption key.

            Has the form:
            ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.
        service_account (Optional[str]):
            Specifies the service account for workload run-as account.
            Users submitting jobs must have act-as permission on this run-as account.
        network (Optional[str]):
            The full name of the Compute Engine network to which the job
            should be peered. For example, projects/12345/global/networks/myVPC.
            Private services access must already be configured for the network.
            If left unspecified, the job is not peered with any network.
    Returns:
        List of HyperparameterTuningJob trials
    """
    from google.cloud import aiplatform
    from google.cloud.aiplatform import hyperparameter_tuning as hpt
    from google.cloud.aiplatform_v1.types import study
    from google.cloud.aiplatform.hyperparameter_tuning import _SCALE_TYPE_MAP

    # Reverse the _SCALE_TYPE_MAP dict for deserialization
    SCALE_MAP = dict((reversed(item) for item in _SCALE_TYPE_MAP.items()))

    PARAMETER_SPEC_MAP = {
        hpt.DoubleParameterSpec._parameter_spec_value_key: hpt.DoubleParameterSpec,
        hpt.IntegerParameterSpec._parameter_spec_value_key: hpt.IntegerParameterSpec,
        hpt.CategoricalParameterSpec._parameter_spec_value_key: hpt.CategoricalParameterSpec,
        hpt.DiscreteParameterSpec._parameter_spec_value_key: hpt.DiscreteParameterSpec,
    }

    ALGORITHM_MAP = {
        'ALGORITHM_UNSPECIFIED': None,
        'GRID_SEARCH': 'grid',
        'RANDOM_SEARCH': 'random',
    }

    MEASUREMENT_SELECTION_TYPE_MAP = {
        'BEST_MEASUREMENT': 'best',
        'LAST_MEASUREMENT': 'last',
    }

    aiplatform.init(project=project, location=location,
                    staging_bucket=base_output_directory)

    # Deserialize the parameters
    parameters_kwargs = {}
    for parameter in study_spec_parameters:
        param = study.StudySpec.ParameterSpec.from_json(parameter)
        parameter_id = param.parameter_id
        param_attrs = {}
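        # Each hpt spec class exposes the name of its proto "oneof" value field
        # via _parameter_spec_value_key (e.g. double_value_spec); find which one
        # is populated on this ParameterSpec and rebuild the matching hpt spec.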
        for parameter_spec_value_key, parameter_spec in PARAMETER_SPEC_MAP.items():
            if getattr(param, parameter_spec_value_key):
                attrs = getattr(param, parameter_spec_value_key)
                for parameter, value in parameter_spec._parameter_value_map:
                    if hasattr(attrs, value):
                        param_attrs[parameter] = getattr(attrs, value)
                # Detect 'scale' in list of arguments to parameter_spec.__init__
                param_spec_code = parameter_spec.__init__.__code__
                if 'scale' in param_spec_code.co_varnames[:param_spec_code.co_argcount]:
                    param_attrs['scale'] = SCALE_MAP[param.scale_type]
                parameters_kwargs[parameter_id] = parameter_spec(
                    **param_attrs)  # pytype: disable=wrong-keyword-args
                break

    custom_job_display_name = display_name + '_custom_job'

    job = aiplatform.CustomJob(
        display_name=custom_job_display_name,
        staging_bucket=base_output_directory,
        worker_pool_specs=worker_pool_specs,
    )

    hp_job = aiplatform.HyperparameterTuningJob(
        display_name=display_name,
        custom_job=job,
        metric_spec=study_spec_metrics,
        parameter_spec={
            **parameters_kwargs
        },
        max_trial_count=max_trial_count,
        parallel_trial_count=parallel_trial_count,
        max_failed_trial_count=max_failed_trial_count,
        search_algorithm=ALGORITHM_MAP[study_spec_algorithm],
        measurement_selection=MEASUREMENT_SELECTION_TYPE_MAP[
            study_spec_measurement_selection_type
        ],
        encryption_spec_key_name=encryption_spec_key_name
    )

    hp_job.run(
        service_account=service_account,
        network=network)

    trials = [study.Trial.to_json(trial) for trial in hp_job.trials]

    return trials  # pytype: disable=bad-return-type
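
For completeness, the study_spec_parameters input is the serialized form produced by the serialize_parameters helper that the docstring above points to. A short sketch of preparing both study_spec inputs before wiring this component into a pipeline (the metric name and parameter ranges are illustrative):

from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google_cloud_pipeline_components.experimental import hyperparameter_tuning_job

study_spec_metrics = {"accuracy": "maximize"}

study_spec_parameters = hyperparameter_tuning_job.serialize_parameters({
    "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
    "batch_size": hpt.DiscreteParameterSpec(values=[16, 32, 64], scale="linear"),
})

# These two values are then passed to hyperparameter_tuning_job_run_op as
# study_spec_metrics and study_spec_parameters, alongside worker_pool_specs,
# max_trial_count, and parallel_trial_count.
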
    def test_run_hyperparameter_tuning_job_with_fail_at_creation(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr":
                hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units":
                hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation":
                hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]),
                "batch_size":
                hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=False,
        )

        with pytest.raises(RuntimeError) as e:
            job.wait_for_resource_creation()
        assert e.match("Mock fail")

        with pytest.raises(RuntimeError) as e:
            job.resource_name
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )

        with pytest.raises(RuntimeError) as e:
            job.network
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )

        with pytest.raises(RuntimeError) as e:
            job.trials
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )