Example #1
    def test_init_aiplatform_with_encryption_key_name_and_create_default_metadata_store(
        self, create_default_metadata_store_mock
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        )

        my_metadata_store = metadata_store._MetadataStore._create(
            encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        )

        expected_metadata_store = GapicMetadataStore(
            encryption_spec=_TEST_ENCRYPTION_SPEC,
        )

        create_default_metadata_store_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            metadata_store_id=_TEST_DEFAULT_ID,
            metadata_store=expected_metadata_store,
        )

        expected_metadata_store.name = _TEST_DEFAULT_NAME
        assert my_metadata_store._gca_resource == expected_metadata_store
Example #2
    def test_deploy_with_dedicated_resources(self, deploy_model_mock, sync):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_model = models.Model(_TEST_ID)
        test_endpoint.deploy(
            model=test_model,
            machine_type=_TEST_MACHINE_TYPE,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            sync=sync,
        )

        if not sync:
            test_endpoint.wait()

        expected_machine_spec = gca_machine_resources.MachineSpec(
            machine_type=_TEST_MACHINE_TYPE,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
        )
        expected_dedicated_resources = gca_machine_resources.DedicatedResources(
            machine_spec=expected_machine_spec,
            min_replica_count=1,
            max_replica_count=1,
        )
        expected_deployed_model = gca_endpoint.DeployedModel(
            dedicated_resources=expected_dedicated_resources,
            model=test_model.resource_name,
            display_name=None,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=test_endpoint.resource_name,
            deployed_model=expected_deployed_model,
            traffic_split={"0": 100},
            metadata=(),
        )
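The test above pins down the request shape; outside the test harness the same deployment is just a few SDK calls. A minimal sketch, assuming hypothetical project, endpoint, and model IDs:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

endpoint = aiplatform.Endpoint("1234567890")  # hypothetical endpoint ID
model = aiplatform.Model("0987654321")        # hypothetical model ID

# Passing machine_type (plus optional accelerators) selects DedicatedResources;
# leaving it out falls back to AutomaticResources, as in Example #5 below.
endpoint.deploy(
    model=model,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
)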
Example #3
    def test_list_artifacts(self, list_artifacts_mock):
        aiplatform.init(project=_TEST_PROJECT)

        filter = "test-filter"
        artifact_list = artifact._Artifact.list(
            filter=filter, metadata_store_id=_TEST_METADATA_STORE)

        expected_artifact = GapicArtifact(
            name=_TEST_ARTIFACT_NAME,
            schema_title=_TEST_SCHEMA_TITLE,
            schema_version=_TEST_SCHEMA_VERSION,
            display_name=_TEST_DISPLAY_NAME,
            description=_TEST_DESCRIPTION,
            metadata=_TEST_METADATA,
        )

        list_artifacts_mock.assert_called_once_with(
            request=ListArtifactsRequest(
                parent=_TEST_PARENT,
                filter=filter,
            ))
        assert len(artifact_list) == 2
        assert artifact_list[0]._gca_resource == expected_artifact
        assert artifact_list[1]._gca_resource == expected_artifact
Example #4
    def test_run_called_twice_raises(self, mock_dataset_text, sync):
        aiplatform.init(project=_TEST_PROJECT)

        job = training_jobs.AutoMLTextTrainingJob(
            display_name=_TEST_DISPLAY_NAME,
            prediction_type="classification",
            multi_label=True,
        )

        job.run(
            dataset=mock_dataset_text,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING,
            validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION,
            test_fraction_split=_TEST_FRACTION_SPLIT_TEST,
            sync=sync,
        )

        with pytest.raises(RuntimeError):
            job.run(
                dataset=mock_dataset_text,
                model_display_name=_TEST_MODEL_DISPLAY_NAME,
                sync=sync,
            )
Example #5
    def test_deploy(self, deploy_model_mock, sync):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
        test_model = models.Model(_TEST_ID)
        test_endpoint.deploy(test_model, sync=sync)

        if not sync:
            test_endpoint.wait()

        automatic_resources = gca_machine_resources.AutomaticResources(
            min_replica_count=1,
            max_replica_count=1,
        )
        deployed_model = gca_endpoint.DeployedModel(
            automatic_resources=automatic_resources,
            model=test_model.resource_name,
            display_name=None,
        )
        deploy_model_mock.assert_called_once_with(
            endpoint=test_endpoint.resource_name,
            deployed_model=deployed_model,
            traffic_split={"0": 100},
            metadata=(),
        )
Example #6
    def test_list_executions(self, list_executions_mock):
        aiplatform.init(project=_TEST_PROJECT)

        filter = "test-filter"
        execution_list = execution._Execution.list(
            filter=filter, metadata_store_id=_TEST_METADATA_STORE)

        expected_execution = GapicExecution(
            name=_TEST_EXECUTION_NAME,
            schema_title=_TEST_SCHEMA_TITLE,
            schema_version=_TEST_SCHEMA_VERSION,
            display_name=_TEST_DISPLAY_NAME,
            description=_TEST_DESCRIPTION,
            metadata=_TEST_METADATA,
        )

        list_executions_mock.assert_called_once_with(
            request=ListExecutionsRequest(
                parent=_TEST_PARENT,
                filter=filter,
            ))
        assert len(execution_list) == 2
        assert execution_list[0]._gca_resource == expected_execution
        assert execution_list[1]._gca_resource == expected_execution
Example #7
def batch_serve_features_to_bq_sample(
    project: str,
    location: str,
    featurestore_name: str,
    bq_destination_output_uri: str,
    read_instances_uri: str,
    sync: bool = True,
):

    aiplatform.init(project=project, location=location)

    fs = aiplatform.featurestore.Featurestore(featurestore_name=featurestore_name)

    SERVING_FEATURE_IDS = {
        "users": ["age", "gender", "liked_genres"],
        "movies": ["title", "average_rating", "genres"],
    }

    fs.batch_serve_to_bq(
        bq_destination_output_uri=bq_destination_output_uri,
        serving_feature_ids=SERVING_FEATURE_IDS,
        read_instances_uri=read_instances_uri,
        sync=sync,
    )
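A hedged invocation of the sample above; all identifiers are hypothetical, and the featurestore is assumed to contain `users` and `movies` entity types matching SERVING_FEATURE_IDS:

batch_serve_features_to_bq_sample(
    project="my-project",
    location="us-central1",
    featurestore_name="movie_predictions",  # hypothetical featurestore
    bq_destination_output_uri="bq://my-project.my_dataset.served_features",
    read_instances_uri="bq://my-project.my_dataset.read_instances",
)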
Example #8
def create_training_pipeline_image_classification_sample(
    project: str,
    display_name: str,
    dataset_id: int,
    location: str = "us-central1",
    model_display_name: str = None,
    training_fraction_split: float = 0.8,
    validation_fraction_split: float = 0.1,
    test_fraction_split: float = 0.1,
    budget_milli_node_hours: int = 8000,
    disable_early_stopping: bool = False,
    sync: bool = True,
):
    aiplatform.init(project=project, location=location)

    job = aiplatform.AutoMLImageTrainingJob(display_name=display_name)

    my_image_ds = aiplatform.ImageDataset(dataset_id)

    model = job.run(
        dataset=my_image_ds,
        model_display_name=model_display_name,
        training_fraction_split=training_fraction_split,
        validation_fraction_split=validation_fraction_split,
        test_fraction_split=test_fraction_split,
        budget_milli_node_hours=budget_milli_node_hours,
        disable_early_stopping=disable_early_stopping,
        sync=sync,
    )

    model.wait()

    print(model.display_name)
    print(model.resource_name)
    print(model.uri)
    return model
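A hedged call of the sample above with hypothetical values; with the defaults, the 0.8/0.1/0.1 fraction split and the 8000 milli-node-hour budget are used as-is:

model = create_training_pipeline_image_classification_sample(
    project="my-project",
    display_name="flowers-classifier",  # hypothetical job name
    dataset_id=1234567890,              # hypothetical ImageDataset ID
)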
Example #9
    def test_create_dataset(self, create_dataset_mock, sync):
        aiplatform.init(project=_TEST_PROJECT,
                        encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME)

        my_dataset = datasets.VideoDataset.create(
            display_name=_TEST_DISPLAY_NAME,
            sync=sync,
        )

        if not sync:
            my_dataset.wait()

        expected_dataset = gca_dataset.Dataset(
            display_name=_TEST_DISPLAY_NAME,
            metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_VIDEO,
            metadata=_TEST_NONTABULAR_DATASET_METADATA,
            encryption_spec=_TEST_ENCRYPTION_SPEC,
        )

        create_dataset_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            dataset=expected_dataset,
            metadata=_TEST_REQUEST_METADATA,
        )
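The test exercises `VideoDataset.create` with only a display name; in real use a data source is usually supplied at creation time. A sketch with hypothetical paths:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

ds = aiplatform.VideoDataset.create(
    display_name="my-videos",                  # hypothetical
    gcs_source="gs://my-bucket/videos.jsonl",  # hypothetical import file
    import_schema_uri=aiplatform.schema.dataset.ioformat.video.classification,
)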
Example #10
    def test_create_tensorboard_with_default_encryption_key(
        self, create_tensorboard_mock
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        )

        tensorboard.Tensorboard.create(
            display_name=_TEST_DISPLAY_NAME,
            create_request_timeout=None,
        )

        expected_tensorboard = gca_tensorboard.Tensorboard(
            display_name=_TEST_DISPLAY_NAME,
            encryption_spec=_TEST_ENCRYPTION_SPEC,
        )

        create_tensorboard_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            tensorboard=expected_tensorboard,
            metadata=_TEST_REQUEST_METADATA,
            timeout=None,
        )
Example #11
    def test_create_tensorboard_experiment(self,
                                           create_tensorboard_experiment_mock,
                                           get_tensorboard_experiment_mock):

        aiplatform.init(project=_TEST_PROJECT)

        tensorboard.TensorboardExperiment.create(
            tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID,
            tensorboard_name=_TEST_NAME,
            display_name=_TEST_DISPLAY_NAME,
        )

        expected_tensorboard_experiment = gca_tensorboard_experiment.TensorboardExperiment(
            display_name=_TEST_DISPLAY_NAME,
        )

        create_tensorboard_experiment_mock.assert_called_once_with(
            parent=_TEST_NAME,
            tensorboard_experiment=expected_tensorboard_experiment,
            tensorboard_experiment_id=_TEST_TENSORBOARD_EXPERIMENT_ID,
            metadata=_TEST_REQUEST_METADATA,
        )

        get_tensorboard_experiment_mock.assert_called_once_with(
            name=_TEST_TENSORBOARD_EXPERIMENT_NAME, retry=base._DEFAULT_RETRY)
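Outside the mocks, the two resources chain naturally: create a Tensorboard, then hang an experiment off it. A minimal sketch with hypothetical names:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

tb = aiplatform.Tensorboard.create(display_name="my-tensorboard")
exp = aiplatform.TensorboardExperiment.create(
    tensorboard_experiment_id="my-experiment",
    tensorboard_name=tb.resource_name,
    display_name="My experiment",
)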
Example #12
    def test_init_aiplatform_with_encryption_key_name_and_create_endpoint(
            self, create_endpoint_mock, sync):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
        )
        my_endpoint = models.Endpoint.create(display_name=_TEST_DISPLAY_NAME,
                                             sync=sync)

        if not sync:
            my_endpoint.wait()

        expected_endpoint = gca_endpoint.Endpoint(
            display_name=_TEST_DISPLAY_NAME,
            encryption_spec=_TEST_ENCRYPTION_SPEC)
        create_endpoint_mock.assert_called_once_with(
            parent=_TEST_PARENT,
            endpoint=expected_endpoint,
            metadata=(),
        )

        expected_endpoint.name = _TEST_ENDPOINT_NAME
        assert my_endpoint._gca_resource == expected_endpoint
Example #13
    def test_create_from_local_script(self, get_custom_job_mock,
                                      create_custom_job_mock, sync):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        # configuration on this is tested in test_training_jobs.py
        job = aiplatform.CustomJob.from_local_script(
            display_name=_TEST_DISPLAY_NAME,
            script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME,
            container_uri=_TEST_TRAINING_CONTAINER_IMAGE,
            base_output_dir=_TEST_BASE_OUTPUT_DIR,
            labels=_TEST_LABELS,
        )

        job.run(sync=sync)

        job.wait()

        assert (job._gca_resource.state ==
                gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED)
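`from_local_script` stages a local Python script to GCS and runs it inside the given container image, which is why the test sets a staging bucket in init. A hedged sketch with hypothetical values:

from google.cloud import aiplatform

aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-staging-bucket",  # required for script packaging
)

job = aiplatform.CustomJob.from_local_script(
    display_name="my-custom-job",
    script_path="task.py",                             # hypothetical local script
    container_uri="gcr.io/my-project/trainer:latest",  # hypothetical image
)
job.run()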
Example #14
    def test_init_experiment_reset(self):
        aiplatform.init(
            project=_TEST_PROJECT, location=_TEST_LOCATION, experiment=_TEST_EXPERIMENT
        )
        aiplatform.start_run(_TEST_RUN)

        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)

        assert metadata.metadata_service.experiment_name == _TEST_EXPERIMENT
        assert metadata.metadata_service.run_name == _TEST_RUN

        aiplatform.init(project=_TEST_OTHER_PROJECT, location=_TEST_LOCATION)

        assert metadata.metadata_service.experiment_name is None
        assert metadata.metadata_service.run_name is None
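The reset semantics above matter because experiment tracking hangs off the global init state: re-initializing with a different project clears the active experiment and run. A typical hedged usage of that state:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1",
                experiment="my-experiment")  # hypothetical experiment
aiplatform.start_run("run-1")
aiplatform.log_params({"learning_rate": 0.01})
aiplatform.log_metrics({"accuracy": 0.95})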
Example #15
class BQToVertexAIDataset(VertexAIWorker):
  """Worker to export a BigQuery table to a Vertex AI dataset."""

  PARAMS = [
      ('bq_project_id', 'string', True, '', 'BQ Project ID'),
      ('bq_dataset_id', 'string', True, '', 'BQ Dataset ID'),
      ('bq_table_id', 'string', True, '', 'BQ Table ID'),
      ('bq_dataset_location', 'string', True, '', 'BQ Dataset Location'),
      ('vertex_ai_dataset_name', 'string', False, '', 'Vertex AI Dataset Name'),
      ('clean_up', 'boolean', True, True, 'Clean Up'),
  ]

  aiplatform.init()

  def _execute(self):
    project_id = self._params['bq_project_id']
    dataset_id = self._params['bq_dataset_id']
    table_id = self._params['bq_table_id']
    if not self._params['vertex_ai_dataset_name']:
      display_name = f'{project_id}.{dataset_id}.{table_id}'
    else:
      display_name = self._params['vertex_ai_dataset_name']
    if self._params['clean_up']:
      try:
        datasets = aiplatform.TabularDataset.list(
          filter=f"display_name={display_name}",
          order_by="create_time")
        if datasets:
          # Keep the newest dataset (the list is ordered by create_time) and
          # delete any older duplicates with the same display name.
          for d in datasets[:-1]:
            d.delete()
            self.log_info(f'Deleted dataset: {d.resource_name}.')
      except Exception as e:
        self.log_info(f'Exception: {e}')
    dataset = aiplatform.TabularDataset.create(
      display_name=display_name,
      bq_source=f'bq://{project_id}.{dataset_id}.{table_id}')
    dataset.wait()
    self.log_info(f'Dataset created: {dataset.resource_name}')
    self.log_info('Finished successfully')
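Stripped of the worker framework, the core of `_execute` is two SDK calls. A standalone sketch with hypothetical identifiers:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

dataset = aiplatform.TabularDataset.create(
    display_name="my-project.my_dataset.my_table",
    bq_source="bq://my-project.my_dataset.my_table",  # hypothetical BQ table
)
dataset.wait()
print(dataset.resource_name)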
Example #16
    def test_splits_filter(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        mock_dataset_image,
        mock_model_service_get,
        mock_model,
        sync,
    ):
        """
        Initiate aiplatform with encryption key name.
        Create and run an AutoML Video Classification training job, verify calls and return value
        """

        aiplatform.init(
            project=_TEST_PROJECT,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = training_jobs.AutoMLImageTrainingJob(
            display_name=_TEST_DISPLAY_NAME, base_model=mock_model
        )

        model_from_job = job.run(
            dataset=mock_dataset_image,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter_split=_TEST_FILTER_SPLIT_TEST,
            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
            disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING,
            sync=sync,
        )

        if not sync:
            model_from_job.wait()

        true_filter_split = gca_training_pipeline.FilterSplit(
            training_filter=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter=_TEST_FILTER_SPLIT_TEST,
        )

        true_managed_model = gca_model.Model(
            display_name=_TEST_MODEL_DISPLAY_NAME,
            description=mock_model._gca_resource.description,
            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
        )

        true_input_data_config = gca_training_pipeline.InputDataConfig(
            filter_split=true_filter_split, dataset_id=mock_dataset_image.name,
        )

        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
            display_name=_TEST_DISPLAY_NAME,
            training_task_definition=schema.training_job.definition.automl_image_classification,
            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL,
            model_to_upload=true_managed_model,
            input_data_config=true_input_data_config,
            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=initializer.global_config.common_location_path(),
            training_pipeline=true_training_pipeline,
        )
Example #17
    def test_submit_call_pipeline_service_pipeline_job_create_legacy(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        job_spec_json,
        mock_load_json,
    ):
        aiplatform.init(
            project=_TEST_PROJECT,
            staging_bucket=_TEST_GCS_BUCKET_NAME,
            location=_TEST_LOCATION,
            credentials=_TEST_CREDENTIALS,
        )

        job = pipeline_jobs.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            template_path=_TEST_TEMPLATE_PATH,
            job_id=_TEST_PIPELINE_JOB_ID,
            parameter_values=_TEST_PIPELINE_PARAMETER_VALUES_LEGACY,
            enable_caching=True,
        )

        job.submit(service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK)

        expected_runtime_config_dict = {
            "parameters": {"string_param": {"stringValue": "hello"}},
            "gcsOutputDirectory": _TEST_GCS_BUCKET_NAME,
        }
        runtime_config = gca_pipeline_job_v1.PipelineJob.RuntimeConfig()._pb
        json_format.ParseDict(expected_runtime_config_dict, runtime_config)

        pipeline_spec = job_spec_json.get("pipelineSpec") or job_spec_json

        # Construct expected request
        expected_gapic_pipeline_job = gca_pipeline_job_v1.PipelineJob(
            display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME,
            pipeline_spec={
                "components": {},
                "pipelineInfo": pipeline_spec["pipelineInfo"],
                "root": pipeline_spec["root"],
                "schemaVersion": "2.0.0",
            },
            runtime_config=runtime_config,
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=_TEST_PARENT,
            pipeline_job=expected_gapic_pipeline_job,
            pipeline_job_id=_TEST_PIPELINE_JOB_ID,
        )

        assert not mock_pipeline_service_get.called

        job.wait()

        mock_pipeline_service_get.assert_called_with(
            name=_TEST_PIPELINE_JOB_NAME, retry=base._DEFAULT_RETRY
        )

        assert job._gca_resource == make_pipeline_job(
            gca_pipeline_state_v1.PipelineState.PIPELINE_STATE_SUCCEEDED
        )
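For comparison, a plain (non-test) submission path; the template path, bucket, and service account are hypothetical. `submit` creates the PipelineJob without blocking, unlike `run`:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1",
                staging_bucket="gs://my-bucket")  # default pipeline root

job = aiplatform.PipelineJob(
    display_name="my-pipeline",
    template_path="pipeline.json",  # compiled pipeline spec
    parameter_values={"string_param": "hello"},
    enable_caching=True,
)
job.submit(service_account="runner@my-project.iam.gserviceaccount.com")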
Example #18
    def test_splits_filter(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        mock_dataset_text,
        mock_model_service_get,
        mock_model,
        sync,
    ):
        """
        Initiate aiplatform with encryption key name.
        Create and run an AutoML Video Classification training job, verify calls and return value
        """

        aiplatform.init(
            project=_TEST_PROJECT,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        job = training_jobs.AutoMLTextTrainingJob(
            display_name=_TEST_DISPLAY_NAME,
            prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION,
            multi_label=_TEST_CLASSIFICATION_MULTILABEL,
        )

        model_from_job = job.run(
            dataset=mock_dataset_text,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter_split=_TEST_FILTER_SPLIT_TEST,
            sync=sync,
            create_request_timeout=None,
        )

        if not sync:
            model_from_job.wait()

        true_filter_split = gca_training_pipeline.FilterSplit(
            training_filter=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter=_TEST_FILTER_SPLIT_TEST,
        )

        true_managed_model = gca_model.Model(
            display_name=_TEST_MODEL_DISPLAY_NAME,
            description=mock_model._gca_resource.description,
            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
        )

        true_input_data_config = gca_training_pipeline.InputDataConfig(
            filter_split=true_filter_split,
            dataset_id=mock_dataset_text.name,
        )

        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
            display_name=_TEST_DISPLAY_NAME,
            training_task_definition=schema.training_job.definition.automl_text_classification,
            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION,
            model_to_upload=true_managed_model,
            input_data_config=true_input_data_config,
            encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=initializer.global_config.common_location_path(),
            training_pipeline=true_training_pipeline,
            timeout=None,
        )
Example #19
    def test_run_call_pipeline_service_create_sentiment(
        self,
        mock_pipeline_service_create,
        mock_pipeline_service_get,
        mock_dataset_text,
        mock_model_service_get,
        sync,
    ):
        """Create and run an AutoML Text Sentiment training job, verify calls and return value"""

        aiplatform.init(project=_TEST_PROJECT)

        job = training_jobs.AutoMLTextTrainingJob(
            display_name=_TEST_DISPLAY_NAME,
            labels=_TEST_LABELS,
            prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT,
            sentiment_max=10,
        )

        model_from_job = job.run(
            dataset=mock_dataset_text,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            model_labels=_TEST_MODEL_LABELS,
            training_filter_split=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter_split=_TEST_FILTER_SPLIT_TEST,
            sync=sync,
            create_request_timeout=None,
        )

        if not sync:
            model_from_job.wait()

        true_filter_split = gca_training_pipeline.FilterSplit(
            training_filter=_TEST_FILTER_SPLIT_TRAINING,
            validation_filter=_TEST_FILTER_SPLIT_VALIDATION,
            test_filter=_TEST_FILTER_SPLIT_TEST,
        )

        true_managed_model = gca_model.Model(
            display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS)

        true_input_data_config = gca_training_pipeline.InputDataConfig(
            filter_split=true_filter_split,
            dataset_id=mock_dataset_text.name,
        )

        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
            display_name=_TEST_DISPLAY_NAME,
            labels=_TEST_LABELS,
            training_task_definition=schema.training_job.definition.automl_text_sentiment,
            training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT,
            model_to_upload=true_managed_model,
            input_data_config=true_input_data_config,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=initializer.global_config.common_location_path(),
            training_pipeline=true_training_pipeline,
            timeout=None,
        )

        mock_model_service_get.assert_called_once_with(
            name=_TEST_MODEL_NAME, retry=base._DEFAULT_RETRY)
        assert job._gca_resource is mock_pipeline_service_get.return_value
        assert model_from_job._gca_resource is mock_model_service_get.return_value
        assert job.get_model()._gca_resource is mock_model_service_get.return_value
        assert not job.has_failed
        assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
Example #20
    def test_init_artifact_with_id(self, get_artifact_mock):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        artifact._Artifact(resource_name=_TEST_ARTIFACT_ID,
                           metadata_store_id=_TEST_METADATA_STORE)
        get_artifact_mock.assert_called_once_with(name=_TEST_ARTIFACT_NAME,
                                                  retry=base._DEFAULT_RETRY)
Example #21
    def test_init_artifact(self, get_artifact_mock):
        aiplatform.init(project=_TEST_PROJECT)
        artifact._Artifact(resource_name=_TEST_ARTIFACT_NAME)
        get_artifact_mock.assert_called_once_with(name=_TEST_ARTIFACT_NAME,
                                                  retry=base._DEFAULT_RETRY)
Example #22
    def test_init_execution_with_id(self, get_execution_mock):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        execution._Execution(resource_name=_TEST_EXECUTION_ID,
                             metadata_store_id=_TEST_METADATA_STORE)
        get_execution_mock.assert_called_once_with(name=_TEST_EXECUTION_NAME,
                                                   retry=base._DEFAULT_RETRY)
Example #23
    def test_init_execution(self, get_execution_mock):
        aiplatform.init(project=_TEST_PROJECT)
        execution._Execution(resource_name=_TEST_EXECUTION_NAME)
        get_execution_mock.assert_called_once_with(name=_TEST_EXECUTION_NAME,
                                                   retry=base._DEFAULT_RETRY)
Example #24
    def test_init_context(self, get_context_mock):
        aiplatform.init(project=_TEST_PROJECT)
        context._Context(resource_name=_TEST_CONTEXT_NAME)
        get_context_mock.assert_called_once_with(name=_TEST_CONTEXT_NAME,
                                                 retry=base._DEFAULT_RETRY)
Example #25
    def test_run_called_twice_raises(
        self,
        mock_dataset_time_series,
        sync,
    ):
        aiplatform.init(project=_TEST_PROJECT,
                        staging_bucket=_TEST_BUCKET_NAME)

        job = AutoMLForecastingTrainingJob(
            display_name=_TEST_DISPLAY_NAME,
            optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
            column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
        )

        job.run(
            dataset=mock_dataset_time_series,
            target_column=_TEST_TRAINING_TARGET_COLUMN,
            time_column=_TEST_TRAINING_TIME_COLUMN,
            time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
            unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
            available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
            forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
            data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
            data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
            model_display_name=_TEST_MODEL_DISPLAY_NAME,
            weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
            time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
            context_window=_TEST_TRAINING_CONTEXT_WINDOW,
            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
            export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
            export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
            export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
            quantiles=_TEST_TRAINING_QUANTILES,
            validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
            sync=sync,
        )

        with pytest.raises(RuntimeError):
            job.run(
                dataset=mock_dataset_time_series,
                target_column=_TEST_TRAINING_TARGET_COLUMN,
                time_column=_TEST_TRAINING_TIME_COLUMN,
                time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
                unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
                available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
                forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
                data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
                data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
                model_display_name=_TEST_MODEL_DISPLAY_NAME,
                weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
                time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
                context_window=_TEST_TRAINING_CONTEXT_WINDOW,
                budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
                export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
                export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
                export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
                quantiles=_TEST_TRAINING_QUANTILES,
                validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
                sync=sync,
            )
Example #26
    def test_run_call_pipeline_if_no_model_display_name(
        self,
        mock_pipeline_service_create,
        mock_dataset_time_series,
        mock_model_service_get,
        sync,
    ):
        aiplatform.init(project=_TEST_PROJECT,
                        staging_bucket=_TEST_BUCKET_NAME)

        job = AutoMLForecastingTrainingJob(
            display_name=_TEST_DISPLAY_NAME,
            optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
            column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
        )

        model_from_job = job.run(
            dataset=mock_dataset_time_series,
            target_column=_TEST_TRAINING_TARGET_COLUMN,
            time_column=_TEST_TRAINING_TIME_COLUMN,
            time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
            unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
            available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
            forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
            data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
            data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
            weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
            time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
            context_window=_TEST_TRAINING_CONTEXT_WINDOW,
            budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
            export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
            export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
            export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
            quantiles=_TEST_TRAINING_QUANTILES,
            validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
            sync=sync,
        )

        if not sync:
            model_from_job.wait()

        true_fraction_split = gca_training_pipeline.FractionSplit(
            training_fraction=_TEST_TRAINING_FRACTION_SPLIT,
            validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT,
            test_fraction=_TEST_TEST_FRACTION_SPLIT,
        )

        # Test that it defaults to the job display name
        true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME)

        true_input_data_config = gca_training_pipeline.InputDataConfig(
            fraction_split=true_fraction_split,
            dataset_id=mock_dataset_time_series.name,
        )

        true_training_pipeline = gca_training_pipeline.TrainingPipeline(
            display_name=_TEST_DISPLAY_NAME,
            training_task_definition=schema.training_job.definition.automl_forecasting,
            training_task_inputs=_TEST_TRAINING_TASK_INPUTS,
            model_to_upload=true_managed_model,
            input_data_config=true_input_data_config,
        )

        mock_pipeline_service_create.assert_called_once_with(
            parent=initializer.global_config.common_location_path(),
            training_pipeline=true_training_pipeline,
        )
Example #27
    def test_create_hyperparameter_tuning_job_with_enable_web_access(
        self,
        create_hyperparameter_tuning_job_mock_with_enable_web_access,
        get_hyperparameter_tuning_job_mock_with_enable_web_access,
        sync,
        caplog,
    ):
        caplog.set_level(logging.INFO)

        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation": hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]),
                "batch_size": hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
            labels=_TEST_LABELS,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            enable_web_access=test_custom_job._TEST_ENABLE_WEB_ACCESS,
            sync=sync,
        )

        job.wait()

        assert "workerpool0-0" in caplog.text

        expected_hyperparameter_tuning_job = (
            _get_hyperparameter_tuning_job_proto_with_enable_web_access())

        create_hyperparameter_tuning_job_mock_with_enable_web_access.assert_called_once_with(
            parent=_TEST_PARENT,
            hyperparameter_tuning_job=expected_hyperparameter_tuning_job,
        )

        assert job.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED
        assert job.network == _TEST_NETWORK
        assert job.trials == []

        caplog.clear()
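Condensed from the test above, the essential wiring is a CustomJob wrapped by a HyperparameterTuningJob whose metric_spec maps each metric id to a goal. A hedged sketch with hypothetical specs:

from google.cloud import aiplatform
from google.cloud.aiplatform import hyperparameter_tuning as hpt

aiplatform.init(project="my-project", location="us-central1",
                staging_bucket="gs://my-bucket")

custom_job = aiplatform.CustomJob(
    display_name="trial-job",
    worker_pool_specs=worker_pool_specs,  # hypothetical, defined elsewhere
)

hp_job = aiplatform.HyperparameterTuningJob(
    display_name="hp-tuning-demo",
    custom_job=custom_job,
    metric_spec={"accuracy": "maximize"},
    parameter_spec={
        "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
    },
    max_trial_count=8,
    parallel_trial_count=2,
)
hp_job.run()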
Example #28
    def setup_method(self):
        reload(initializer)
        reload(aiplatform)
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
Example #29
    def test_batch_predict_with_all_args(
        self, create_batch_prediction_job_with_explanations_mock, sync
    ):
        aiplatform.init(project=_TEST_PROJECT, location=_TEST_LOCATION)
        creds = auth_credentials.AnonymousCredentials()

        batch_prediction_job = jobs.BatchPredictionJob.create(
            model_name=_TEST_MODEL_NAME,
            job_display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
            gcs_source=_TEST_BATCH_PREDICTION_GCS_SOURCE,
            gcs_destination_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX,
            predictions_format="csv",
            model_parameters={},
            machine_type=_TEST_MACHINE_TYPE,
            accelerator_type=_TEST_ACCELERATOR_TYPE,
            accelerator_count=_TEST_ACCELERATOR_COUNT,
            starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
            max_replica_count=_TEST_MAX_REPLICA_COUNT,
            generate_explanation=True,
            explanation_metadata=_TEST_EXPLANATION_METADATA,
            explanation_parameters=_TEST_EXPLANATION_PARAMETERS,
            labels=_TEST_LABEL,
            credentials=creds,
            sync=sync,
            create_request_timeout=None,
        )

        batch_prediction_job.wait_for_resource_creation()

        batch_prediction_job.wait()

        # Construct expected request
        expected_gapic_batch_prediction_job = gca_batch_prediction_job_compat.BatchPredictionJob(
            display_name=_TEST_BATCH_PREDICTION_JOB_DISPLAY_NAME,
            model=_TEST_MODEL_NAME,
            input_config=gca_batch_prediction_job_compat.BatchPredictionJob.InputConfig(
                instances_format="jsonl",
                gcs_source=gca_io_compat.GcsSource(
                    uris=[_TEST_BATCH_PREDICTION_GCS_SOURCE]
                ),
            ),
            output_config=gca_batch_prediction_job_compat.BatchPredictionJob.OutputConfig(
                gcs_destination=gca_io_compat.GcsDestination(
                    output_uri_prefix=_TEST_BATCH_PREDICTION_GCS_DEST_PREFIX
                ),
                predictions_format="csv",
            ),
            dedicated_resources=gca_machine_resources_compat.BatchDedicatedResources(
                machine_spec=gca_machine_resources_compat.MachineSpec(
                    machine_type=_TEST_MACHINE_TYPE,
                    accelerator_type=_TEST_ACCELERATOR_TYPE,
                    accelerator_count=_TEST_ACCELERATOR_COUNT,
                ),
                starting_replica_count=_TEST_STARTING_REPLICA_COUNT,
                max_replica_count=_TEST_MAX_REPLICA_COUNT,
            ),
            generate_explanation=True,
            explanation_spec=gca_explanation_compat.ExplanationSpec(
                metadata=_TEST_EXPLANATION_METADATA,
                parameters=_TEST_EXPLANATION_PARAMETERS,
            ),
            labels=_TEST_LABEL,
        )

        create_batch_prediction_job_with_explanations_mock.assert_called_once_with(
            parent=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
            batch_prediction_job=expected_gapic_batch_prediction_job,
            timeout=None,
        )
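The same request, minus the mocks, reduces to one create call; the model name and GCS paths are hypothetical. Supplying machine_type switches the job to BatchDedicatedResources, exactly as asserted above:

from google.cloud import aiplatform

aiplatform.init(project="my-project", location="us-central1")

job = aiplatform.BatchPredictionJob.create(
    model_name="projects/my-project/locations/us-central1/models/456",
    job_display_name="batch-predict-demo",
    gcs_source="gs://my-bucket/instances.jsonl",
    gcs_destination_prefix="gs://my-bucket/predictions",
    machine_type="n1-standard-4",
)
job.wait()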
Example #30
    def test_run_hyperparameter_tuning_job_with_fail_at_creation(self):
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
            staging_bucket=_TEST_STAGING_BUCKET,
            encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME,
        )

        custom_job = aiplatform.CustomJob(
            display_name=test_custom_job._TEST_DISPLAY_NAME,
            worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC,
            base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR,
        )

        job = aiplatform.HyperparameterTuningJob(
            display_name=_TEST_DISPLAY_NAME,
            custom_job=custom_job,
            metric_spec={_TEST_METRIC_SPEC_KEY: _TEST_METRIC_SPEC_VALUE},
            parameter_spec={
                "lr": hpt.DoubleParameterSpec(min=0.001, max=0.1, scale="log"),
                "units": hpt.IntegerParameterSpec(min=4, max=1028, scale="linear"),
                "activation": hpt.CategoricalParameterSpec(
                    values=["relu", "sigmoid", "elu", "selu", "tanh"]),
                "batch_size": hpt.DiscreteParameterSpec(values=[16, 32], scale="linear"),
            },
            parallel_trial_count=_TEST_PARALLEL_TRIAL_COUNT,
            max_trial_count=_TEST_MAX_TRIAL_COUNT,
            max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT,
            search_algorithm=_TEST_SEARCH_ALGORITHM,
            measurement_selection=_TEST_MEASUREMENT_SELECTION,
        )

        job.run(
            service_account=_TEST_SERVICE_ACCOUNT,
            network=_TEST_NETWORK,
            timeout=_TEST_TIMEOUT,
            restart_job_on_worker_restart=_TEST_RESTART_JOB_ON_WORKER_RESTART,
            sync=False,
        )

        with pytest.raises(RuntimeError) as e:
            job.wait_for_resource_creation()
        assert e.match("Mock fail")

        with pytest.raises(RuntimeError) as e:
            job.resource_name
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )

        with pytest.raises(RuntimeError) as e:
            job.network
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )

        with pytest.raises(RuntimeError) as e:
            job.trials
        assert e.match(
            "HyperparameterTuningJob resource has not been created. Resource failed with: Mock fail"
        )