def create(
    cls,
    display_name: str,
    gcs_source: Optional[Union[str, Sequence[str]]] = None,
    bq_source: Optional[str] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    encryption_spec_key_name: Optional[str] = None,
    sync: bool = True,
) -> "TabularDataset":
    """Creates a new tabular dataset.

    Args:
        display_name (str):
            Required. The user-defined name of the Dataset. The name can be
            up to 128 characters long and can consist of any UTF-8
            characters.
        gcs_source (Union[str, Sequence[str]]):
            Google Cloud Storage URI(-s) to the input file(s). May contain
            wildcards. For more information on wildcards, see
            https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
            Examples:
                str: "gs://bucket/file.csv"
                Sequence[str]: ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]
        bq_source (str):
            BigQuery URI to the input table.
            Example: "bq://project.dataset.table_name"
        project (str):
            Project to upload this dataset to. Overrides project set in
            aiplatform.init.
        location (str):
            Location to upload this dataset to. Overrides location set in
            aiplatform.init.
        credentials (auth_credentials.Credentials):
            Custom credentials to use to upload this dataset. Overrides
            credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Strings which should be sent along with the request as metadata.
        encryption_spec_key_name (Optional[str]):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the dataset. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Dataset and all sub-resources of this Dataset will
            be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        sync (bool):
            Whether to execute this method synchronously. If False, this
            method will be executed in a concurrent Future and any
            downstream object will be immediately returned and synced when
            the Future has completed.

    Returns:
        tabular_dataset (TabularDataset):
            Instantiated representation of the managed tabular dataset
            resource.
    """
    utils.validate_display_name(display_name)

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    metadata_schema_uri = schema.dataset.metadata.tabular

    datasource = _datasources.create_datasource(
        metadata_schema_uri=metadata_schema_uri,
        gcs_source=gcs_source,
        bq_source=bq_source,
    )

    return cls._create_and_import(
        api_client=api_client,
        parent=initializer.global_config.common_location_path(
            project=project, location=location
        ),
        display_name=display_name,
        metadata_schema_uri=metadata_schema_uri,
        datasource=datasource,
        project=project or initializer.global_config.project,
        location=location or initializer.global_config.location,
        credentials=credentials or initializer.global_config.credentials,
        request_metadata=request_metadata,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
        sync=sync,
    )
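# A minimal usage sketch for the method above, assuming
# `aiplatform.init(project=..., location=...)` has already been called; the
# bucket path and display name are placeholders:
#
#     from google.cloud import aiplatform
#
#     ds = aiplatform.TabularDataset.create(
#         display_name="my-tabular-dataset",
#         gcs_source="gs://my-bucket/data.csv",
#     )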
def create(
    cls,
    display_name: Optional[str] = None,
    gcs_source: Optional[Union[str, Sequence[str]]] = None,
    bq_source: Optional[str] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    labels: Optional[Dict[str, str]] = None,
    encryption_spec_key_name: Optional[str] = None,
    sync: bool = True,
    create_request_timeout: Optional[float] = None,
) -> "TabularDataset":
    """Creates a new tabular dataset.

    Args:
        display_name (str):
            Optional. The user-defined name of the Dataset. The name can be
            up to 128 characters long and can consist of any UTF-8
            characters.
        gcs_source (Union[str, Sequence[str]]):
            Google Cloud Storage URI(-s) to the input file(s). May contain
            wildcards. For more information on wildcards, see
            https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
            Examples:
                str: "gs://bucket/file.csv"
                Sequence[str]: ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]
        bq_source (str):
            BigQuery URI to the input table.
            Example: "bq://project.dataset.table_name"
        project (str):
            Project to upload this dataset to. Overrides project set in
            aiplatform.init.
        location (str):
            Location to upload this dataset to. Overrides location set in
            aiplatform.init.
        credentials (auth_credentials.Credentials):
            Custom credentials to use to upload this dataset. Overrides
            credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Strings which should be sent along with the request as metadata.
        labels (Dict[str, str]):
            Optional. Labels with user-defined metadata to organize your
            datasets. Label keys and values can be no longer than 64
            characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. No more than 64 user
            labels can be associated with one Dataset (System labels are
            excluded). See https://goo.gl/xmQnxf for more information and
            examples of labels. System reserved label keys are prefixed with
            "aiplatform.googleapis.com/" and are immutable.
        encryption_spec_key_name (Optional[str]):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the dataset. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Dataset and all sub-resources of this Dataset will
            be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        sync (bool):
            Whether to execute this method synchronously. If False, this
            method will be executed in a concurrent Future and any
            downstream object will be immediately returned and synced when
            the Future has completed.
        create_request_timeout (float):
            Optional. The timeout for the create request in seconds.

    Returns:
        tabular_dataset (TabularDataset):
            Instantiated representation of the managed tabular dataset
            resource.
    """
    if not display_name:
        display_name = cls._generate_display_name()

    utils.validate_display_name(display_name)

    if labels:
        utils.validate_labels(labels)

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    metadata_schema_uri = schema.dataset.metadata.tabular

    datasource = _datasources.create_datasource(
        metadata_schema_uri=metadata_schema_uri,
        gcs_source=gcs_source,
        bq_source=bq_source,
    )

    return cls._create_and_import(
        api_client=api_client,
        parent=initializer.global_config.common_location_path(
            project=project, location=location
        ),
        display_name=display_name,
        metadata_schema_uri=metadata_schema_uri,
        datasource=datasource,
        project=project or initializer.global_config.project,
        location=location or initializer.global_config.location,
        credentials=credentials or initializer.global_config.credentials,
        request_metadata=request_metadata,
        labels=labels,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
        sync=sync,
        create_request_timeout=create_request_timeout,
    )
def create(
    cls,
    display_name: str,
    gcs_source: Optional[Union[str, Sequence[str]]] = None,
    import_schema_uri: Optional[str] = None,
    data_item_labels: Optional[Dict] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    encryption_spec_key_name: Optional[str] = None,
    sync: bool = True,
) -> "ImageDataset":
    """Creates a new image dataset and optionally imports data into the
    dataset when a source and import_schema_uri are passed.

    Args:
        display_name (str):
            Required. The user-defined name of the Dataset. The name can be
            up to 128 characters long and can consist of any UTF-8
            characters.
        gcs_source (Union[str, Sequence[str]]):
            Google Cloud Storage URI(-s) to the input file(s). May contain
            wildcards. For more information on wildcards, see
            https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
            Examples:
                str: "gs://bucket/file.csv"
                Sequence[str]: ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]
        import_schema_uri (str):
            Points to a YAML file stored on Google Cloud Storage describing
            the import format. Validation will be done against the schema.
            The schema is defined as an `OpenAPI 3.0.2 Schema Object
            <https://tinyurl.com/y538mdwt>`__.
        data_item_labels (Dict):
            Labels that will be applied to newly imported DataItems. If an
            identical DataItem as one being imported already exists in the
            Dataset, then these labels will be appended to those of the
            already existing one, and if a label with an identical key was
            imported before, the old label value will be overwritten. If two
            DataItems are identical in the same import data operation, the
            labels will be combined and if a key collision happens in this
            case, one of the values will be picked randomly. Two DataItems
            are considered identical if their content bytes are identical
            (e.g. image bytes or pdf bytes). These labels will be overridden
            by Annotation labels specified inside the index file referenced
            by
            [import_schema_uri][google.cloud.aiplatform.v1beta1.ImportDataConfig.import_schema_uri],
            e.g. a jsonl file.
        project (str):
            Project to upload this dataset to. Overrides project set in
            aiplatform.init.
        location (str):
            Location to upload this dataset to. Overrides location set in
            aiplatform.init.
        credentials (auth_credentials.Credentials):
            Custom credentials to use to upload this dataset. Overrides
            credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Strings which should be sent along with the request as metadata.
        encryption_spec_key_name (Optional[str]):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the dataset. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Dataset and all sub-resources of this Dataset will
            be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        sync (bool):
            Whether to execute this method synchronously. If False, this
            method will be executed in a concurrent Future and any
            downstream object will be immediately returned and synced when
            the Future has completed.

    Returns:
        image_dataset (ImageDataset):
            Instantiated representation of the managed image dataset
            resource.
    """
    utils.validate_display_name(display_name)

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    metadata_schema_uri = schema.dataset.metadata.image

    datasource = _datasources.create_datasource(
        metadata_schema_uri=metadata_schema_uri,
        import_schema_uri=import_schema_uri,
        gcs_source=gcs_source,
        data_item_labels=data_item_labels,
    )

    return cls._create_and_import(
        api_client=api_client,
        parent=initializer.global_config.common_location_path(
            project=project, location=location
        ),
        display_name=display_name,
        metadata_schema_uri=metadata_schema_uri,
        datasource=datasource,
        project=project or initializer.global_config.project,
        location=location or initializer.global_config.location,
        credentials=credentials or initializer.global_config.credentials,
        request_metadata=request_metadata,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
        sync=sync,
    )
def create(
    cls,
    display_name: Optional[str] = None,
    gcs_source: Optional[Union[str, Sequence[str]]] = None,
    import_schema_uri: Optional[str] = None,
    data_item_labels: Optional[Dict] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    labels: Optional[Dict[str, str]] = None,
    encryption_spec_key_name: Optional[str] = None,
    sync: bool = True,
    create_request_timeout: Optional[float] = None,
) -> "TextDataset":
    """Creates a new text dataset and optionally imports data into the
    dataset when a source and import_schema_uri are passed.

    Example Usage:
        ds = aiplatform.TextDataset.create(
            display_name='my-dataset',
            gcs_source='gs://my-bucket/dataset.csv',
            import_schema_uri=aiplatform.schema.dataset.ioformat.text.multi_label_classification,
        )

    Args:
        display_name (str):
            Optional. The user-defined name of the Dataset. The name can be
            up to 128 characters long and can consist of any UTF-8
            characters.
        gcs_source (Union[str, Sequence[str]]):
            Google Cloud Storage URI(-s) to the input file(s). May contain
            wildcards. For more information on wildcards, see
            https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
            Examples:
                str: "gs://bucket/file.csv"
                Sequence[str]: ["gs://bucket/file1.csv", "gs://bucket/file2.csv"]
        import_schema_uri (str):
            Points to a YAML file stored on Google Cloud Storage describing
            the import format. Validation will be done against the schema.
            The schema is defined as an `OpenAPI 3.0.2 Schema Object
            <https://tinyurl.com/y538mdwt>`__.
        data_item_labels (Dict):
            Labels that will be applied to newly imported DataItems. If an
            identical DataItem as one being imported already exists in the
            Dataset, then these labels will be appended to those of the
            already existing one, and if a label with an identical key was
            imported before, the old label value will be overwritten. If two
            DataItems are identical in the same import data operation, the
            labels will be combined and if a key collision happens in this
            case, one of the values will be picked randomly. Two DataItems
            are considered identical if their content bytes are identical
            (e.g. image bytes or pdf bytes). These labels will be overridden
            by Annotation labels specified inside the index file referenced
            by ``import_schema_uri``, e.g. a jsonl file.
        project (str):
            Project to upload this dataset to. Overrides project set in
            aiplatform.init.
        location (str):
            Location to upload this dataset to. Overrides location set in
            aiplatform.init.
        credentials (auth_credentials.Credentials):
            Custom credentials to use to upload this dataset. Overrides
            credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Strings which should be sent along with the request as metadata.
        labels (Dict[str, str]):
            Optional. Labels with user-defined metadata to organize your
            datasets. Label keys and values can be no longer than 64
            characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. No more than 64 user
            labels can be associated with one Dataset (System labels are
            excluded). See https://goo.gl/xmQnxf for more information and
            examples of labels. System reserved label keys are prefixed with
            "aiplatform.googleapis.com/" and are immutable.
        encryption_spec_key_name (Optional[str]):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the dataset. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Dataset and all sub-resources of this Dataset will
            be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        create_request_timeout (float):
            Optional. The timeout for the create request in seconds.
        sync (bool):
            Whether to execute this method synchronously. If False, this
            method will be executed in a concurrent Future and any
            downstream object will be immediately returned and synced when
            the Future has completed.

    Returns:
        text_dataset (TextDataset):
            Instantiated representation of the managed text dataset
            resource.
    """
    if not display_name:
        display_name = cls._generate_display_name()

    utils.validate_display_name(display_name)

    if labels:
        utils.validate_labels(labels)

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    metadata_schema_uri = schema.dataset.metadata.text

    datasource = _datasources.create_datasource(
        metadata_schema_uri=metadata_schema_uri,
        import_schema_uri=import_schema_uri,
        gcs_source=gcs_source,
        data_item_labels=data_item_labels,
    )

    return cls._create_and_import(
        api_client=api_client,
        parent=initializer.global_config.common_location_path(
            project=project, location=location
        ),
        display_name=display_name,
        metadata_schema_uri=metadata_schema_uri,
        datasource=datasource,
        project=project or initializer.global_config.project,
        location=location or initializer.global_config.location,
        credentials=credentials or initializer.global_config.credentials,
        request_metadata=request_metadata,
        labels=labels,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
        sync=sync,
        create_request_timeout=create_request_timeout,
    )
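# Beyond the docstring's example, a sketch that also passes the optional
# `labels` and `create_request_timeout` arguments (values are placeholders):
#
#     ds = aiplatform.TextDataset.create(
#         display_name="my-text-dataset",
#         gcs_source="gs://my-bucket/dataset.csv",
#         import_schema_uri=aiplatform.schema.dataset.ioformat.text.multi_label_classification,
#         labels={"team": "nlp"},
#         create_request_timeout=300.0,
#     )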
def __init__(
    self,
    # TODO(b/223262536): Make the display_name parameter optional in the next major release
    display_name: str,
    template_path: str,
    job_id: Optional[str] = None,
    pipeline_root: Optional[str] = None,
    parameter_values: Optional[Dict[str, Any]] = None,
    enable_caching: Optional[bool] = None,
    encryption_spec_key_name: Optional[str] = None,
    labels: Optional[Dict[str, str]] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
):
    """Retrieves a PipelineJob resource and instantiates its representation.

    Args:
        display_name (str):
            Required. The user-defined name of this Pipeline.
        template_path (str):
            Required. The path of the PipelineJob or PipelineSpec JSON or
            YAML file. It can be a local path or a Google Cloud Storage URI.
            Example: "gs://project.name"
        job_id (str):
            Optional. The unique ID of the job run. If not specified,
            pipeline name + timestamp will be used.
        pipeline_root (str):
            Optional. The root of the pipeline outputs. Defaults to the
            staging bucket.
        parameter_values (Dict[str, Any]):
            Optional. The mapping from runtime parameter names to its values
            that control the pipeline run.
        enable_caching (bool):
            Optional. Whether to turn on caching for the run.

            If this is not set, it defaults to the compile time settings,
            which are True for all tasks by default, while users may specify
            different caching options for individual tasks.

            If this is set, the setting applies to all tasks in the
            pipeline.

            Overrides the compile time settings.
        encryption_spec_key_name (str):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the job. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If this is set, then all resources created by the PipelineJob
            will be encrypted with the provided encryption key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        labels (Dict[str, str]):
            Optional. The user-defined metadata to organize the PipelineJob.
        credentials (auth_credentials.Credentials):
            Optional. Custom credentials to use to create this PipelineJob.
            Overrides credentials set in aiplatform.init.
        project (str):
            Optional. The project that you want to run this PipelineJob in.
            If not set, the project set in aiplatform.init will be used.
        location (str):
            Optional. Location to create PipelineJob. If not set, location
            set in aiplatform.init will be used.

    Raises:
        ValueError: If job_id or labels have incorrect format.
    """
    if not display_name:
        display_name = self.__class__._generate_display_name()
    utils.validate_display_name(display_name)

    if labels:
        utils.validate_labels(labels)

    super().__init__(project=project, location=location, credentials=credentials)

    self._parent = initializer.global_config.common_location_path(
        project=project, location=location
    )

    # This loads both .yaml and .json files because YAML is a superset of JSON.
    pipeline_json = yaml_utils.load_yaml(template_path, self.project, self.credentials)

    # pipeline_json can be either a PipelineJob or a PipelineSpec.
    if pipeline_json.get("pipelineSpec") is not None:
        pipeline_job = pipeline_json
        pipeline_root = (
            pipeline_root
            or pipeline_job["pipelineSpec"].get("defaultPipelineRoot")
            or pipeline_job["runtimeConfig"].get("gcsOutputDirectory")
            or initializer.global_config.staging_bucket
        )
    else:
        pipeline_job = {
            "pipelineSpec": pipeline_json,
            "runtimeConfig": {},
        }
        pipeline_root = (
            pipeline_root
            or pipeline_job["pipelineSpec"].get("defaultPipelineRoot")
            or initializer.global_config.staging_bucket
        )

    builder = pipeline_utils.PipelineRuntimeConfigBuilder.from_job_spec_json(
        pipeline_job
    )
    builder.update_pipeline_root(pipeline_root)
    builder.update_runtime_parameters(parameter_values)
    runtime_config_dict = builder.build()

    runtime_config = gca_pipeline_job_v1.PipelineJob.RuntimeConfig()._pb
    json_format.ParseDict(runtime_config_dict, runtime_config)

    pipeline_name = pipeline_job["pipelineSpec"]["pipelineInfo"]["name"]
    self.job_id = job_id or "{pipeline_name}-{timestamp}".format(
        pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower())
        .lstrip("-")
        .rstrip("-"),
        timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"),
    )
    if not _VALID_NAME_PATTERN.match(self.job_id):
        raise ValueError(
            "Generated job ID: {} is illegal as a Vertex pipelines job ID. "
            "Expecting an ID following the regex pattern "
            '"[a-z][-a-z0-9]{{0,127}}"'.format(self.job_id)
        )

    if enable_caching is not None:
        _set_enable_caching_value(pipeline_job["pipelineSpec"], enable_caching)

    self._gca_resource = gca_pipeline_job_v1.PipelineJob(
        display_name=display_name,
        pipeline_spec=pipeline_job["pipelineSpec"],
        labels=labels,
        runtime_config=runtime_config,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
    )
def create(
    cls,
    job_display_name: str,
    model_name: str,
    instances_format: str = "jsonl",
    predictions_format: str = "jsonl",
    gcs_source: Optional[Union[str, Sequence[str]]] = None,
    bigquery_source: Optional[str] = None,
    gcs_destination_prefix: Optional[str] = None,
    bigquery_destination_prefix: Optional[str] = None,
    model_parameters: Optional[Dict] = None,
    machine_type: Optional[str] = None,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    starting_replica_count: Optional[int] = None,
    max_replica_count: Optional[int] = None,
    generate_explanation: Optional[bool] = False,
    explanation_metadata: Optional["aiplatform.explain.ExplanationMetadata"] = None,
    explanation_parameters: Optional["aiplatform.explain.ExplanationParameters"] = None,
    labels: Optional[dict] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    encryption_spec_key_name: Optional[str] = None,
    sync: bool = True,
) -> "BatchPredictionJob":
    """Create a batch prediction job.

    Args:
        job_display_name (str):
            Required. The user-defined name of the BatchPredictionJob. The
            name can be up to 128 characters long and can consist of any
            UTF-8 characters.
        model_name (str):
            Required. A fully-qualified model resource name or model ID.
            Example: "projects/123/locations/us-central1/models/456" or
            "456" when project and location are initialized or passed.
        instances_format (str):
            Required. The format in which instances are given, must be one
            of "jsonl", "csv", "bigquery", "tf-record", "tf-record-gzip", or
            "file-list". Default is "jsonl" when using `gcs_source`. If a
            `bigquery_source` is provided, this is overridden to "bigquery".
        predictions_format (str):
            Required. The format in which AI Platform gives the predictions,
            must be one of "jsonl", "csv", or "bigquery". Default is "jsonl"
            when using `gcs_destination_prefix`. If a
            `bigquery_destination_prefix` is provided, this is overridden to
            "bigquery".
        gcs_source (Optional[Sequence[str]]):
            Google Cloud Storage URI(-s) to your instances to run batch
            prediction on. They must match `instances_format`. May contain
            wildcards. For more information on wildcards, see
            https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.
        bigquery_source (Optional[str]):
            BigQuery URI to a table, up to 2000 characters long. For
            example: `projectId.bqDatasetId.bqTableId`
        gcs_destination_prefix (Optional[str]):
            The Google Cloud Storage location of the directory where the
            output is to be written to. In the given directory a new
            directory is created. Its name is
            ``prediction-<model-display-name>-<job-create-time>``, where the
            timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside
            of it files ``predictions_0001.<extension>``,
            ``predictions_0002.<extension>``, ...,
            ``predictions_N.<extension>`` are created where ``<extension>``
            depends on the chosen ``predictions_format``, and N may equal
            0001 and depends on the total number of successfully predicted
            instances. If the Model has both ``instance`` and ``prediction``
            schemata defined then each such file contains predictions as per
            the ``predictions_format``. If prediction for any instance
            failed (partially or completely), then additional
            ``errors_0001.<extension>``, ``errors_0002.<extension>``, ...,
            ``errors_N.<extension>`` files are created (N depends on the
            total number of failed predictions). These files contain the
            failed instances, as per their schema, followed by an additional
            ``error`` field whose value is a ``google.rpc.Status`` containing
            only ``code`` and ``message`` fields.
        bigquery_destination_prefix (Optional[str]):
            The BigQuery project location where the output is to be written
            to. In the given project a new dataset is created with name
            ``prediction_<model-display-name>_<job-create-time>``, where
            ``<model-display-name>`` is made BigQuery-dataset-name
            compatible (for example, most special characters become
            underscores), and the timestamp is in YYYY_MM_DDThh_mm_ss_sssZ
            "based on ISO-8601" format. In the dataset two tables will be
            created, ``predictions``, and ``errors``. If the Model has both
            ``instance`` and ``prediction`` schemata defined then the tables
            have columns as follows: The ``predictions`` table contains
            instances for which the prediction succeeded; it has columns as
            per a concatenation of the Model's instance and prediction
            schemata. The ``errors`` table contains rows for which the
            prediction has failed; it has instance columns, as per the
            instance schema, followed by a single "errors" column, whose
            values are ``google.rpc.Status`` represented as a STRUCT, and
            containing only ``code`` and ``message``.
        model_parameters (Optional[Dict]):
            The parameters that govern the predictions. The schema of the
            parameters may be specified via the Model's
            `parameters_schema_uri`.
        machine_type (Optional[str]):
            The type of machine for running batch prediction on dedicated
            resources. Not specifying a machine type will result in the
            batch prediction job being run with automatic resources.
        accelerator_type (Optional[str]):
            The type of accelerator(s) that may be attached to the machine
            as per `accelerator_count`. Only used if `machine_type` is set.
        accelerator_count (Optional[int]):
            The number of accelerators to attach to the `machine_type`. Only
            used if `machine_type` is set.
        starting_replica_count (Optional[int]):
            The number of machine replicas used at the start of the batch
            operation. If not set, AI Platform decides the starting number,
            not greater than `max_replica_count`. Only used if
            `machine_type` is set.
        max_replica_count (Optional[int]):
            The maximum number of machine replicas the batch operation may
            be scaled to. Only used if `machine_type` is set. Default is 10.
        generate_explanation (bool):
            Optional. Generate explanation along with the batch prediction
            results. This will cause the batch prediction output to include
            explanations based on the `prediction_format`:

            - `bigquery`: output includes a column named `explanation`. The
              value is a struct that conforms to the
              [aiplatform.gapic.Explanation] object.
            - `jsonl`: The JSON objects on each line include an additional
              entry keyed `explanation`. The value of the entry is a JSON
              object that conforms to the [aiplatform.gapic.Explanation]
              object.
            - `csv`: Generating explanations for CSV format is not
              supported.
        explanation_metadata (aiplatform.explain.ExplanationMetadata):
            Optional. Explanation metadata configuration for this
            BatchPredictionJob. Can be specified only if
            `generate_explanation` is set to `True`.

            This value overrides the value of `Model.explanation_metadata`.
            All fields of `explanation_metadata` are optional in the
            request. If a field of the `explanation_metadata` object is not
            populated, the corresponding field of the
            `Model.explanation_metadata` object is inherited. For more
            details, see `Ref docs <http://tinyurl.com/1igh60kt>`__.
        explanation_parameters (aiplatform.explain.ExplanationParameters):
            Optional. Parameters to configure explaining for the Model's
            predictions. Can be specified only if `generate_explanation` is
            set to `True`.

            This value overrides the value of
            `Model.explanation_parameters`. All fields of
            `explanation_parameters` are optional in the request. If a field
            of the `explanation_parameters` object is not populated, the
            corresponding field of the `Model.explanation_parameters` object
            is inherited. For more details, see `Ref docs
            <http://tinyurl.com/1an4zake>`__.
        labels (Optional[dict]):
            The labels with user-defined metadata to organize your
            BatchPredictionJobs. Label keys and values can be no longer than
            64 characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. See https://goo.gl/xmQnxf
            for more information and examples of labels.
        project (Optional[str]):
            Project to create this batch prediction job in. Overrides
            project set in aiplatform.init.
        location (Optional[str]):
            Location to create this batch prediction job in. Overrides
            location set in aiplatform.init.
        credentials (Optional[auth_credentials.Credentials]):
            Custom credentials to use to create this batch prediction job.
            Overrides credentials set in aiplatform.init.
        encryption_spec_key_name (Optional[str]):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the job. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If this is set, then all resources created by the
            BatchPredictionJob will be encrypted with the provided
            encryption key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        sync (bool):
            Whether to execute this method synchronously. If False, this
            method will be executed in a concurrent Future and any
            downstream object will be immediately returned and synced when
            the Future has completed.

    Returns:
        (jobs.BatchPredictionJob):
            Instantiated representation of the created batch prediction job.
    """
    utils.validate_display_name(job_display_name)

    model_name = utils.full_resource_name(
        resource_name=model_name,
        resource_noun="models",
        project=project,
        location=location,
    )

    # Raise error if both or neither source URIs are provided
    if bool(gcs_source) == bool(bigquery_source):
        raise ValueError(
            "Please provide either a gcs_source or bigquery_source, "
            "but not both."
        )

    # Raise error if both or neither destination prefixes are provided
    if bool(gcs_destination_prefix) == bool(bigquery_destination_prefix):
        raise ValueError(
            "Please provide either a gcs_destination_prefix or "
            "bigquery_destination_prefix, but not both."
        )

    # Raise error if unsupported instance format is provided
    if instances_format not in constants.BATCH_PREDICTION_INPUT_STORAGE_FORMATS:
        raise ValueError(
            f"{instances_format} is not an accepted instances format "
            f"type. Please choose from: {constants.BATCH_PREDICTION_INPUT_STORAGE_FORMATS}"
        )

    # Raise error if unsupported prediction format is provided
    if predictions_format not in constants.BATCH_PREDICTION_OUTPUT_STORAGE_FORMATS:
        raise ValueError(
            f"{predictions_format} is not an accepted prediction format "
            f"type. Please choose from: {constants.BATCH_PREDICTION_OUTPUT_STORAGE_FORMATS}"
        )

    gca_bp_job = gca_bp_job_compat
    gca_io = gca_io_compat
    gca_machine_resources = gca_machine_resources_compat
    select_version = compat.DEFAULT_VERSION

    if generate_explanation:
        gca_bp_job = gca_bp_job_v1beta1
        gca_io = gca_io_v1beta1
        gca_machine_resources = gca_machine_resources_v1beta1
        select_version = compat.V1BETA1

    gapic_batch_prediction_job = gca_bp_job.BatchPredictionJob()

    # Required Fields
    gapic_batch_prediction_job.display_name = job_display_name
    gapic_batch_prediction_job.model = model_name

    input_config = gca_bp_job.BatchPredictionJob.InputConfig()
    output_config = gca_bp_job.BatchPredictionJob.OutputConfig()

    if bigquery_source:
        input_config.instances_format = "bigquery"
        input_config.bigquery_source = gca_io.BigQuerySource()
        input_config.bigquery_source.input_uri = bigquery_source
    else:
        input_config.instances_format = instances_format
        input_config.gcs_source = gca_io.GcsSource(
            uris=gcs_source if isinstance(gcs_source, list) else [gcs_source]
        )

    if bigquery_destination_prefix:
        output_config.predictions_format = "bigquery"
        output_config.bigquery_destination = gca_io.BigQueryDestination()

        bq_dest_prefix = bigquery_destination_prefix

        if not bq_dest_prefix.startswith("bq://"):
            bq_dest_prefix = f"bq://{bq_dest_prefix}"

        output_config.bigquery_destination.output_uri = bq_dest_prefix
    else:
        output_config.predictions_format = predictions_format
        output_config.gcs_destination = gca_io.GcsDestination(
            output_uri_prefix=gcs_destination_prefix
        )

    gapic_batch_prediction_job.input_config = input_config
    gapic_batch_prediction_job.output_config = output_config

    # Optional Fields
    gapic_batch_prediction_job.encryption_spec = (
        initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name,
            select_version=select_version,
        )
    )

    if model_parameters:
        gapic_batch_prediction_job.model_parameters = model_parameters

    # Custom Compute
    if machine_type:
        machine_spec = gca_machine_resources.MachineSpec()
        machine_spec.machine_type = machine_type
        machine_spec.accelerator_type = accelerator_type
        machine_spec.accelerator_count = accelerator_count

        dedicated_resources = gca_machine_resources.BatchDedicatedResources()
        dedicated_resources.machine_spec = machine_spec
        dedicated_resources.starting_replica_count = starting_replica_count
        dedicated_resources.max_replica_count = max_replica_count

        gapic_batch_prediction_job.dedicated_resources = dedicated_resources

        gapic_batch_prediction_job.manual_batch_tuning_parameters = None

    # User Labels
    gapic_batch_prediction_job.labels = labels

    # Explanations
    if generate_explanation:
        gapic_batch_prediction_job.generate_explanation = generate_explanation

        if explanation_metadata or explanation_parameters:
            gapic_batch_prediction_job.explanation_spec = (
                gca_explanation_v1beta1.ExplanationSpec(
                    metadata=explanation_metadata,
                    parameters=explanation_parameters,
                )
            )

    # TODO (b/174502913): Support private feature once released

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    return cls._create(
        api_client=api_client,
        parent=initializer.global_config.common_location_path(
            project=project, location=location
        ),
        batch_prediction_job=gapic_batch_prediction_job,
        generate_explanation=generate_explanation,
        project=project or initializer.global_config.project,
        location=location or initializer.global_config.location,
        credentials=credentials or initializer.global_config.credentials,
        sync=sync,
    )
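# A usage sketch for the method above: GCS in, GCS out, on dedicated compute.
# The bucket paths and the model ID are placeholders, and `aiplatform.init(...)`
# is assumed to have been called:
#
#     bp_job = aiplatform.BatchPredictionJob.create(
#         job_display_name="my-batch-prediction",
#         model_name="456",  # resolved against the initialized project/location
#         gcs_source="gs://my-bucket/instances.jsonl",
#         gcs_destination_prefix="gs://my-bucket/predictions",
#         machine_type="n1-standard-4",
#         starting_replica_count=1,
#         max_replica_count=2,
#     )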
def create(
    cls,
    display_name: str,
    description: Optional[str] = None,
    labels: Optional[Dict[str, str]] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    encryption_spec_key_name: Optional[str] = None,
) -> "Tensorboard":
    """Creates a new tensorboard.

    Example Usage:
        tb = aiplatform.Tensorboard.create(
            display_name='my display name',
            description='my description',
            labels={
                'key1': 'value1',
                'key2': 'value2',
            },
        )

    Args:
        display_name (str):
            Required. The user-defined name of the Tensorboard. The name can
            be up to 128 characters long and can consist of any UTF-8
            characters.
        description (str):
            Optional. Description of this Tensorboard.
        labels (Dict[str, str]):
            Optional. Labels with user-defined metadata to organize your
            Tensorboards. Label keys and values can be no longer than 64
            characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. No more than 64 user
            labels can be associated with one Tensorboard (System labels are
            excluded). See https://goo.gl/xmQnxf for more information and
            examples of labels. System reserved label keys are prefixed with
            "aiplatform.googleapis.com/" and are immutable.
        project (str):
            Optional. Project to upload this tensorboard to. Overrides
            project set in aiplatform.init.
        location (str):
            Optional. Location to upload this tensorboard to. Overrides
            location set in aiplatform.init.
        credentials (auth_credentials.Credentials):
            Optional. Custom credentials to use to upload this tensorboard.
            Overrides credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Optional. Strings which should be sent along with the request as
            metadata.
        encryption_spec_key_name (str):
            Optional. Cloud KMS resource identifier of the customer managed
            encryption key used to protect the tensorboard. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Tensorboard and all sub-resources of this
            Tensorboard will be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.

    Returns:
        tensorboard (Tensorboard):
            Instantiated representation of the managed tensorboard resource.
    """
    utils.validate_display_name(display_name)

    if labels:
        utils.validate_labels(labels)

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    parent = initializer.global_config.common_location_path(
        project=project, location=location
    )

    encryption_spec = initializer.global_config.get_encryption_spec(
        encryption_spec_key_name=encryption_spec_key_name
    )

    gapic_tensorboard = gca_tensorboard.Tensorboard(
        display_name=display_name,
        description=description,
        labels=labels,
        encryption_spec=encryption_spec,
    )

    create_tensorboard_lro = api_client.create_tensorboard(
        parent=parent, tensorboard=gapic_tensorboard, metadata=request_metadata
    )

    _LOGGER.log_create_with_lro(cls, create_tensorboard_lro)

    created_tensorboard = create_tensorboard_lro.result()

    _LOGGER.log_create_complete(cls, created_tensorboard, "tb")

    return cls(
        tensorboard_name=created_tensorboard.name,
        credentials=credentials,
    )
def create(
    cls,
    tensorboard_run_id: str,
    tensorboard_experiment_name: str,
    tensorboard_id: Optional[str] = None,
    display_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[Dict[str, str]] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    request_metadata: Sequence[Tuple[str, str]] = (),
) -> "TensorboardRun":
    """Creates a new tensorboard run.

    Example Usage:
        tb_run = aiplatform.TensorboardRun.create(
            tensorboard_run_id='my-run',
            tensorboard_experiment_name='my-experiment',
            tensorboard_id='456',
            display_name='my display name',
            description='my description',
            labels={
                'key1': 'value1',
                'key2': 'value2',
            },
        )

    Args:
        tensorboard_run_id (str):
            Required. The ID to use for the Tensorboard run, which will
            become the final component of the Tensorboard run's resource
            name. This value should be 1-128 characters, and valid
            characters are /[a-z][0-9]-/.
        tensorboard_experiment_name (str):
            Required. The resource name or ID of the TensorboardExperiment
            to create the TensorboardRun in. Resource name format:
            ``projects/{project}/locations/{location}/tensorboards/{tensorboard}/experiments/{experiment}``

            If a resource ID is provided, then tensorboard_id must be
            provided.
        tensorboard_id (str):
            Optional. The resource ID of the Tensorboard to create the
            TensorboardRun in.
        display_name (str):
            Optional. The user-defined name of the Tensorboard Run. This
            value must be unique among all TensorboardRuns belonging to the
            same parent TensorboardExperiment.

            If not provided, tensorboard_run_id will be used.
        description (str):
            Optional. Description of this Tensorboard Run.
        labels (Dict[str, str]):
            Optional. Labels with user-defined metadata to organize your
            TensorboardRuns. Label keys and values can be no longer than 64
            characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. No more than 64 user
            labels can be associated with one TensorboardRun (System labels
            are excluded). See https://goo.gl/xmQnxf for more information
            and examples of labels. System reserved label keys are prefixed
            with "aiplatform.googleapis.com/" and are immutable.
        project (str):
            Optional. Project to create this TensorboardRun in. Overrides
            project set in aiplatform.init.
        location (str):
            Optional. Location to create this TensorboardRun in. Overrides
            location set in aiplatform.init.
        credentials (auth_credentials.Credentials):
            Optional. Custom credentials to use to create this
            TensorboardRun. Overrides credentials set in aiplatform.init.
        request_metadata (Sequence[Tuple[str, str]]):
            Optional. Strings which should be sent along with the request as
            metadata.

    Returns:
        TensorboardRun: The TensorboardRun resource.
    """
    if display_name:
        utils.validate_display_name(display_name)

    if labels:
        utils.validate_labels(labels)

    display_name = display_name or tensorboard_run_id

    api_client = cls._instantiate_client(location=location, credentials=credentials)

    parent = utils.full_resource_name(
        resource_name=tensorboard_experiment_name,
        resource_noun=TensorboardExperiment._resource_noun,
        parse_resource_name_method=TensorboardExperiment._parse_resource_name,
        format_resource_name_method=TensorboardExperiment._format_resource_name,
        parent_resource_name_fields={Tensorboard._resource_noun: tensorboard_id},
        project=project,
        location=location,
    )

    gapic_tensorboard_run = gca_tensorboard_run.TensorboardRun(
        display_name=display_name,
        description=description,
        labels=labels,
    )

    _LOGGER.log_create_with_lro(cls)

    tensorboard_run = api_client.create_tensorboard_run(
        parent=parent,
        tensorboard_run=gapic_tensorboard_run,
        tensorboard_run_id=tensorboard_run_id,
        metadata=request_metadata,
    )

    _LOGGER.log_create_complete(cls, tensorboard_run, "tb_run")

    return cls(
        tensorboard_run_name=tensorboard_run.name,
        credentials=credentials,
    )
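# A sketch of the two accepted parent forms for the method above (IDs and
# resource names are placeholders): either a full TensorboardExperiment
# resource name, or an experiment ID plus `tensorboard_id`:
#
#     run = aiplatform.TensorboardRun.create(
#         tensorboard_run_id="run-1",
#         tensorboard_experiment_name=(
#             "projects/123/locations/us-central1/tensorboards/456/experiments/my-exp"
#         ),
#     )
#
#     run = aiplatform.TensorboardRun.create(
#         tensorboard_run_id="run-1",
#         tensorboard_experiment_name="my-exp",
#         tensorboard_id="456",
#     )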
def update(
    self,
    display_name: Optional[str] = None,
    description: Optional[str] = None,
    labels: Optional[Dict[str, str]] = None,
    request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
    encryption_spec_key_name: Optional[str] = None,
) -> "Tensorboard":
    """Updates an existing tensorboard.

    Example Usage:
        tb = aiplatform.Tensorboard(tensorboard_name='123456')
        tb.update(
            display_name='update my display name',
            description='update my description',
        )

    Args:
        display_name (str):
            Optional. User-defined name of the Tensorboard. The name can be
            up to 128 characters long and can consist of any UTF-8
            characters.
        description (str):
            Optional. Description of this Tensorboard.
        labels (Dict[str, str]):
            Optional. Labels with user-defined metadata to organize your
            Tensorboards. Label keys and values can be no longer than 64
            characters (Unicode codepoints), can only contain lowercase
            letters, numeric characters, underscores and dashes.
            International characters are allowed. No more than 64 user
            labels can be associated with one Tensorboard (System labels are
            excluded). See https://goo.gl/xmQnxf for more information and
            examples of labels. System reserved label keys are prefixed with
            "aiplatform.googleapis.com/" and are immutable.
        request_metadata (Sequence[Tuple[str, str]]):
            Optional. Strings which should be sent along with the request as
            metadata.
        encryption_spec_key_name (str):
            Optional. Cloud KMS resource identifier of the customer managed
            encryption key used to protect the tensorboard. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If set, this Tensorboard and all sub-resources of this
            Tensorboard will be secured by this key.

            Overrides encryption_spec_key_name set in aiplatform.init.

    Returns:
        Tensorboard: The managed tensorboard resource.
    """
    update_mask = []

    if display_name:
        utils.validate_display_name(display_name)
        update_mask.append("display_name")

    if description:
        update_mask.append("description")

    if labels:
        utils.validate_labels(labels)
        update_mask.append("labels")

    encryption_spec = None
    if encryption_spec_key_name:
        encryption_spec = initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name,
        )
        update_mask.append("encryption_spec")

    update_mask = field_mask_pb2.FieldMask(paths=update_mask)

    gapic_tensorboard = gca_tensorboard.Tensorboard(
        name=self.resource_name,
        display_name=display_name,
        description=description,
        labels=labels,
        encryption_spec=encryption_spec,
    )

    _LOGGER.log_action_start_against_resource(
        "Updating",
        "tensorboard",
        self,
    )

    update_tensorboard_lro = self.api_client.update_tensorboard(
        tensorboard=gapic_tensorboard,
        update_mask=update_mask,
        metadata=request_metadata,
    )

    _LOGGER.log_action_started_against_resource_with_lro(
        "Update", "tensorboard", self.__class__, update_tensorboard_lro
    )

    update_tensorboard_lro.result()

    _LOGGER.log_action_completed_against_resource("tensorboard", "updated", self)

    return self
def __init__(
    self,
    display_name: str,
    template_path: str,
    job_id: Optional[str] = None,
    pipeline_root: Optional[str] = None,
    parameter_values: Optional[Dict[str, Any]] = None,
    enable_caching: Optional[bool] = True,
    encryption_spec_key_name: Optional[str] = None,
    labels: Optional[Dict[str, str]] = None,
    credentials: Optional[auth_credentials.Credentials] = None,
    project: Optional[str] = None,
    location: Optional[str] = None,
):
    """Retrieves a PipelineJob resource and instantiates its representation.

    Args:
        display_name (str):
            Required. The user-defined name of this Pipeline.
        template_path (str):
            Required. The path of the PipelineJob JSON file. It can be a
            local path or a Google Cloud Storage URI.
            Example: "gs://project.name"
        job_id (str):
            Optional. The unique ID of the job run. If not specified,
            pipeline name + timestamp will be used.
        pipeline_root (str):
            Optional. The root of the pipeline outputs. Defaults to the
            staging bucket.
        parameter_values (Dict[str, Any]):
            Optional. The mapping from runtime parameter names to its values
            that control the pipeline run.
        enable_caching (bool):
            Optional. Whether to turn on caching for the run. Defaults to
            True.
        encryption_spec_key_name (str):
            Optional. The Cloud KMS resource identifier of the customer
            managed encryption key used to protect the job. Has the form:
            ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
            The key needs to be in the same region as where the compute
            resource is created.

            If this is set, then all resources created by the PipelineJob
            will be encrypted with the provided encryption key.

            Overrides encryption_spec_key_name set in aiplatform.init.
        labels (Dict[str, str]):
            Optional. The user-defined metadata to organize the PipelineJob.
        credentials (auth_credentials.Credentials):
            Optional. Custom credentials to use to create this PipelineJob.
            Overrides credentials set in aiplatform.init.
        project (str):
            Optional. Project to retrieve PipelineJob from. If not set,
            project set in aiplatform.init will be used.
        location (str):
            Optional. Location to create PipelineJob. If not set, location
            set in aiplatform.init will be used.

    Raises:
        ValueError: If job_id or labels have incorrect format.
    """
    utils.validate_display_name(display_name)

    if labels:
        for k, v in labels.items():
            if not isinstance(k, str) or not isinstance(v, str):
                raise ValueError(
                    "Expect labels to be a mapping of string key value pairs. "
                    'Got "{}".'.format(labels)
                )

    super().__init__(project=project, location=location, credentials=credentials)

    self._parent = initializer.global_config.common_location_path(
        project=project, location=location
    )

    pipeline_job = json_utils.load_json(template_path, self.project, self.credentials)

    pipeline_root = (
        pipeline_root
        or pipeline_job["runtimeConfig"].get("gcsOutputDirectory")
        or initializer.global_config.staging_bucket
    )

    pipeline_name = pipeline_job["pipelineSpec"]["pipelineInfo"]["name"]
    job_id = job_id or "{pipeline_name}-{timestamp}".format(
        pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower())
        .lstrip("-")
        .rstrip("-"),
        timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"),
    )
    if not _VALID_NAME_PATTERN.match(job_id):
        raise ValueError(
            "Generated job ID: {} is illegal as a Vertex pipelines job ID. "
            "Expecting an ID following the regex pattern "
            '"[a-z][-a-z0-9]{{0,127}}"'.format(job_id)
        )

    job_name = _JOB_NAME_PATTERN.format(parent=self._parent, job_id=job_id)

    builder = pipeline_utils.PipelineRuntimeConfigBuilder.from_job_spec_json(
        pipeline_job
    )
    builder.update_pipeline_root(pipeline_root)
    builder.update_runtime_parameters(parameter_values)
    runtime_config_dict = builder.build()

    runtime_config = gca_pipeline_job_v1beta1.PipelineJob.RuntimeConfig()._pb
    json_format.ParseDict(runtime_config_dict, runtime_config)

    _set_enable_caching_value(pipeline_job["pipelineSpec"], enable_caching)

    self._gca_resource = gca_pipeline_job_v1beta1.PipelineJob(
        display_name=display_name,
        name=job_name,
        pipeline_spec=pipeline_job["pipelineSpec"],
        labels=labels,
        runtime_config=runtime_config,
        encryption_spec=initializer.global_config.get_encryption_spec(
            encryption_spec_key_name=encryption_spec_key_name
        ),
    )