Example #1
def check_if_job_exists(gcp_resources) -> Optional[str]:
    """Check if the BigQuery job already created.

  Return the job url if created. Return None otherwise
  """
    if path.exists(gcp_resources) and os.stat(gcp_resources).st_size != 0:
        with open(gcp_resources) as f:
            serialized_gcp_resources = f.read()
            job_resources = json_format.Parse(serialized_gcp_resources,
                                              GcpResources())
            # Resources should only contain one item.
            if len(job_resources.resources) != 1:
                raise ValueError(
                    f'gcp_resources should contain one resource, found {len(job_resources.resources)}'
                )
            # Validate the format of the resource uri.
            job_name_pattern = re.compile(_BQ_JOB_NAME_TEMPLATE)
            match = job_name_pattern.match(
                job_resources.resources[0].resource_uri)
            try:
                project = match.group('project')
                job = match.group('job')
            except AttributeError as err:
                raise ValueError(
                    'Invalid bigquery job uri: {}. Expect: {}.'.format(
                        job_resources.resources[0].resource_uri,
                        'https://www.googleapis.com/bigquery/v2/projects/[projectId]/jobs/[jobId]?location=[location]'
                    )) from err

        return job_resources.resources[0].resource_uri
    else:
        return None
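
A note on the file format these helpers share: gcp_resources is a GcpResources
proto serialized to JSON, holding a repeated list of resources, each with a
resource_type and a resource_uri. Below is a minimal round-trip sketch (the
project and job IDs are invented) using the same json_format calls as the
examples on this page.

    from google.protobuf import json_format
    from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources

    # Build a GcpResources message with a single BigQuery job resource.
    resources = GcpResources()
    resource = resources.resources.add()
    resource.resource_type = 'BigQueryJob'
    resource.resource_uri = (
        'https://www.googleapis.com/bigquery/v2/projects/my-project'
        '/jobs/my-job?location=US')

    # Serialize to JSON, then parse it back the way check_if_job_exists does.
    serialized = json_format.MessageToJson(resources)
    parsed = json_format.Parse(serialized, GcpResources())
    assert parsed.resources[0].resource_uri == resource.resource_uri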
Example #2
    def check_if_job_exists(self) -> Optional[str]:
        """Check if the job already exists."""
        if path.exists(self.gcp_resources) and os.stat(
                self.gcp_resources).st_size != 0:
            with open(self.gcp_resources) as f:
                serialized_gcp_resources = f.read()
                job_resources = json_format.Parse(serialized_gcp_resources,
                                                  GcpResources())
                # Resources should only contain one item.
                if len(job_resources.resources) != 1:
                    raise ValueError(
                        f'gcp_resources should contain one resource, found {len(job_resources.resources)}'
                    )

                job_name_group = re.findall(
                    f'{self.job_uri_prefix}(.*)',
                    job_resources.resources[0].resource_uri)

                if not job_name_group or not job_name_group[0]:
                    raise ValueError(
                        'Job Name in gcp_resource is not formatted correctly or is empty.'
                    )
                job_name = job_name_group[0]

                logging.info(
                    '%s name already exists: %s. Continue polling the status',
                    self.job_type, job_name)
            return job_name
        else:
            return None
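
The prefix strip in Example #2 uses the job URI prefix as a regular-expression
pattern with a single capture group. A small sketch with a hypothetical prefix;
re.escape is used here because unescaped dots in the prefix would otherwise
match any character:

    import re

    job_uri_prefix = 'https://us-central1-aiplatform.googleapis.com/v1/'
    resource_uri = job_uri_prefix + 'projects/p/locations/us-central1/customJobs/123'

    # re.findall(pattern, string): capture everything after the prefix.
    job_name_group = re.findall(f'{re.escape(job_uri_prefix)}(.*)', resource_uri)
    assert job_name_group[0] == 'projects/p/locations/us-central1/customJobs/123'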
Example #3
    def test_hptuning_job_remote_runner_raises_exception_with_more_than_one_resource_in_gcp_resources(
            self, _, mock_job_service_client):
        job_client = mock.Mock()
        mock_job_service_client.return_value = job_client

        create_hptuning_job_response = mock.Mock()
        job_client.create_hyperparameter_tuning_job.return_value = create_hptuning_job_response
        create_hptuning_job_response.name = self._hptuning_job_name

        get_hptuning_job_response_success = mock.Mock()
        get_hptuning_job_response_success.state = gca_job_state.JobState.JOB_STATE_SUCCEEDED

        job_client.get_hyperparameter_tuning_job.side_effect = [
            get_hptuning_job_response_success
        ]

        # Write the job proto to output
        hptuning_job_resources = GcpResources()
        hptuning_job_resource_1 = hptuning_job_resources.resources.add()
        hptuning_job_resource_1.resource_type = "HyperparameterTuningJob"
        hptuning_job_resource_1.resource_uri = f"{self._hptuning_job_uri_prefix}{self._hptuning_job_name}"

        hptuning_job_resource_2 = hptuning_job_resources.resources.add()
        hptuning_job_resource_2.resource_type = "HyperparameterTuningJob"
        hptuning_job_resource_2.resource_uri = f"{self._hptuning_job_uri_prefix}{self._hptuning_job_name}"

        with open(self._gcp_resources, "w") as f:
            f.write(json_format.MessageToJson(hptuning_job_resources))

        with self.assertRaisesRegex(
                ValueError,
                "gcp_resources should contain one resource, found 2"):
            hyperparameter_tuning_job_remote_runner.create_hyperparameter_tuning_job(
                self._type, self._project, self._location, self._payload,
                self._gcp_resources)
Example #4
    def test_model_deploy_remote_runner_succeeded(self, mock_post_requests, _,
                                                  mock_auth):
        creds = mock.Mock()
        creds.token = 'fake_token'
        mock_auth.return_value = [creds, "project"]
        deploy_model_lro = mock.Mock()
        deploy_model_lro.json.return_value = {
            'name': self._lro_name,
            'done': True,
        }
        mock_post_requests.return_value = deploy_model_lro

        deploy_model_remote_runner.deploy_model(self._type, '', '',
                                                self._payload,
                                                self._gcp_resouces_path)
        mock_post_requests.assert_called_once_with(
            url=
            f'{self._uri_prefix}projects/test_project/locations/test_region/endpoints/e12:deployModel',
            data=self._payload,
            headers={
                'Content-type': 'application/json',
                'Authorization': 'Bearer fake_token',
                'User-Agent': 'google-cloud-pipeline-components'
            })

        with open(self._gcp_resouces_path) as f:
            serialized_gcp_resources = f.read()
            # Instantiate GCPResources Proto
            lro_resources = json_format.Parse(serialized_gcp_resources,
                                              GcpResources())

            self.assertEqual(len(lro_resources.resources), 1)
            self.assertEqual(lro_resources.resources[0].resource_uri,
                             self._uri_prefix + self._lro_name)
Example #5
    def create_lro(self, create_url: str, request_body: str,
                   gcp_resources: str, http_request: str = 'post') -> Any:
        """call the create API and get a LRO"""

        # Currently we don't check if operation already exists and continue from there
        # If this is desirable to the user and improves the reliability, we could do the following
        # ```
        # from google.api_core import operations_v1, grpc_helpers
        # channel = grpc_helpers.create_channel(location + '-aiplatform.googleapis.com')
        # api = operations_v1.OperationsClient(channel)
        # current_status = api.get_operation(lro.operation.name)
        # ```

        lro = self.request(request_url=create_url,
                           request_body=request_body, http_request=http_request)

        lro_name = lro['name']
        get_operation_uri = f"{self.vertex_uri_prefix}{lro_name}"

        # Write the lro to the gcp_resources output parameter
        long_running_operations = GcpResources()
        long_running_operation = long_running_operations.resources.add()
        long_running_operation.resource_type = "VertexLro"
        long_running_operation.resource_uri = get_operation_uri
        with open(gcp_resources, 'w') as f:
            f.write(json_format.MessageToJson(long_running_operations))

        return lro
Example #6
def GetTrialsOp(gcp_resources: str) -> list:
    """Retrieves the best trial from the trials.

  Args:
      gcp_resources (str): Proto tracking the hyperparameter tuning job.

  Returns:
      List of strings representing the intermediate JSON representation of the
      trials from the hyperparameter tuning job.
  """
    from google.cloud import aiplatform
    from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources
    from google.protobuf.json_format import Parse
    from google.cloud.aiplatform_v1.types import study

    api_endpoint_suffix = '-aiplatform.googleapis.com'
    gcp_resources_proto = Parse(gcp_resources, GcpResources())
    gcp_resources_split = gcp_resources_proto.resources[
        0].resource_uri.partition('projects')
    resource_name = gcp_resources_split[1] + gcp_resources_split[2]
    prefix_str = gcp_resources_split[0]
    prefix_str = prefix_str[:prefix_str.find(api_endpoint_suffix)]
    api_endpoint = prefix_str[(prefix_str.rfind('//') +
                               2):] + api_endpoint_suffix

    client_options = {'api_endpoint': api_endpoint}
    job_client = aiplatform.gapic.JobServiceClient(
        client_options=client_options)
    response = job_client.get_hyperparameter_tuning_job(name=resource_name)

    return [study.Trial.to_json(trial) for trial in response.trials]
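
The endpoint derivation in Example #6 is easiest to follow on a concrete URI.
A worked sketch with an invented resource URI:

    uri = ('https://us-central1-aiplatform.googleapis.com/v1/'
           'projects/p/locations/us-central1/hyperparameterTuningJobs/456')
    head, sep, tail = uri.partition('projects')
    resource_name = sep + tail
    # resource_name == 'projects/p/locations/us-central1/hyperparameterTuningJobs/456'

    prefix = head[:head.find('-aiplatform.googleapis.com')]
    # prefix == 'https://us-central1'
    api_endpoint = prefix[prefix.rfind('//') + 2:] + '-aiplatform.googleapis.com'
    # api_endpoint == 'us-central1-aiplatform.googleapis.com'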
Example #7
    def test_custom_job_remote_runner_raises_exception_empty_URI_in_gcp_resources(
            self, mock_time_sleep, mock_job_service_client):
        job_client = mock.Mock()
        mock_job_service_client.return_value = job_client

        create_custom_job_response = mock.Mock()
        job_client.create_custom_job.return_value = create_custom_job_response
        create_custom_job_response.name = self._custom_job_name

        get_custom_job_response_success = mock.Mock()
        get_custom_job_response_success.state = gca_job_state.JobState.JOB_STATE_SUCCEEDED

        job_client.get_custom_job.side_effect = [
            get_custom_job_response_success
        ]

        # Write the job proto to output
        custom_job_resources = GcpResources()
        custom_job_resource_1 = custom_job_resources.resources.add()
        custom_job_resource_1.resource_type = "CustomJob"
        custom_job_resource_1.resource_uri = ""

        with open(self._gcp_resources, "w") as f:
            f.write(json_format.MessageToJson(custom_job_resources))

        with self.assertRaisesRegex(
                ValueError,
                "Job Name in gcp_resource is not formatted correctly or is empty."
        ):
            custom_job_remote_runner.create_custom_job(self._type,
                                                       self._project,
                                                       self._location,
                                                       self._payload,
                                                       self._gcp_resources)
Example #8
    def test_custom_job_remote_runner_returns_gcp_resources(
            self, mock_time_sleep, mock_path_exists, mock_job_service_client):
        job_client = mock.Mock()
        mock_job_service_client.return_value = job_client

        create_custom_job_response = mock.Mock()
        job_client.create_custom_job.return_value = create_custom_job_response
        create_custom_job_response.name = self._custom_job_name

        get_custom_job_response_success = mock.Mock()
        get_custom_job_response_success.state = gca_job_state.JobState.JOB_STATE_SUCCEEDED

        job_client.get_custom_job.side_effect = [
            get_custom_job_response_success
        ]

        mock_path_exists.return_value = False

        custom_job_remote_runner.create_custom_job(self._type, self._project,
                                                   self._location,
                                                   self._payload,
                                                   self._gcp_resources)

        with open(self._gcp_resources) as f:
            serialized_gcp_resources = f.read()

            # Instantiate GCPResources Proto
            custom_job_resources = json_format.Parse(serialized_gcp_resources,
                                                     GcpResources())

            self.assertEqual(len(custom_job_resources.resources), 1)
            custom_job_name = custom_job_resources.resources[0].resource_uri[
                len(self._custom_job_uri_prefix):]
            self.assertEqual(custom_job_name, self._custom_job_name)
Example #9
    def test_batch_prediction_job_remote_runner_succeeded_output_bq_table(
            self, mock_path_exists, mock_job_service_client):
        job_client = mock.Mock()
        mock_job_service_client.return_value = job_client

        create_batch_prediction_job_response = mock.Mock()
        job_client.create_batch_prediction_job.return_value = create_batch_prediction_job_response
        create_batch_prediction_job_response.name = self._batch_prediction_job_name

        get_batch_prediction_job_response = mock.Mock()
        job_client.get_batch_prediction_job.return_value = get_batch_prediction_job_response
        get_batch_prediction_job_response.state = gca_job_state.JobState.JOB_STATE_SUCCEEDED
        get_batch_prediction_job_response.name = 'job1'
        get_batch_prediction_job_response.output_info.bigquery_output_table = 'bigquery_output_table'
        get_batch_prediction_job_response.output_info.bigquery_output_dataset = 'bq://bq_project.bigquery_output_dataset'
        get_batch_prediction_job_response.output_info.gcs_output_directory = ''

        mock_path_exists.return_value = False

        batch_prediction_job_remote_runner.create_batch_prediction_job(
            self._job_type, self._project, self._location, self._payload,
            self._gcp_resources, self._executor_input)

        mock_job_service_client.assert_called_once_with(client_options={
            'api_endpoint':
            'test_region-aiplatform.googleapis.com'
        },
                                                        client_info=mock.ANY)

        expected_parent = f'projects/{self._project}/locations/{self._location}'
        expected_job_spec = json.loads(self._payload, strict=False)

        job_client.create_batch_prediction_job.assert_called_once_with(
            parent=expected_parent, batch_prediction_job=expected_job_spec)

        with open(self._gcp_resources) as f:
            serialized_gcp_resources = f.read()

            # Instantiate GCPResources Proto
            batch_prediction_job_resources = json_format.Parse(
                serialized_gcp_resources, GcpResources())

            self.assertEqual(len(batch_prediction_job_resources.resources), 1)
            batch_prediction_job_name = batch_prediction_job_resources.resources[
                0].resource_uri[len(self._batch_prediction_job_uri_prefix):]
            self.assertEqual(batch_prediction_job_name,
                             self._batch_prediction_job_name)

        with open(self._output_file_path) as f:
            executor_output = json.load(f, strict=False)
            self.assertEqual(
                executor_output,
                json.loads('{"artifacts": {\
              "batchpredictionjob": {"artifacts": [{"metadata": {"resourceName": "job1", "bigqueryOutputDataset": "bq://bq_project.bigquery_output_dataset","bigqueryOutputTable": "bigquery_output_table","gcsOutputDirectory": ""}, "name": "foobar", "type": {"schemaTitle": "google.VertexBatchPredictionJob"}, "uri": "https://test_region-aiplatform.googleapis.com/v1/job1"}]},\
              "bigquery_output_table": {"artifacts": [{"metadata": {"projectId": "bq_project", "datasetId": "bigquery_output_dataset", "tableId": "bigquery_output_table"}, "name": "bq_table", "type": {"schemaTitle": "google.BQTable"}, "uri": "https://www.googleapis.com/bigquery/v2/projects/bq_project/datasets/bigquery_output_dataset/tables/bigquery_output_table"}]}}}'
                           ))
Example #10
    def test_model_upload_remote_runner_append_unmanaged_model_succeeded(
            self, mock_post_requests, _, mock_auth):
        creds = mock.Mock()
        creds.token = 'fake_token'
        mock_auth.return_value = [creds, 'project']
        upload_model_lro = mock.Mock()
        upload_model_lro.json.return_value = {
            'name': self._lro_name,
            'done': True,
            'response': {
                'model': self._model_name
            }
        }
        mock_post_requests.return_value = upload_model_lro

        self._executor_input = (
            '{"inputs":{"artifacts":{"unmanaged_container_model":{"artifacts":[{"metadata":{"predictSchemata":{"instanceSchemaUri":"instance_a"},'
            ' '
            '"containerSpec":{"imageUri":"image_foo"}},"name":"unmanaged_container_model","type":{"schemaTitle":"google.UnmanagedContainerModel"},"uri":"gs://abc"}]}}},"outputs":{"artifacts":{"model":{"artifacts":[{"metadata":{},"name":"foobar","type":{"schemaTitle":"google.VertexModel"},"uri":"gs://abc"}]}},"outputFile":"'
        ) + self._output_file_path + '"}}'
        upload_model_remote_runner.upload_model(self._type, self._project,
                                                self._location, self._payload,
                                                self._gcp_resources_path,
                                                self._executor_input)
        mock_post_requests.assert_called_once_with(
            url=
            f'{self._uri_prefix}projects/test_project/locations/test_region/models:upload',
            data=
            '{"model": {"display_name": "model1", "predict_schemata": {"instance_schema_uri": "instance_a"}, "container_spec": {"image_uri": "image_foo"}, "artifact_uri": "gs://abc"}}',
            headers={
                'Content-type': 'application/json',
                'Authorization': 'Bearer fake_token',
                'User-Agent': 'google-cloud-pipeline-components'
            })

        with open(self._output_file_path) as f:
            executor_output = json.load(f, strict=False)
            self.assertEqual(
                executor_output,
                json.loads(
                    '{"artifacts": {"model": {"artifacts": [{"metadata": {"resourceName": "projects/test_project/locations/test_region/models/123"}, "name": "foobar", "type": {"schemaTitle": "google.VertexModel"}, "uri": "https://test_region-aiplatform.googleapis.com/v1/projects/test_project/locations/test_region/models/123"}]}}}'
                ))

        with open(self._gcp_resources_path) as f:
            serialized_gcp_resources = f.read()
            # Instantiate GCPResources Proto
            lro_resources = json_format.Parse(serialized_gcp_resources,
                                              GcpResources())
            self.assertEqual(len(lro_resources.resources), 1)
            self.assertEqual(lro_resources.resources[0].resource_uri,
                             self._uri_prefix + self._lro_name)
Example #11
def _create_job(job_type, project, location, job_request_json, creds,
                gcp_resources) -> str:
  """Create a new BigQuery job


    Args:
        job_type: BigQuery job type.
        project: Project to launch the job.
        location: location to launch the job. For more details, see
          https://cloud.google.com/bigquery/docs/locations#specifying_your_location
        job_request_json: A json object of Job proto. For more details, see
          https://cloud.google.com/bigquery/docs/reference/rest/v2/Job
        creds: Google auth credential.
        gcp_resources: File path for storing `gcp_resources` output parameter.

   Returns:
        The URI of the BigQuery Job.
  """
  # Overrides the location
  if location:
    if 'jobReference' not in job_request_json:
      job_request_json['jobReference'] = {}
    job_request_json['jobReference']['location'] = location

  creds.refresh(google.auth.transport.requests.Request())
  headers = {
      'Content-type': 'application/json',
      'Authorization': 'Bearer ' + creds.token,
      'User-Agent': 'google-cloud-pipeline-components'
  }
  insert_job_url = f'https://www.googleapis.com/bigquery/v2/projects/{project}/jobs'
  job = requests.post(
      url=insert_job_url, data=json.dumps(job_request_json),
      headers=headers).json()
  if 'selfLink' not in job:
    raise RuntimeError(
        'BigQuery Job failed. Cannot retrieve the job name. Response: {}.'
        .format(job))

  # Write the BigQuery job URI to the gcp_resources output parameter.
  job_uri = job['selfLink']
  job_resources = GcpResources()
  job_resource = job_resources.resources.add()
  job_resource.resource_type = job_type
  job_resource.resource_uri = job_uri
  with open(gcp_resources, 'w') as f:
    f.write(json_format.MessageToJson(job_resources))

  return job_uri
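
For context, a minimal sketch (with an invented query) of a job_request_json
that _create_job accepts; when a location is passed, the function fills in
jobReference as shown above:

    job_request_json = {
        'configuration': {
            'query': {
                'query': 'SELECT 1',
                'useLegacySql': False,
            }
        }
    }
    # After the override with location='US', the request also carries:
    # job_request_json['jobReference'] == {'location': 'US'}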
Example #12
def GetTrialsOp(gcp_resources: str, region: str) -> list:
    from google.cloud import aiplatform
    from google_cloud_pipeline_components.proto.gcp_resources_pb2 import GcpResources
    from google.protobuf.json_format import Parse
    from google.cloud.aiplatform_v1.types import study

    client_options = {'api_endpoint': region + '-aiplatform.googleapis.com'}
    job_client = aiplatform.gapic.JobServiceClient(
        client_options=client_options)
    gcp_resources_proto = Parse(gcp_resources, GcpResources())
    gcp_resources_split = gcp_resources_proto.resources[
        0].resource_uri.partition('projects')
    resource_name = gcp_resources_split[1] + gcp_resources_split[2]
    response = job_client.get_hyperparameter_tuning_job(name=resource_name)

    return [study.Trial.to_json(trial) for trial in response.trials]
Example #13
    def test_create_endpoint_remote_runner_succeeded(self, mock_post_requests,
                                                     _, mock_auth):
        creds = mock.Mock()
        creds.token = 'fake_token'
        mock_auth.return_value = [creds, "project"]
        create_endpoint_lro = mock.Mock()
        create_endpoint_lro.json.return_value = {
            'name': self._lro_name,
            'done': True,
            'response': {
                'name': self._endpoint_name
            }
        }
        mock_post_requests.return_value = create_endpoint_lro

        create_endpoint_remote_runner.create_endpoint(
            self._type, self._project, self._location, self._payload,
            self._gcp_resouces_path, self._executor_input)
        mock_post_requests.assert_called_once_with(
            url=
            f'{self._uri_prefix}projects/{self._project}/locations/{self._location}/endpoints',
            data=json.dumps(self._create_endpoint_request),
            headers={
                'Content-type': 'application/json',
                'Authorization': 'Bearer fake_token',
                'User-Agent': 'google-cloud-pipeline-components'
            })

        with open(self._output_file_path) as f:
            executor_output = json.load(f, strict=False)
            self.assertEqual(
                executor_output,
                json.loads(
                    '{"artifacts": {"endpoint": {"artifacts": [{"metadata": {"resourceName": "projects/test_project/locations/test_region/endpoints/123"}, "name": "foobar", "type": {"schemaTitle": "system.Endpoint"}, "uri": "https://test_region-aiplatform.googleapis.com/v1/projects/test_project/locations/test_region/endpoints/123"}]}}}'
                ))

        with open(self._gcp_resouces_path) as f:
            serialized_gcp_resources = f.read()
            # Instantiate GCPResources Proto
            lro_resources = json_format.Parse(serialized_gcp_resources,
                                              GcpResources())

            self.assertEqual(len(lro_resources.resources), 1)
            self.assertEqual(lro_resources.resources[0].resource_uri,
                             self._uri_prefix + self._lro_name)
Example #14
    def create_job(self, create_job_fn, payload) -> str:
        """Create a job."""
        parent = f'projects/{self.project}/locations/{self.location}'
        # TODO(kevinbnaughton) remove empty fields from the spec temporarily.
        job_spec = json_util.recursive_remove_empty(
            json.loads(payload, strict=False))
        create_job_response = create_job_fn(self.job_client, parent, job_spec)
        job_name = create_job_response.name

        # Write the job proto to output.
        job_resources = GcpResources()
        job_resource = job_resources.resources.add()
        job_resource.resource_type = self.job_type
        job_resource.resource_uri = f'{self.job_uri_prefix}{job_name}'

        with open(self.gcp_resources, 'w') as f:
            f.write(json_format.MessageToJson(job_resources))

        return job_name
Example #15
    def test_model_export_remote_runner_succeeded(self, mock_post_requests, _,
                                                  mock_auth):
        creds = mock.Mock()
        creds.token = 'fake_token'
        mock_auth.return_value = [creds, 'project']
        export_model_lro = mock.Mock()
        export_model_lro.json.return_value = {
            'name': self._lro_name,
            'done': True,
            'metadata': {
                'outputInfo': self._output_info_content
            }
        }
        mock_post_requests.return_value = export_model_lro

        export_model_remote_runner.export_model(self._type, '', '',
                                                self._payload,
                                                self._gcp_resources_path,
                                                self._output_info)
        mock_post_requests.assert_called_once_with(
            url=
            f'{self._uri_prefix}projects/test_project/locations/test_region/models/m12:export',
            data=self._payload,
            headers={
                'Content-type': 'application/json',
                'Authorization': 'Bearer fake_token',
                'User-Agent': 'google-cloud-pipeline-components'
            })

        with open(self._output_info) as f:
            self.assertEqual(f.read(), json.dumps(self._output_info_content))

        with open(self._gcp_resources_path) as f:
            serialized_gcp_resources = f.read()
            # Instantiate GCPResources Proto
            lro_resources = json_format.Parse(serialized_gcp_resources,
                                              GcpResources())

            self.assertEqual(len(lro_resources.resources), 1)
            self.assertEqual(lro_resources.resources[0].resource_uri,
                             self._uri_prefix + self._lro_name)
Example #16
  def test_import_model_evaluation_gcp_resources(self, mock_api):
    import_model_evaluation_response = mock.Mock()
    mock_api.return_value = import_model_evaluation_response
    import_model_evaluation_response.name = self._model_name

    main([
        '--metrics', self.metrics_path, '--problem_type', 'classification',
        '--model_name', self._model_name, '--gcp_resources', self._gcp_resources
    ])

    with open(self._gcp_resources) as f:
      serialized_gcp_resources = f.read()

      # Instantiate GCPResources Proto
      model_evaluation_resources = json_format.Parse(serialized_gcp_resources,
                                                     GcpResources())

      self.assertLen(model_evaluation_resources.resources, 1)
      model_evaluation_name = model_evaluation_resources.resources[
          0].resource_uri[len(self._model_evaluation_uri_prefix):]
      self.assertEqual(model_evaluation_name, self._model_name)
Example #17
def create_job(job_type, project, location, payload, creds,
               gcp_resources) -> str:
    """Create a new BigQuery job"""
    job_configuration = json.loads(payload, strict=False)
    # Always use standard SQL instead of legacy SQL.
    job_configuration['query']['useLegacySql'] = False
    job_request = {
        # TODO(IronPan) temporarily remove the empty fields from the spec
        'configuration': json_util.recursive_remove_empty(job_configuration),
    }
    if location is not None:
        if 'jobReference' not in job_request:
            job_request['jobReference'] = {}
        job_request['jobReference']['location'] = location

    creds.refresh(google.auth.transport.requests.Request())
    headers = {
        'Content-type': 'application/json',
        'Authorization': 'Bearer ' + creds.token,
        'User-Agent': 'google-cloud-pipeline-components'
    }
    insert_job_url = f'https://www.googleapis.com/bigquery/v2/projects/{project}/jobs'
    job = requests.post(url=insert_job_url,
                        data=json.dumps(job_request),
                        headers=headers).json()
    if 'selfLink' not in job:
        raise RuntimeError(
            'BigQuery Job failed. Cannot retrieve the job name. Response: {}.'
            .format(job))

    # Write the BigQuery job URI to the gcp_resources output parameter.
    job_uri = job['selfLink']
    job_resources = GcpResources()
    job_resource = job_resources.resources.add()
    job_resource.resource_type = job_type
    job_resource.resource_uri = job_uri
    with open(gcp_resources, 'w') as f:
        f.write(json_format.MessageToJson(job_resources))

    return job_uri
Example #18
    def create_lro(self, create_url: str, request_body: str,
                   gcp_resources: str) -> Any:
        """call the create API and get a LRO"""

        # Currently we don't check if operation already exists and continue from there
        # If this is desirable to the user and improves the reliability, we could do the following
        # ```
        # from google.api_core import operations_v1, grpc_helpers
        # channel = grpc_helpers.create_channel(location + '-aiplatform.googleapis.com')
        # api = operations_v1.OperationsClient(channel)
        # current_status = api.get_operation(lro.operation.name)
        # ```

        self.creds.refresh(google.auth.transport.requests.Request())
        headers = {
            'Content-type': 'application/json',
            'Authorization': 'Bearer ' + self.creds.token,
            'User-Agent': 'google-cloud-pipeline-components'
        }
        lro = requests.post(url=create_url, data=request_body,
                            headers=headers).json()

        if "error" in lro and lro["error"]["code"]:
            raise RuntimeError(
                "Failed to create the resource. Error: {}".format(
                    lro["error"]))

        lro_name = lro['name']
        get_operation_uri = f"{self.vertex_uri_prefix}{lro_name}"

        # Write the lro to the gcp_resources output parameter
        long_running_operations = GcpResources()
        long_running_operation = long_running_operations.resources.add()
        long_running_operation.resource_type = "VertexLro"
        long_running_operation.resource_uri = get_operation_uri
        with open(gcp_resources, 'w') as f:
            f.write(json_format.MessageToJson(long_running_operations))

        return lro
Example #19
def wait_gcp_resources(
    type,
    project,
    location,
    payload,
    gcp_resources,
):
    """
    Poll the gcp resources till it reaches a final state.
    """
    input_gcp_resources = Parse(payload, GcpResources())
    if len(input_gcp_resources.resources) != 1:
        raise ValueError(
            "Invalid payload: %s. Wait component support waiting on only one resource at this moment."
            % payload)

    if input_gcp_resources.resources[0].resource_type != 'DataflowJob':
        raise ValueError(
            "Invalid payload: %s. Wait component only support waiting on Dataflow job at this moment."
            % payload)

    dataflow_job_uri = input_gcp_resources.resources[0].resource_uri
    uri_pattern = re.compile(_DATAFLOW_URI_TEMPLATE)
    match = uri_pattern.match(dataflow_job_uri)
    # Get the project and location from the job URI rather than from the parameters.
    try:
        project = match.group('project')
        location = match.group('location')
        job_id = match.group('jobid')
    except AttributeError as err:
        # TODO(ruifang) propagate the error.
        raise ValueError('Invalid dataflow resource URI: {}. Expect: {}.'.format(
            dataflow_job_uri,
            'https://dataflow.googleapis.com/v1b3/projects/[project_id]/locations/[location]/jobs/[job_id]'
        )) from err

    # Propagate the GCP resources as the output of the wait component
    with open(gcp_resources, 'w') as f:
        f.write(payload)

    with execution_context.ExecutionContext(on_cancel=partial(
            _send_cancel_request,
            project,
            job_id,
            location,
    )):
        # Poll the job status
        retry_count = 0
        while True:
            try:
                df_client = discovery.build('dataflow',
                                            'v1b3',
                                            cache_discovery=False)
                job = df_client.projects().locations().jobs().get(
                    projectId=project,
                    jobId=job_id,
                    location=location,
                    view=None).execute()
                retry_count = 0
            except ConnectionError as err:
                retry_count += 1
                if retry_count <= _CONNECTION_ERROR_RETRY_LIMIT:
                    logging.warning(
                        'ConnectionError (%s) encountered when polling job: %s. Retrying.',
                        err, job_id)
                    # `job` may be stale or unset after a failed request, so
                    # skip the status check below and retry after the interval.
                    time.sleep(_POLLING_INTERVAL_IN_SECONDS)
                    continue
                else:
                    logging.error('Request failed after %s retries.',
                                  _CONNECTION_ERROR_RETRY_LIMIT)
                    # TODO(ruifang) propagate the error.
                    raise

            job_state = job.get('currentState', None)
            # Check whether the job reached a final state.
            if job_state in _JOB_SUCCESSFUL_STATES:
                logging.info(
                    'GetDataflowJob response state =%s. Job completed',
                    job_state)
                return

            elif job_state in _JOB_TERMINATED_STATES:
                # TODO(ruifang) propagate the error.
                raise RuntimeError(
                    'Job {} failed with error state: {}.'.format(
                        job_id, job_state))
            else:
                logging.info(
                    'Job %s is in a non-final state %s. Waiting for %s seconds for next poll.',
                    job_id, job_state, _POLLING_INTERVAL_IN_SECONDS)
                time.sleep(_POLLING_INTERVAL_IN_SECONDS)
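
For reference, the payload accepted by wait_gcp_resources is a JSON-serialized
GcpResources message containing exactly one DataflowJob entry. A minimal sketch
with an invented project and job ID, matching the URI format the error message
above expects:

    payload = '''{
      "resources": [{
        "resourceType": "DataflowJob",
        "resourceUri": "https://dataflow.googleapis.com/v1b3/projects/p/locations/us-central1/jobs/j123"
      }]
    }'''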
Example #20
    def test_bigquery_query_job_remote_runner_succeeded(
            self, mock_time_sleep, mock_get_requests, mock_post_requests, _,
            mock_auth):
        creds = mock.Mock()
        creds.token = 'fake_token'
        mock_auth.return_value = [creds, 'project']
        mock_created_bq_job = mock.Mock()
        mock_created_bq_job.json.return_value = {'selfLink': self._job_uri}
        mock_post_requests.return_value = mock_created_bq_job

        mock_polled_bq_job = mock.Mock()
        mock_polled_bq_job.json.return_value = {
            'selfLink': self._job_uri,
            'status': {
                'state': 'DONE'
            },
            'configuration': {
                'query': {
                    'destinationTable': {
                        'projectId': 'test_project',
                        'datasetId': 'test_dataset',
                        'tableId': 'test_table'
                    }
                }
            }
        }
        mock_get_requests.return_value = mock_polled_bq_job

        bigquery_query_job_remote_runner.create_bigquery_job(
            self._job_type, self._project, self._location, self._payload,
            self._gcp_resources, self._executor_input)
        mock_post_requests.assert_called_once_with(
            url=
            f'https://www.googleapis.com/bigquery/v2/projects/{self._project}/jobs',
            data=
            ('{"configuration": {"query": {"query": "CREATE OR REPLACE MODEL bqml_tutorial.penguins_model OPTIONS (model_type=\'linear_reg\', input_label_cols=[\'body_mass_g\']) AS SELECT * FROM `bigquery-public-data.ml_datasets.penguins` WHERE body_mass_g IS NOT NULL"}}, "jobReference": {"location": "US"}}'
             ),
            headers={
                'Content-type': 'application/json',
                'Authorization': 'Bearer fake_token',
                'User-Agent': 'google-cloud-pipeline-components'
            })

        with open(self._output_file_path) as f:
            self.assertEqual(
                f.read(),
                '{"artifacts": {"destinationTable": {"artifacts": [{"metadata": {}, "name": "foobar", "type": {"schemaTitle": "google.BQTable"}, "uri": "https://www.googleapis.com/bigquery/v2/projects/test_project/datasets/test_dataset/tables/test_table"}]}}}'
            )

        with open(self._gcp_resources) as f:
            serialized_gcp_resources = f.read()
            # Instantiate GCPResources Proto
            bq_job_resources = json_format.Parse(serialized_gcp_resources,
                                                 GcpResources())
            self.assertEqual(len(bq_job_resources.resources), 1)
            self.assertEqual(
                bq_job_resources.resources[0].resource_uri,
                'https://www.googleapis.com/bigquery/v2/projects/test_project/jobs/fake_job?location=US'
            )

        self.assertEqual(mock_post_requests.call_count, 1)
        self.assertEqual(mock_time_sleep.call_count, 1)
        self.assertEqual(mock_get_requests.call_count, 1)
Example #21
def main(argv):
    """Calls ModelService.ImportModelEvaluation."""
    parser = argparse.ArgumentParser(
        prog='Vertex Model Service evaluation importer', description='')
    parser.add_argument('--metrics',
                        dest='metrics',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--metrics_explanation',
                        dest='metrics_explanation',
                        type=str,
                        default=None)
    parser.add_argument('--explanation',
                        dest='explanation',
                        type=str,
                        default=None)
    parser.add_argument('--problem_type',
                        dest='problem_type',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--model_name',
                        dest='model_name',
                        type=str,
                        required=True,
                        default=argparse.SUPPRESS)
    parser.add_argument('--gcp_resources',
                        dest='gcp_resources',
                        type=_make_parent_dirs_and_return_path,
                        required=True,
                        default=argparse.SUPPRESS)
    parsed_args, _ = parser.parse_known_args(argv)

    _, project_id, _, location, _, model_id = parsed_args.model_name.split('/')
    api_endpoint = location + '-aiplatform.googleapis.com'
    resource_uri_prefix = f'https://{api_endpoint}/v1/'

    with open(parsed_args.metrics) as metrics_file:
        model_evaluation = {
            'metrics':
            to_value(
                next(
                    iter(
                        json.loads(metrics_file.read())['slicedMetrics'][0]
                        ['metrics'].values()))),
            'metrics_schema_uri':
            PROBLEM_TYPE_TO_SCHEMA_URI.get(parsed_args.problem_type),
        }

    if parsed_args.explanation and parsed_args.explanation == "{{$.inputs.artifacts['explanation'].metadata['explanation_gcs_path']}}":
        # metrics_explanation must contain explanation_gcs_path when provided.
        logging.error('"explanation" must contain explanations when provided.')
        sys.exit(13)
    elif parsed_args.explanation:
        explanation_file_name = parsed_args.explanation if not parsed_args.explanation.startswith(
            'gs://') else '/gcs' + parsed_args.explanation[4:]
    elif parsed_args.metrics_explanation and parsed_args.metrics_explanation != "{{$.inputs.artifacts['metrics'].metadata['explanation_gcs_path']}}":
        explanation_file_name = parsed_args.metrics_explanation if not parsed_args.metrics_explanation.startswith(
            'gs://') else '/gcs' + parsed_args.metrics_explanation[4:]
    else:
        explanation_file_name = None
    if explanation_file_name:
        with open(explanation_file_name) as explanation_file:
            model_evaluation['model_explanation'] = {
                'mean_attributions': [{
                    'feature_attributions':
                    to_value(
                        json.loads(explanation_file.read())['explanation']
                        ['attributions'][0]['featureAttributions'])
                }]
            }

    import_model_evaluation_response = aiplatform.gapic.ModelServiceClient(
        client_info=gapic_v1.client_info.ClientInfo(
            user_agent='google-cloud-pipeline-components', ),
        client_options={
            'api_endpoint': api_endpoint,
        }).import_model_evaluation(
            parent=parsed_args.model_name,
            model_evaluation=model_evaluation,
        )
    model_evaluation_name = import_model_evaluation_response.name

    # Write the model evaluation resource to GcpResources output.
    model_eval_resources = GcpResources()
    model_eval_resource = model_eval_resources.resources.add()
    model_eval_resource.resource_type = RESOURCE_TYPE
    model_eval_resource.resource_uri = f'{resource_uri_prefix}{model_evaluation_name}'

    with open(parsed_args.gcp_resources, 'w') as f:
        f.write(json_format.MessageToJson(model_eval_resources))
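
Finally, the importer above is driven the same way the test in Example #16
invokes it, passing flags through argv. A sketch with placeholder paths:

    main([
        '--metrics', '/tmp/metrics.json',
        '--problem_type', 'classification',
        '--model_name', 'projects/p/locations/us-central1/models/m',
        '--gcp_resources', '/tmp/gcp_resources.json',
    ])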