Example #1
 def test_write_metrics(self):
     MetricsEnvironment.process_wide_container().reset()
     write_fn = bigtableio._BigTableWriteFn(self._PROJECT_ID,
                                            self._INSTANCE_ID,
                                            self._TABLE_ID)
     write_fn.table = self.table
     write_fn.start_bundle()
     number_of_rows = 2
     error = Status()
     error.message = 'Entity already exists.'
     error.code = ALREADY_EXISTS
     success = Status()
     success.message = 'Success'
     success.code = OK
     rows_response = [error, success] * number_of_rows
     with patch.object(Table, 'mutate_rows', return_value=rows_response):
         direct_rows = [
             self.generate_row(i) for i in range(number_of_rows * 2)
         ]
         for direct_row in direct_rows:
             write_fn.process(direct_row)
         write_fn.finish_bundle()
         self.verify_write_call_metric(
             self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID,
             ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
                 ALREADY_EXISTS), 2)
         self.verify_write_call_metric(
             self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID,
             ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
                 OK), 2)
Example #2
 def test_write_metrics(self):
     MetricsEnvironment.process_wide_container().reset()
     write_fn = bigtableio._BigTableWriteFn(self._PROJECT_ID,
                                            self._INSTANCE_ID,
                                            self._TABLE_ID)
     write_fn.table = self.table
     write_fn.start_bundle()
     number_of_rows = 2
     error = Status()
     error.message = 'Entity already exists.'
     error.code = ALREADY_EXISTS
     success = Status()
     success.message = 'Success'
     success.code = OK
     rows_response = [error, success] * number_of_rows
     with patch.object(Table, 'mutate_rows', return_value=rows_response):
         direct_rows = [
             self.generate_row(i) for i in range(number_of_rows * 2)
         ]
         for direct_row in direct_rows:
             write_fn.process(direct_row)
         try:
             write_fn.finish_bundle()
         except:  # pylint: disable=bare-except
             # Currently we fail the bundle when there are any failures.
             # TODO(BEAM-13849): remove after bigtableio can selectively retry.
             pass
         self.verify_write_call_metric(
             self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID,
             ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
                 ALREADY_EXISTS), 2)
         self.verify_write_call_metric(
             self._PROJECT_ID, self._INSTANCE_ID, self._TABLE_ID,
             ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
                 OK), 2)
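Both versions of the test call a verify_write_call_metric helper that this listing does not show. A minimal sketch of how such a helper might be written, assuming it reads counters from the process-wide metrics container; the MetricName construction and the exact label set are assumptions rather than part of the listing:

 def verify_write_call_metric(
       self, project_id, instance_id, table_id, status, count):
     # Hypothetical helper (not in this listing): looks up the request counter
     # for one status value and asserts its cumulative count.
     # MetricName is assumed to come from apache_beam.metrics.metricbase.
     resource = resource_identifiers.BigtableTable(
         project_id, instance_id, table_id)
     labels = {
         monitoring_infos.SERVICE_LABEL: 'BigTable',
         monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows',
         monitoring_infos.RESOURCE_LABEL: resource,
         monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: project_id,
         monitoring_infos.INSTANCE_ID_LABEL: instance_id,
         monitoring_infos.TABLE_ID_LABEL: table_id,
         monitoring_infos.STATUS_LABEL: status
     }
     name = MetricName(
         None, None, urn=monitoring_infos.API_REQUEST_COUNT_URN, labels=labels)
     counter = MetricsEnvironment.process_wide_container().get_counter(name)
     self.assertEqual(counter.get_cumulative(), count)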
Example #3
  def __init__(self, client, path, buffer_size, get_project_number):
    self._client = client
    self._path = path
    self._bucket, self._name = parse_gcs_path(path)
    self._buffer_size = buffer_size
    self._get_project_number = get_project_number

    project_number = self._get_project_number(self._bucket)

    # Create a request count metric
    resource = resource_identifiers.GoogleCloudStorageBucket(self._bucket)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Storage',
        monitoring_infos.METHOD_LABEL: 'Objects.get',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
        monitoring_infos.GCS_PROJECT_ID_LABEL: project_number
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)

    # Get object state.
    self._get_request = (
        storage.StorageObjectsGetRequest(
            bucket=self._bucket, object=self._name))
    try:
      metadata = self._get_object_metadata(self._get_request)
      service_call_metric.call('ok')
    except HttpError as http_error:
      service_call_metric.call(http_error)
      if http_error.status_code == 404:
        raise IOError(errno.ENOENT, 'Not found: %s' % self._path)
      else:
        _LOGGER.error(
            'HTTP error while requesting file %s: %s', self._path, http_error)
        raise
    self._size = metadata.size

    # Ensure read is from file of the correct generation.
    self._get_request.generation = metadata.generation

    # Initialize read buffer state.
    self._download_stream = io.BytesIO()
    self._downloader = transfer.Download(
        self._download_stream,
        auto_transfer=False,
        chunksize=self._buffer_size,
        num_retries=20)

    try:
      self._client.objects.Get(self._get_request, download=self._downloader)
      service_call_metric.call('ok')
    except HttpError as e:
      service_call_metric.call(e)
Example #4
 def _query_metric(self, query_name, status):
   project_id = self._spanner_configuration.project
   resource = resource_identifiers.SpannerSqlQuery(project_id, query_name)
   labels = {
       **self.base_labels,
       monitoring_infos.RESOURCE_LABEL: resource,
       monitoring_infos.SPANNER_QUERY_NAME: query_name
   }
   service_call_metric = ServiceCallMetric(
       request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
       base_labels=labels)
   service_call_metric.call(str(status))
Example #5
 def process(self, query, *unused_args, **unused_kwargs):
     if query.namespace is None:
         query.namespace = ''
     _client = helper.get_client(query.project, query.namespace)
     client_query = query._to_client_query(_client)
     # Create request count metric
     resource = resource_identifiers.DatastoreNamespace(
         query.project, query.namespace)
     labels = {
         monitoring_infos.SERVICE_LABEL: 'Datastore',
         monitoring_infos.METHOD_LABEL: 'BatchDatastoreRead',
         monitoring_infos.RESOURCE_LABEL: resource,
         monitoring_infos.DATASTORE_NAMESPACE_LABEL: query.namespace,
         monitoring_infos.DATASTORE_PROJECT_ID_LABEL: query.project,
         monitoring_infos.STATUS_LABEL: 'ok'
     }
     service_call_metric = ServiceCallMetric(
         request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
         base_labels=labels)
     try:
         for client_entity in client_query.fetch(query.limit):
             yield types.Entity.from_client_entity(client_entity)
         service_call_metric.call('ok')
     except (ClientError, GoogleAPICallError) as e:
         # e.code.value contains the numeric http status code.
         service_call_metric.call(e.code.value)
     except HttpError as e:
         service_call_metric.call(e)
Example #6
 def _table_metric(self, table_id, status):
   database_id = self._spanner_configuration.database
   project_id = self._spanner_configuration.project
   resource = resource_identifiers.SpannerTable(
       project_id, database_id, table_id)
   labels = {
       **self.base_labels,
       monitoring_infos.RESOURCE_LABEL: resource,
       monitoring_infos.SPANNER_TABLE_ID: table_id
   }
   service_call_metric = ServiceCallMetric(
       request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
       base_labels=labels)
   service_call_metric.call(str(status))
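Examples #4 and #6 are small per-request helpers; the surrounding Spanner DoFn (not shown in this listing) is expected to call them once per RPC with whatever status it observed. A hypothetical call site, where process_one_batch, self._execute and the status extraction are all assumptions:

 def process_one_batch(self, query_name, table_id):
     # Hypothetical call site: record one status per Spanner RPC.
     try:
         self._execute(query_name)  # assumed helper, not part of this listing
         self._query_metric(query_name, 'ok')
         self._table_metric(table_id, 'ok')
     except Exception as e:  # pylint: disable=broad-except
         # Fall back to a generic label when the error carries no status code.
         status = getattr(e, 'code', 'unknown')
         self._query_metric(query_name, status)
         self._table_metric(table_id, status)
         raise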
Example #7
 def write_mutate_metrics(self, response):
     for status in response:
         code = status.code if status else None
         grpc_status_string = (
             ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
                 code))
         self.service_call_metric.call(grpc_status_string)
Example #8
  def _start_upload(self):
    # This starts the uploader thread.  We are forced to run the uploader in
    # another thread because the apitools uploader insists on taking a stream
    # as input. Happily, this also means we get asynchronous I/O to GCS.
    #
    # The uploader by default transfers data in chunks of 1024 * 1024 bytes at
    # a time, buffering writes until that size is reached.

    project_number = self._get_project_number(self._bucket)

    # Create a request count metric
    resource = resource_identifiers.GoogleCloudStorageBucket(self._bucket)
    labels = {
        monitoring_infos.SERVICE_LABEL: 'Storage',
        monitoring_infos.METHOD_LABEL: 'Objects.insert',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.GCS_BUCKET_LABEL: self._bucket,
        monitoring_infos.GCS_PROJECT_ID_LABEL: project_number
    }
    service_call_metric = ServiceCallMetric(
        request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
        base_labels=labels)
    try:
      self._client.objects.Insert(self._insert_request, upload=self._upload)
      service_call_metric.call('ok')
    except Exception as e:  # pylint: disable=broad-except
      service_call_metric.call(e)
      _LOGGER.error(
          'Error in _start_upload while inserting file %s: %s',
          self._path,
          traceback.format_exc())
      self._upload_thread.last_error = e
    finally:
      self._child_conn.close()
Example #9
 def start_service_call_metrics(self, project_id, instance_id, table_id):
   resource = resource_identifiers.BigtableTable(
       project_id, instance_id, table_id)
   labels = {
       monitoring_infos.SERVICE_LABEL: 'BigTable',
       # TODO(JIRA-11985): Add Ptransform label.
       monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows',
       monitoring_infos.RESOURCE_LABEL: resource,
       monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: (
           self.beam_options['project_id']),
       monitoring_infos.INSTANCE_ID_LABEL: self.beam_options['instance_id'],
       monitoring_infos.TABLE_ID_LABEL: self.beam_options['table_id']
   }
   return ServiceCallMetric(
       request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
       base_labels=labels)
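Example #9 only constructs the metric object; Examples #7 and #11 show the per-row statuses being recorded against it. A simplified sketch of how the two pieces might be wired inside the write DoFn; the start_bundle/finish_bundle bodies below are assumptions (the real DoFn batches mutations), not code from this listing:

 def start_bundle(self):
     # One ServiceCallMetric per bundle, reused for every row status.
     self.service_call_metric = self.start_service_call_metrics(
         self.beam_options['project_id'],
         self.beam_options['instance_id'],
         self.beam_options['table_id'])
     self.rows = []

 def finish_bundle(self):
     # Simplified: mutate_rows returns one Status per row, and each status is
     # recorded via write_mutate_metrics (see Examples #7 and #11).
     responses = self.table.mutate_rows(self.rows)
     self.write_mutate_metrics(responses)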
Example #10
 def write_mutations(self, throttler, rpc_stats_callback, throttle_delay=1):
     """Writes a batch of mutations to Cloud Datastore.

     If a commit fails, it will be retried up to 5 times. All mutations in the
     batch will be committed again, even if the commit was partially
     successful. If the retry limit is exceeded, the last exception from
     Cloud Datastore will be raised.

     Assumes that the Datastore client library does not perform any retries on
     commits. It has not been determined how such retries would interact with
     the retries and throttler used here.
     See ``google.cloud.datastore_v1.gapic.datastore_client_config`` for
     retry config.

     Args:
       throttler: (``apache_beam.io.gcp.datastore.v1new.adaptive_throttler.
         AdaptiveThrottler``) Throttler instance used to select requests to be
         throttled.
       rpc_stats_callback: a function to call with arguments `successes`,
         `failures` and `throttled_secs`; this is called to record successful
         and failed RPCs to Datastore and time spent waiting for throttling.
       throttle_delay: (:class:`float`) time in seconds to sleep when
         throttled.

     Returns:
       (int) The latency of the successful RPC in milliseconds.
     """
     # Client-side throttling.
     while throttler.throttle_request(time.time() * 1000):
         _LOGGER.info(
             "Delaying request for %ds due to previous failures",
             throttle_delay)
         time.sleep(throttle_delay)
         rpc_stats_callback(throttled_secs=throttle_delay)

     if self._batch is None:
         # This only happens when we retry a previously failed batch.
         self._batch = self._client.batch()
         self._batch.begin()
         for element in self._batch_elements:
             self.add_to_batch(element)

     # Create request count metric.
     resource = resource_identifiers.DatastoreNamespace(self._project, "")
     labels = {
         monitoring_infos.SERVICE_LABEL: 'Datastore',
         monitoring_infos.METHOD_LABEL: 'BatchDatastoreWrite',
         monitoring_infos.RESOURCE_LABEL: resource,
         monitoring_infos.DATASTORE_NAMESPACE_LABEL: "",
         monitoring_infos.DATASTORE_PROJECT_ID_LABEL: self._project,
         monitoring_infos.STATUS_LABEL: 'ok'
     }

     service_call_metric = ServiceCallMetric(
         request_count_urn=monitoring_infos.API_REQUEST_COUNT_URN,
         base_labels=labels)

     try:
         start_time = time.time()
         self._batch.commit()
         end_time = time.time()
         service_call_metric.call('ok')

         rpc_stats_callback(successes=1)
         throttler.successful_request(start_time * 1000)
         commit_time_ms = int((end_time - start_time) * 1000)
         return commit_time_ms
     except (ClientError, GoogleAPICallError) as e:
         self._batch = None
         # e.code.value contains the numeric HTTP status code.
         service_call_metric.call(e.code.value)
         rpc_stats_callback(errors=1)
         raise
     except HttpError as e:
         service_call_metric.call(e)
         rpc_stats_callback(errors=1)
         raise
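The docstring above says a failed commit is retried up to 5 times, but the retry wiring itself is outside this excerpt. A hypothetical caller-side loop that would give that behaviour; commit_with_retries and its arguments are illustrative, not part of the listing:

 def commit_with_retries(
       mutate_fn, throttler, rpc_stats_callback, max_retries=5):
     # Illustrative only: every attempt re-commits the whole batch, matching
     # the docstring, and the last exception is re-raised once retries are
     # exhausted.
     last_error = None
     for _ in range(1 + max_retries):
         try:
             return mutate_fn(throttler, rpc_stats_callback)
         except Exception as error:  # pylint: disable=broad-except
             last_error = error
     raise last_error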
Example #11
 def write_mutate_metrics(self, rows):
   for status in rows:
     grpc_status_string = (
         ServiceCallMetric.bigtable_error_code_to_grpc_status_string(
             status.code))
     self.service_call_metric.call(grpc_status_string)