def test_get_or_create_dataset_created(self):
     client = mock.Mock()
     client.datasets.Get.side_effect = HttpError(response={'status': '404'},
                                                 url='',
                                                 content='')
     client.datasets.Insert.return_value = bigquery.Dataset(
         datasetReference=bigquery.DatasetReference(projectId='project_id',
                                                    datasetId='dataset_id'))
     wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
     new_dataset = wrapper.get_or_create_dataset('project_id', 'dataset_id')
     self.assertEqual(new_dataset.datasetReference.datasetId, 'dataset_id')
Example #2
 def test_file_checksum_matcher_service_error(self, mock_match):
     mock_match.side_effect = HttpError(
         response={'status': '404'},
         url='',
         content='Not Found',
     )
     matcher = verifiers.FileChecksumMatcher('gs://dummy/path', Mock())
     with self.assertRaises(HttpError):
         hc_assert_that(self._mock_result, matcher)
     self.assertTrue(mock_match.called)
     self.assertEqual(verifiers.MAX_RETRIES + 1, mock_match.call_count)
Example #3
 def test_insert_error_latency_recorded(self):
     client = mock.Mock()
     client.tabledata.InsertAll.side_effect = HttpError(
         response={'status': '404'}, url='', content='')
     wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
     mock_recoder = mock.Mock()
     with self.assertRaises(HttpError):
         wrapper._insert_all_rows('',
                                  '',
                                  '', [],
                                  latency_recoder=mock_recoder)
     self.assertTrue(mock_recoder.record.called)
Example #4
 def Copy(self, copy_request):  # pylint: disable=invalid-name
     src_file = self.get_file(copy_request.sourceBucket,
                              copy_request.sourceObject)
     if not src_file:
         raise HttpError(httplib2.Response({'status': '404'}),
                         '404 Not Found', 'https://fake/url')
     generation = self.get_last_generation(
         copy_request.destinationBucket, copy_request.destinationObject) + 1
     dest_file = FakeFile(copy_request.destinationBucket,
                          copy_request.destinationObject, src_file.contents,
                          generation)
     self.add_file(dest_file)
Example #5
 def test_no_table_and_create_never(self, patched_time_sleep):
   client = mock.Mock()
   client.tables.Get.side_effect = HttpError(
       response={'status': '404'}, url='', content='')
   create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
   with self.assertRaisesRegexp(
       RuntimeError, r'Table project:dataset\.table not found but create '
                     r'disposition is CREATE_NEVER'):
     with beam.io.BigQuerySink(
         'project:dataset.table',
         create_disposition=create_disposition).writer(client):
       pass
Example #6
    def Get(self, get_request, download=None):  # pylint: disable=invalid-name
        f = self.get_file(get_request.bucket, get_request.object)
        if f is None:
            # Failing with an HTTP 404 if file does not exist.
            raise HttpError({'status': 404}, None, None)
        if download is None:
            if f in self._fail_when_getting_metadata:
                raise HttpError({'status': 429}, None, None)
            return f.get_metadata()
        else:
            if f in self._fail_when_reading:
                raise HttpError({'status': 429}, None, None)
            stream = download.stream

            def get_range_callback(start, end):
                if not 0 <= start <= end < len(f.contents):
                    raise ValueError('start=%d end=%d len=%s' %
                                     (start, end, len(f.contents)))
                stream.write(f.contents[start:end + 1])

            download.GetRange = get_range_callback
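The fake Get above does not return file bytes directly; it installs a GetRange callback on the download object, and the caller then pulls inclusive byte ranges through that callback into download.stream. The following is a minimal, self-contained sketch of that pattern; FakeDownload and install_get_range are illustrative names, not part of the original code.

import io


class FakeDownload(object):
    """Hypothetical stand-in for the download object: only .stream and .GetRange are used."""
    def __init__(self):
        self.stream = io.BytesIO()
        self.GetRange = None  # installed by the fake service, as in Get() above


def install_get_range(download, contents):
    def get_range_callback(start, end):
        # The end index is inclusive, matching the slice contents[start:end + 1] above.
        if not 0 <= start <= end < len(contents):
            raise ValueError('start=%d end=%d len=%s' % (start, end, len(contents)))
        download.stream.write(contents[start:end + 1])

    download.GetRange = get_range_callback


download = FakeDownload()
install_get_range(download, b'hello world')
download.GetRange(0, 4)  # writes bytes 0..4 inclusive
assert download.stream.getvalue() == b'hello'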
Example #7
 def test_get_or_create_table_race_condition(self):
     client = mock.Mock()
     client.tables.Insert.side_effect = HttpError(
         response={'status': '409'}, url='', content='')
     client.tables.Get.side_effect = [None, 'table_id']
     wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
     new_table = wrapper.get_or_create_table(
         'project_id', 'dataset_id', 'table_id',
         bigquery.TableSchema(fields=[
             bigquery.TableFieldSchema(
                 name='b', type='BOOLEAN', mode='REQUIRED')
         ]), False, False)
     self.assertEqual(new_table, 'table_id')
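The mocks above simulate a create race: the first Get finds no table, Insert then fails with a 409 because another worker created the table in the meantime, and a second Get returns the existing table. The sketch below illustrates that get-or-create fallback under those assumptions; it is not Beam's actual implementation, and get_table, insert_table, and get_or_create are illustrative names. HttpError is assumed to be apitools' exception, matching how the examples construct it.

from apitools.base.py.exceptions import HttpError


def get_or_create(get_table, insert_table):
    existing = get_table()
    if existing is not None:
        return existing
    try:
        return insert_table()
    except HttpError as err:
        # 409 Conflict: another worker created the table between Get and Insert,
        # so fetch it again instead of failing. The mock's response is a plain
        # dict, so the status is compared as the string '409'.
        if err.response.get('status') == '409':
            return get_table()
        raise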
Example #8
 def test_no_table_and_create_if_needed_and_no_schema(
     self, patched_time_sleep):
   client = mock.Mock()
   client.tables.Get.side_effect = HttpError(
       response={'status': '404'}, url='', content='')
   create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
   with self.assertRaisesRegexp(
       RuntimeError, r'Table project:dataset\.table requires a schema\. None '
                     r'can be inferred because the table does not exist'):
     with beam.io.BigQuerySink(
         'project:dataset.table',
         create_disposition=create_disposition).writer(client):
       pass
Example #9
 def test_no_table_and_create_never(self, patched_time_sleep):
   client = mock.Mock()
   client.tables.Get.side_effect = HttpError(
       response={'status': '404'}, url='', content='')
   create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
   with self.assertRaises(RuntimeError) as exn:
     with beam.io.BigQuerySink(
         'project:dataset.table',
         create_disposition=create_disposition).writer(client):
       pass
   self.assertEqual(
       exn.exception.message,
       'Table project:dataset.table not found but create disposition is '
       'CREATE_NEVER.')
Example #10
 def test_no_table_and_create_if_needed_and_no_schema(self):
     client = mock.Mock()
     client.tables.Get.side_effect = HttpError(response={'status': '404'},
                                               url='',
                                               content='')
     create_disposition = df.io.BigQueryDisposition.CREATE_IF_NEEDED
     with self.assertRaises(RuntimeError) as exn:
         with df.io.BigQuerySink(
                 'project:dataset.table',
                 create_disposition=create_disposition).writer(client):
             pass
     self.assertEqual(
         exn.exception.message,
         'Table project:dataset.table requires a schema. None can be inferred '
         'because the table does not exist.')
Example #11
    def Get(self, get_request, download=None):  # pylint: disable=invalid-name
        f = self.get_file(get_request.bucket, get_request.object)
        if f is None:
            # Failing with an HTTP 404 if file does not exist.
            raise HttpError({'status': 404}, None, None)
        if download is None:
            return f.get_metadata()
        else:
            stream = download.stream

            def get_range_callback(start, end):
                assert start >= 0 and end >= start and end < len(f.contents)
                stream.write(f.contents[start:end + 1])

            download.GetRange = get_range_callback
Example #12
    def test_downloader_fail_to_get_project_number(self, mock_get):
        # Raise an error when listing the GCS bucket so that the project
        # number cannot be retrieved.
        mock_get.side_effect = HttpError({'status': 403}, None, None)
        # Clear the process wide metric container.
        MetricsEnvironment.process_wide_container().reset()

        file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
        file_size = 5 * 1024 * 1024 + 100
        random_file = self._insert_random_file(self.client, file_name,
                                               file_size)
        self.gcs.open(file_name, 'r')

        resource = resource_identifiers.GoogleCloudStorageBucket(
            random_file.bucket)
        labels = {
            monitoring_infos.SERVICE_LABEL: 'Storage',
            monitoring_infos.METHOD_LABEL: 'Objects.get',
            monitoring_infos.RESOURCE_LABEL: resource,
            monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
            monitoring_infos.GCS_PROJECT_ID_LABEL: str(DEFAULT_PROJECT_NUMBER),
            monitoring_infos.STATUS_LABEL: 'ok'
        }

        metric_name = MetricName(None,
                                 None,
                                 urn=monitoring_infos.API_REQUEST_COUNT_URN,
                                 labels=labels)
        metric_value = MetricsEnvironment.process_wide_container().get_counter(
            metric_name).get_cumulative()

        self.assertEqual(metric_value, 0)

        labels_without_project_id = {
            monitoring_infos.SERVICE_LABEL: 'Storage',
            monitoring_infos.METHOD_LABEL: 'Objects.get',
            monitoring_infos.RESOURCE_LABEL: resource,
            monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
            monitoring_infos.STATUS_LABEL: 'ok'
        }
        metric_name = MetricName(None,
                                 None,
                                 urn=monitoring_infos.API_REQUEST_COUNT_URN,
                                 labels=labels_without_project_id)
        metric_value = MetricsEnvironment.process_wide_container().get_counter(
            metric_name).get_cumulative()

        self.assertEqual(metric_value, 2)
Example #13
    def testBucketNameTaken(self):
        self._mock_find_bucket.return_value = None
        # Inserting the bucket will fail the first three times, saying a bucket
        # with that name already exists.
        error409 = HttpError({'status': 409}, None, None)
        self._mock_insert_bucket.side_effect = [
            error409, error409, error409, None
        ]

        results = self.Run('compute diagnose export-logs '
                           '--zone us-west1-a instance-1')

        # The _# suffix is added after the first failure, and counts up until the
        # bucket name is unique.
        suffix = '{}_2'.format(_PROJECT_NUM)
        self.assertTrue(results['bucket'].endswith(suffix))
        self.assertEqual(4, self._mock_insert_bucket.call_count)
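A minimal sketch of the naming behavior this test asserts, not the gcloud implementation: the first insert uses the bare name, and after each 409 conflict a counting suffix is appended until insertion succeeds. pick_unique_bucket_name and insert_bucket are illustrative names; with three conflicts the fourth attempt succeeds with a name ending in _2, matching the assertions above.

from apitools.base.py.exceptions import HttpError


def pick_unique_bucket_name(base_name, insert_bucket):
    # insert_bucket is assumed to raise HttpError({'status': 409}, None, None)
    # while the candidate name is taken, mirroring the mocked Insert calls above.
    name, suffix = base_name, 0
    while True:
        try:
            insert_bucket(name)
            return name
        except HttpError:
            name = '{}_{}'.format(base_name, suffix)
            suffix += 1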
Example #14
 def test_no_table_and_create_if_needed(self):
     client = mock.Mock()
     table = bigquery.Table(tableReference=bigquery.TableReference(
         projectId='project', datasetId='dataset', tableId='table'),
                            schema=bigquery.TableSchema())
     client.tables.Get.side_effect = HttpError(response={'status': '404'},
                                               url='',
                                               content='')
     client.tables.Insert.return_value = table
     create_disposition = df.io.BigQueryDisposition.CREATE_IF_NEEDED
     with df.io.BigQuerySink(
             'project:dataset.table',
             schema='somefield:INTEGER',
             create_disposition=create_disposition).writer(client):
         pass
     self.assertTrue(client.tables.Get.called)
     self.assertTrue(client.tables.Insert.called)
Example #15
    def _ProbeObjectAccessWithClient(self, key, use_service_account, provider,
                                     client_email, gcs_path, generation,
                                     logger, region, billing_project):
        """Performs a head request against a signed URL to check for read access."""

        # Choose a reasonable time in the future; if the user's system clock is
        # 60 or more seconds behind the server's this will generate an error.
        signed_url = _GenSignedUrl(key=key,
                                   api=self.gsutil_api,
                                   use_service_account=use_service_account,
                                   provider=provider,
                                   client_id=client_email,
                                   method='HEAD',
                                   duration=timedelta(seconds=60),
                                   gcs_path=gcs_path,
                                   generation=generation,
                                   logger=logger,
                                   region=region,
                                   billing_project=billing_project,
                                   string_to_sign_debug=True)

        try:
            h = GetNewHttp()
            req = Request(signed_url, 'HEAD')
            response = MakeRequest(h, req)

            if response.status_code not in [200, 403, 404]:
                raise HttpError.FromResponse(response)

            return response.status_code
        except HttpError as http_error:
            if hasattr(http_error, 'response'):
                error_response = http_error.response
                error_string = (
                    'Unexpected HTTP response code %s while querying '
                    'object readability. Is your system clock accurate?' %
                    error_response.status_code)
                if error_response.content:
                    error_string += ' Content: %s' % error_response.content
            else:
                error_string = (
                    'Expected an HTTP response code of '
                    '200 while querying object readability, but received '
                    'an error: %s' % http_error)
            raise CommandException(error_string)
Example #16
  def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path):
    """Performs a head request against a signed url to check for read access."""

    signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                               int(time.time()) + 10, gcs_path)

    try:
      h = GetNewHttp()
      req = Request(signed_url, 'HEAD')
      response = MakeRequest(h, req)

      if response.status_code not in [200, 403, 404]:
        raise HttpError(response)

      return response.status_code
    except HttpError as e:
      raise CommandException('Unexpected response code while querying '
                             'object readability ({0})'.format(e.message))
Example #17
  def Rewrite(self, rewrite_request):  # pylint: disable=invalid-name
    if rewrite_request.rewriteToken == self.REWRITE_TOKEN:
      dest_object = storage.Object()
      return storage.RewriteResponse(
          done=True, objectSize=100, resource=dest_object,
          totalBytesRewritten=100)

    src_file = self.get_file(rewrite_request.sourceBucket,
                             rewrite_request.sourceObject)
    if not src_file:
      raise HttpError(
          httplib2.Response({'status': '404'}), '404 Not Found',
          'https://fake/url')
    generation = self.get_last_generation(rewrite_request.destinationBucket,
                                          rewrite_request.destinationObject) + 1
    dest_file = FakeFile(rewrite_request.destinationBucket,
                         rewrite_request.destinationObject, src_file.contents,
                         generation)
    self.add_file(dest_file)
    time.sleep(10)  # time.sleep and time.time are mocked below.
    return storage.RewriteResponse(
        done=False, objectSize=100, rewriteToken=self.REWRITE_TOKEN,
        totalBytesRewritten=5)
Example #18
  def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path, logger):
    """Performs a head request against a signed url to check for read access."""

    # Choose a reasonable time in the future; if the user's system clock is
    # 60 or more seconds behind the server's this will generate an error.
    signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                               int(time.time()) + 60, gcs_path, logger)

    try:
      h = GetNewHttp()
      req = Request(signed_url, 'HEAD')
      response = MakeRequest(h, req)

      if response.status_code not in [200, 403, 404]:
        raise HttpError.FromResponse(response)

      return response.status_code
    except HttpError:
      error_string = ('Unexpected HTTP response code %s while querying '
                      'object readability. Is your system clock accurate?'
                      % response.status_code)
      if response.content:
        error_string += ' Content: %s' % response.content
      raise CommandException(error_string)
Example #19
 def Get(self, path):
     if path == 'test-bucket-not-found':
         raise HttpNotFoundError({'status': 404}, {}, '')
     elif path == 'test-bucket-not-verified':
         raise HttpError({'status': 400}, {}, '')
Example #20
 def http_error(self, code):
     if HttpError is None:
         raise RuntimeError(
             "This is not a valid test as GCP is not enabled")
     raise HttpError({'status': str(code)}, '', '')
Example #21
 def http_error(self, code):
     raise HttpError({'status': str(code)}, '', '')
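As a closing illustration for the helpers in the last two examples, here is a hedged, self-contained usage sketch: a hypothetical unittest case that calls the http_error helper and checks the status it raised. The apitools import path and the access to .response reflect how the examples above construct HttpError(response, content, url); the test class and method names are assumptions, not part of the original examples.

import unittest

try:
    from apitools.base.py.exceptions import HttpError
except ImportError:  # GCP dependencies not installed, as guarded in Example #20
    HttpError = None


class HttpErrorHelperTest(unittest.TestCase):
    def http_error(self, code):
        if HttpError is None:
            raise RuntimeError("This is not a valid test as GCP is not enabled")
        raise HttpError({'status': str(code)}, '', '')

    @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed')
    def test_helper_raises_with_status(self):
        with self.assertRaises(HttpError) as ctx:
            self.http_error(503)
        # The response dict passed to the constructor is preserved on the error.
        self.assertEqual(ctx.exception.response['status'], '503')


if __name__ == '__main__':
    unittest.main()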