def test_get_or_create_dataset_created(self):
  """get_or_create_dataset inserts the dataset when Get reports a 404."""
  mock_client = mock.Mock()
  mock_client.datasets.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  mock_client.datasets.Insert.return_value = bigquery.Dataset(
      datasetReference=bigquery.DatasetReference(
          projectId='project_id', datasetId='dataset_id'))
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(mock_client)
  created = wrapper.get_or_create_dataset('project_id', 'dataset_id')
  self.assertEqual(created.datasetReference.datasetId, 'dataset_id')
def test_file_checksum_matcher_service_error(self, mock_match):
  """The matcher retries on service errors, then re-raises the HttpError."""
  mock_match.side_effect = HttpError(
      response={'status': '404'}, url='', content='Not Found',
  )
  matcher = verifiers.FileChecksumMatcher('gs://dummy/path', Mock())
  with self.assertRaises(HttpError):
    hc_assert_that(self._mock_result, matcher)
  # One initial attempt plus MAX_RETRIES retries were made.
  self.assertTrue(mock_match.called)
  self.assertEqual(verifiers.MAX_RETRIES + 1, mock_match.call_count)
def test_insert_error_latency_recorded(self):
  """A failing insert must still report its latency to the recorder."""
  mock_client = mock.Mock()
  mock_client.tabledata.InsertAll.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(mock_client)
  recorder = mock.Mock()
  with self.assertRaises(HttpError):
    # NOTE: 'latency_recoder' spelling matches the wrapper's keyword.
    wrapper._insert_all_rows('', '', '', [], latency_recoder=recorder)
  self.assertTrue(recorder.record.called)
def Copy(self, copy_request):  # pylint: disable=invalid-name
  """Fake GCS Copy: clone the source object into the destination slot."""
  source = self.get_file(copy_request.sourceBucket, copy_request.sourceObject)
  if not source:
    # Mirror the real service: a missing source object yields HTTP 404.
    raise HttpError(
        httplib2.Response({'status': '404'}), '404 Not Found',
        'https://fake/url')
  next_generation = 1 + self.get_last_generation(
      copy_request.destinationBucket, copy_request.destinationObject)
  self.add_file(
      FakeFile(copy_request.destinationBucket,
               copy_request.destinationObject, source.contents,
               next_generation))
def test_no_table_and_create_never(self, patched_time_sleep):
  """CREATE_NEVER with a missing table must raise RuntimeError.

  Args:
    patched_time_sleep: mock patch over time.sleep (skips retry delays).
  """
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  # Fix: assertRaisesRegexp is a deprecated alias removed in Python 3.12;
  # assertRaisesRegex is the supported spelling.
  with self.assertRaisesRegex(
      RuntimeError,
      r'Table project:dataset\.table not found but create '
      r'disposition is CREATE_NEVER'):
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
def Get(self, get_request, download=None):  # pylint: disable=invalid-name
  """Fake GCS Get: return metadata, or wire up a ranged download."""
  target = self.get_file(get_request.bucket, get_request.object)
  if target is None:
    # Missing object behaves like the real service: HTTP 404.
    raise HttpError({'status': 404}, None, None)
  if download is None:
    if target in self._fail_when_getting_metadata:
      raise HttpError({'status': 429}, None, None)
    return target.get_metadata()
  if target in self._fail_when_reading:
    raise HttpError({'status': 429}, None, None)
  stream = download.stream

  def get_range_callback(start, end):
    # Reject out-of-bounds or inverted ranges before writing anything.
    if not 0 <= start <= end < len(target.contents):
      raise ValueError(
          'start=%d end=%d len=%s' % (start, end, len(target.contents)))
    stream.write(target.contents[start:end + 1])

  download.GetRange = get_range_callback
def test_get_or_create_table_race_condition(self):
  """A 409 on Insert falls back to fetching the concurrently created table."""
  client = mock.Mock()
  client.tables.Insert.side_effect = HttpError(
      response={'status': '409'}, url='', content='')
  # First Get misses, second (after the conflicting Insert) succeeds.
  client.tables.Get.side_effect = [None, 'table_id']
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(client)
  schema = bigquery.TableSchema(fields=[
      bigquery.TableFieldSchema(name='b', type='BOOLEAN', mode='REQUIRED')
  ])
  result = wrapper.get_or_create_table(
      'project_id', 'dataset_id', 'table_id', schema, False, False)
  self.assertEqual(result, 'table_id')
def test_no_table_and_create_if_needed_and_no_schema(
    self, patched_time_sleep):
  """CREATE_IF_NEEDED without a schema on a missing table must fail.

  Args:
    patched_time_sleep: mock patch over time.sleep (skips retry delays).
  """
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
  # Fix: assertRaisesRegexp is a deprecated alias removed in Python 3.12;
  # assertRaisesRegex is the supported spelling.
  with self.assertRaisesRegex(
      RuntimeError,
      r'Table project:dataset\.table requires a schema\. None '
      r'can be inferred because the table does not exist'):
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
def test_no_table_and_create_never(self, patched_time_sleep):
  """CREATE_NEVER with a missing table raises with an exact message.

  Args:
    patched_time_sleep: mock patch over time.sleep (skips retry delays).
  """
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  with self.assertRaises(RuntimeError) as exn:
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
  # Fix: BaseException.message was removed in Python 3; str() of the
  # exception is the portable way to read its message.
  self.assertEqual(
      str(exn.exception),
      'Table project:dataset.table not found but create disposition is '
      'CREATE_NEVER.')
def test_no_table_and_create_if_needed_and_no_schema(self):
  """CREATE_IF_NEEDED without a schema fails with an exact message."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = df.io.BigQueryDisposition.CREATE_IF_NEEDED
  with self.assertRaises(RuntimeError) as exn:
    with df.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
  # Fix: BaseException.message was removed in Python 3; str() of the
  # exception is the portable way to read its message.
  self.assertEqual(
      str(exn.exception),
      'Table project:dataset.table requires a schema. None can be inferred '
      'because the table does not exist.')
def Get(self, get_request, download=None):  # pylint: disable=invalid-name
  """Fake GCS Get: return metadata, or wire up a ranged download.

  Args:
    get_request: request carrying .bucket and .object names.
    download: optional download object; when given, its GetRange hook is
      installed and nothing is returned.
  """
  f = self.get_file(get_request.bucket, get_request.object)
  if f is None:
    # Failing with an HTTP 404 if file does not exist.
    raise HttpError({'status': 404}, None, None)
  if download is None:
    return f.get_metadata()
  else:
    stream = download.stream

    def get_range_callback(start, end):
      # Fix: validate with an explicit exception instead of `assert`,
      # which is stripped under `python -O` (and matches the sibling fake).
      if not 0 <= start <= end < len(f.contents):
        raise ValueError(
            'start=%d end=%d len=%s' % (start, end, len(f.contents)))
      stream.write(f.contents[start:end + 1])

    download.GetRange = get_range_callback
def test_downloader_fail_to_get_project_number(self, mock_get):
  """No project-id-labeled metric is emitted when the bucket lookup 403s."""
  # Raising an error when listing the GCS bucket so that the project
  # number fails to be retrieved.
  mock_get.side_effect = HttpError({'status': 403}, None, None)
  # Clear the process wide metric container.
  MetricsEnvironment.process_wide_container().reset()
  file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
  file_size = 5 * 1024 * 1024 + 100
  random_file = self._insert_random_file(self.client, file_name, file_size)
  self.gcs.open(file_name, 'r')

  resource = resource_identifiers.GoogleCloudStorageBucket(
      random_file.bucket)
  labels_with_project = {
      monitoring_infos.SERVICE_LABEL: 'Storage',
      monitoring_infos.METHOD_LABEL: 'Objects.get',
      monitoring_infos.RESOURCE_LABEL: resource,
      monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
      monitoring_infos.GCS_PROJECT_ID_LABEL: str(DEFAULT_PROJECT_NUMBER),
      monitoring_infos.STATUS_LABEL: 'ok'
  }
  counter_name = MetricName(
      None,
      None,
      urn=monitoring_infos.API_REQUEST_COUNT_URN,
      labels=labels_with_project)
  # The fully-labeled counter stays at zero: the project number could not
  # be resolved, so no metric carries the project-id label.
  self.assertEqual(
      MetricsEnvironment.process_wide_container().get_counter(
          counter_name).get_cumulative(),
      0)

  # The same labels minus the project id DO accumulate request counts.
  labels_without_project_id = dict(labels_with_project)
  del labels_without_project_id[monitoring_infos.GCS_PROJECT_ID_LABEL]
  counter_name = MetricName(
      None,
      None,
      urn=monitoring_infos.API_REQUEST_COUNT_URN,
      labels=labels_without_project_id)
  self.assertEqual(
      MetricsEnvironment.process_wide_container().get_counter(
          counter_name).get_cumulative(),
      2)
def testBucketNameTaken(self):
  """Bucket creation retries with numeric suffixes until the name is free."""
  self._mock_find_bucket.return_value = None
  # The first three insert attempts report a name collision (HTTP 409).
  conflict = HttpError({'status': 409}, None, None)
  self._mock_insert_bucket.side_effect = [conflict] * 3 + [None]
  results = self.Run(
      'compute diagnose export-logs --zone us-west1-a instance-1')
  # The _# suffix is added after the first failure, and counts up until
  # the bucket name is unique.
  expected_suffix = '{}_2'.format(_PROJECT_NUM)
  self.assertTrue(results['bucket'].endswith(expected_suffix))
  self.assertEqual(4, self._mock_insert_bucket.call_count)
def test_no_table_and_create_if_needed(self):
  """CREATE_IF_NEEDED plus a schema creates the missing table."""
  client = mock.Mock()
  inserted_table = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  client.tables.Insert.return_value = inserted_table
  sink = df.io.BigQuerySink(
      'project:dataset.table',
      schema='somefield:INTEGER',
      create_disposition=df.io.BigQueryDisposition.CREATE_IF_NEEDED)
  with sink.writer(client):
    pass
  # The writer first probed for the table, then created it.
  self.assertTrue(client.tables.Get.called)
  self.assertTrue(client.tables.Insert.called)
def _ProbeObjectAccessWithClient(self, key, use_service_account, provider,
                                 client_email, gcs_path, generation, logger,
                                 region, billing_project):
  """Performs a head request against a signed URL to check for read access.

  Returns:
    The HTTP status code of the HEAD probe (200, 403, or 404).

  Raises:
    CommandException: on any other HTTP response code or request failure.
  """
  # Choose a reasonable time in the future; if the user's system clock is
  # 60 or more seconds behind the server's this will generate an error.
  signed_url = _GenSignedUrl(key=key,
                             api=self.gsutil_api,
                             use_service_account=use_service_account,
                             provider=provider,
                             client_id=client_email,
                             method='HEAD',
                             duration=timedelta(seconds=60),
                             gcs_path=gcs_path,
                             generation=generation,
                             logger=logger,
                             region=region,
                             billing_project=billing_project,
                             string_to_sign_debug=True)
  try:
    h = GetNewHttp()
    req = Request(signed_url, 'HEAD')
    response = MakeRequest(h, req)
    if response.status_code not in [200, 403, 404]:
      raise HttpError.FromResponse(response)
    return response.status_code
  except HttpError as http_error:
    # Fix: objects have no `has_attr` method — the original
    # `http_error.has_attr('response')` raised AttributeError and masked
    # the real error; the builtin hasattr() is the correct spelling.
    if hasattr(http_error, 'response'):
      error_response = http_error.response
      error_string = ('Unexpected HTTP response code %s while querying '
                      'object readability. Is your system clock accurate?' %
                      error_response.status_code)
      if error_response.content:
        error_string += ' Content: %s' % error_response.content
    else:
      error_string = ('Expected an HTTP response code of '
                      '200 while querying object readability, but received '
                      'an error: %s' % http_error)
    raise CommandException(error_string)
def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path):
  """Performs a head request against a signed url to check for read access.

  Returns:
    The HTTP status code of the HEAD probe (200, 403, or 404).

  Raises:
    CommandException: on any other HTTP response code.
  """
  signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                             int(time.time()) + 10, gcs_path)
  try:
    h = GetNewHttp()
    req = Request(signed_url, 'HEAD')
    response = MakeRequest(h, req)
    if response.status_code not in [200, 403, 404]:
      raise HttpError(response)
    return response.status_code
  except HttpError as e:
    # Fix: the implicitly concatenated literals were missing a space and
    # produced "queryingobject readability" in the user-facing message.
    raise CommandException('Unexpected response code while querying '
                           'object readability ({0})'.format(e.message))
def Rewrite(self, rewrite_request):  # pylint: disable=invalid-name
  """Fake GCS Rewrite: first call copies and pauses, second call finishes."""
  if rewrite_request.rewriteToken == self.REWRITE_TOKEN:
    # Resumed call: report the rewrite as complete.
    return storage.RewriteResponse(
        done=True,
        objectSize=100,
        resource=storage.Object(),
        totalBytesRewritten=100)
  source = self.get_file(
      rewrite_request.sourceBucket, rewrite_request.sourceObject)
  if not source:
    raise HttpError(
        httplib2.Response({'status': '404'}), '404 Not Found',
        'https://fake/url')
  next_generation = self.get_last_generation(
      rewrite_request.destinationBucket,
      rewrite_request.destinationObject) + 1
  self.add_file(
      FakeFile(rewrite_request.destinationBucket,
               rewrite_request.destinationObject, source.contents,
               next_generation))
  time.sleep(10)  # time.sleep and time.time are mocked below.
  return storage.RewriteResponse(
      done=False,
      objectSize=100,
      rewriteToken=self.REWRITE_TOKEN,
      totalBytesRewritten=5)
def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path, logger):
  """Performs a head request against a signed url to check for read access.

  Returns:
    The HTTP status code of the HEAD probe (200, 403, or 404).

  Raises:
    CommandException: on any other HTTP response code or request failure.
  """
  # Choose a reasonable time in the future; if the user's system clock is
  # 60 or more seconds behind the server's this will generate an error.
  signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                             int(time.time()) + 60, gcs_path, logger)
  response = None
  try:
    h = GetNewHttp()
    req = Request(signed_url, 'HEAD')
    response = MakeRequest(h, req)
    if response.status_code not in [200, 403, 404]:
      raise HttpError.FromResponse(response)
    return response.status_code
  except HttpError as http_error:
    # Fix: `response` was unbound (NameError) when GetNewHttp/MakeRequest
    # raised the HttpError themselves; prefer the response attached to the
    # error, then fall back to the locally captured one.
    error_response = getattr(http_error, 'response', None) or response
    status = (error_response.status_code
              if error_response is not None else 'unknown')
    error_string = ('Unexpected HTTP response code %s while querying '
                    'object readability. Is your system clock accurate?' %
                    status)
    if error_response is not None and error_response.content:
      error_string += ' Content: %s' % error_response.content
    raise CommandException(error_string)
def Get(self, path):
  """Fake bucket lookup keyed on magic path names."""
  if path == 'test-bucket-not-found':
    # Simulate a bucket that does not exist.
    raise HttpNotFoundError({'status': 404}, {}, '')
  if path == 'test-bucket-not-verified':
    # Simulate a bucket that exists but fails verification.
    raise HttpError({'status': 400}, {}, '')
def http_error(self, code):
  """Raise an HttpError carrying the given status code.

  Fails fast when the GCP extras (and hence HttpError) are unavailable.
  """
  if HttpError is None:
    # The HttpError import is guarded elsewhere; without GCP extras this
    # test cannot construct the exception it is meant to raise.
    raise RuntimeError(
        "This is not a valid test as GCP is not enabled")
  raise HttpError({'status': str(code)}, '', '')
def http_error(self, code):
  """Raise an HttpError whose response status is the given code."""
  status = str(code)
  raise HttpError({'status': status}, '', '')