def test_delete_non_existing_table(self):
  """Deleting a missing table swallows the service's 404 instead of raising."""
  bq_client = mock.Mock()
  bq_client.tables.Delete.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  wrapper = beam.io.bigquery.BigQueryWrapper(bq_client)
  wrapper._delete_table('', '', '')
  self.assertTrue(bq_client.tables.Delete.called)
def Rewrite(self, rewrite_request):  # pylint: disable=invalid-name
  """Fake GCS Rewrite: first call copies and yields a token, resume finishes.

  A request carrying REWRITE_TOKEN is treated as the resumed call and reports
  completion; otherwise the source is copied to the destination at the next
  generation and a partial response with the token is returned.
  """
  if rewrite_request.rewriteToken == self.REWRITE_TOKEN:
    # Resumed call: report the rewrite as complete.
    return storage.RewriteResponse(
        done=True,
        objectSize=100,
        resource=storage.Object(),
        totalBytesRewritten=100)
  source = self.get_file(
      rewrite_request.sourceBucket, rewrite_request.sourceObject)
  if not source:
    raise HttpError(
        httplib2.Response({'status': '404'}), '404 Not Found',
        'https://fake/url')
  next_generation = 1 + self.get_last_generation(
      rewrite_request.destinationBucket, rewrite_request.destinationObject)
  self.add_file(
      FakeFile(
          rewrite_request.destinationBucket,
          rewrite_request.destinationObject,
          source.contents,
          next_generation))
  time.sleep(10)  # time.sleep and time.time are mocked below.
  return storage.RewriteResponse(
      done=False,
      objectSize=100,
      rewriteToken=self.REWRITE_TOKEN,
      totalBytesRewritten=5)
def test_dofn_client_start_bundle_create_called(self):
  """start_bundle with CREATE_IF_NEEDED inserts the table when Get 404s."""
  bq_client = mock.Mock()
  bq_client.tables.Get.side_effect = HttpError(
      response={'status': 404}, content=None, url=None)
  bq_client.tables.Insert.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project_id', datasetId='dataset_id', tableId='table_id'))
  fn = beam.io.gcp.bigquery.BigQueryWriteFn(
      table_id='table_id',
      dataset_id='dataset_id',
      project_id='project_id',
      batch_size=2,
      schema={
          'fields': [
              {'name': 'month', 'type': 'INTEGER', 'mode': 'NULLABLE'}]
      },
      create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
      write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND,
      kms_key='kms_key',
      test_client=bq_client)
  fn.start_bundle()
  self.assertTrue(bq_client.tables.Get.called)
  self.assertTrue(bq_client.tables.Insert.called)
def test_delete_non_existing_dataset(self):
  """Deleting a missing dataset swallows the service's 404."""
  bq_client = mock.Mock()
  bq_client.datasets.Delete.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  wrapper = beam.io.google_cloud_platform.bigquery.BigQueryWrapper(bq_client)
  wrapper._delete_dataset('', '')
  self.assertTrue(bq_client.datasets.Delete.called)
def Delete(self, delete_request):  # pylint: disable=invalid-name
  """Fake GCS Delete: removes the object, or 404s if it does not exist."""
  # Emulate the GCS service: deleting a missing object raises a 404.
  # (The previous comment said "if this object already exists", which
  # inverted the actual condition below.)
  if self.get_file(delete_request.bucket, delete_request.object):
    self.delete_file(delete_request.bucket, delete_request.object)
  else:
    raise HttpError(
        httplib2.Response({'status': '404'}), '404 Not Found',
        'https://fake/url')
def test_delete_table_retries_for_timeouts(self, patched_time_sleep):
  """A 408 from tables.Delete is retried; the retry succeeds."""
  bq_client = mock.Mock()
  bq_client.tables.Delete.side_effect = [
      HttpError(response={'status': '408'}, url='', content=''),
      bigquery.BigqueryTablesDeleteResponse(),
  ]
  wrapper = beam.io.bigquery.BigQueryWrapper(bq_client)
  wrapper._delete_table('', '', '')
  self.assertTrue(bq_client.tables.Delete.called)
def test_get_or_create_dataset_created(self):
  """When Get 404s, get_or_create_dataset inserts and returns the dataset."""
  bq_client = mock.Mock()
  bq_client.datasets.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  bq_client.datasets.Insert.return_value = bigquery.Dataset(
      datasetReference=bigquery.DatasetReference(
          projectId='project_id', datasetId='dataset_id'))
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
  created = wrapper.get_or_create_dataset('project_id', 'dataset_id')
  self.assertEqual('dataset_id', created.datasetReference.datasetId)
def test_delete_dataset_retries_for_timeouts(self, patched_time_sleep):
  """A 408 from datasets.Delete is retried; the retry succeeds."""
  bq_client = mock.Mock()
  bq_client.datasets.Delete.side_effect = [
      HttpError(response={'status': '408'}, url='', content=''),
      bigquery.BigqueryDatasetsDeleteResponse(),
  ]
  wrapper = beam.io.google_cloud_platform.bigquery.BigQueryWrapper(bq_client)
  wrapper._delete_dataset('', '')
  self.assertTrue(bq_client.datasets.Delete.called)
def test_exists_failure(self, mock_get):
  """A non-404 error from the metadata fetch must propagate out of exists().

  404 is the one status exists() treats as "file missing"; any other status
  (here, a 400) is re-raised to the caller.
  """
  # Raising an error other than 404. Raising 404 is a valid failure for
  # exists() call.
  mock_get.side_effect = HttpError({'status': 400}, None, None)
  file_name = 'gs://gcsio-test/dummy_file'
  file_size = 1234
  self._insert_random_file(self.client, file_name, file_size)
  with self.assertRaises(HttpError) as cm:
    self.gcs.exists(file_name)
  # assertEquals is a deprecated alias (removed in Python 3.12); use
  # assertEqual.
  self.assertEqual(400, cm.exception.status_code)
def test_file_checksum_matcher_service_error(self, mock_match):
  """The checksum matcher retries on HttpError and finally re-raises it."""
  mock_match.side_effect = HttpError(
      response={'status': '404'}, url='', content='Not Found')
  matcher = verifiers.FileChecksumMatcher('gs://dummy/path', Mock())
  with self.assertRaises(HttpError):
    hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mock_match.called)
  # One initial attempt plus MAX_RETRIES retries.
  self.assertEqual(verifiers.MAX_RETRIES + 1, mock_match.call_count)
def test_insert_error_latency_recorded(self):
  """A failing InsertAll still records latency through the recorder."""
  bq_client = mock.Mock()
  bq_client.tabledata.InsertAll.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
  recorder = mock.Mock()
  with self.assertRaises(HttpError):
    # 'latency_recoder' spelling follows the wrapper's keyword argument.
    wrapper._insert_all_rows('', '', '', [], latency_recoder=recorder)
  self.assertTrue(recorder.record.called)
def Get(self, get_request, download=None):  # pylint: disable=invalid-name
  """Fake GCS Get: returns metadata, or wires a download range callback.

  Raises an HTTP 404 for missing objects and an HTTP 429 for files that the
  test has marked to fail metadata reads or content reads.
  """
  target = self.get_file(get_request.bucket, get_request.object)
  if target is None:
    # Missing object: emulate the service's 404.
    raise HttpError({'status': 404}, None, None)
  if download is None:
    if target in self._fail_when_getting_metadata:
      raise HttpError({'status': 429}, None, None)
    return target.get_metadata()
  if target in self._fail_when_reading:
    raise HttpError({'status': 429}, None, None)
  out_stream = download.stream

  def get_range_callback(start, end):
    # Inclusive byte range must fall inside the file contents.
    if not 0 <= start <= end < len(target.contents):
      raise ValueError(
          'start=%d end=%d len=%s' % (start, end, len(target.contents)))
    out_stream.write(target.contents[start:end + 1])

  download.GetRange = get_range_callback
def test_no_table_and_create_never(self, patched_time_sleep):
  """CREATE_NEVER with a missing table raises RuntimeError at writer open."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  # assertRaisesRegexp is a deprecated alias (removed in Python 3.12); use
  # assertRaisesRegex.
  with self.assertRaisesRegex(
      RuntimeError,
      r'Table project:dataset\.table not found but create '
      r'disposition is CREATE_NEVER'):
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
def Copy(self, copy_request):  # pylint: disable=invalid-name
  """Fake GCS Copy: duplicates the source object at the next generation.

  Raises an HTTP 404 if the source object does not exist.
  """
  source = self.get_file(
      copy_request.sourceBucket, copy_request.sourceObject)
  if not source:
    raise HttpError(
        httplib2.Response({'status': '404'}), '404 Not Found',
        'https://fake/url')
  next_generation = 1 + self.get_last_generation(
      copy_request.destinationBucket, copy_request.destinationObject)
  self.add_file(
      FakeFile(
          copy_request.destinationBucket, copy_request.destinationObject,
          source.contents, next_generation))
def test_get_or_create_table_race_condition(self):
  """A 409 on Insert (another writer won the race) falls back to Get."""
  bq_client = mock.Mock()
  bq_client.tables.Insert.side_effect = HttpError(
      response={'status': '409'}, url='', content='')
  # First Get misses; the retry after the 409 finds the table.
  bq_client.tables.Get.side_effect = [None, 'table_id']
  wrapper = beam.io.gcp.bigquery_tools.BigQueryWrapper(bq_client)
  schema = bigquery.TableSchema(
      fields=[
          bigquery.TableFieldSchema(name='b', type='BOOLEAN', mode='REQUIRED')
      ])
  result = wrapper.get_or_create_table(
      'project_id', 'dataset_id', 'table_id', schema, False, False)
  self.assertEqual('table_id', result)
def test_no_table_and_create_if_needed_and_no_schema(
    self, patched_time_sleep):
  """CREATE_IF_NEEDED without a schema cannot create the missing table."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_IF_NEEDED
  # assertRaisesRegexp is a deprecated alias (removed in Python 3.12); use
  # assertRaisesRegex.
  with self.assertRaisesRegex(
      RuntimeError,
      r'Table project:dataset\.table requires a schema\. None '
      r'can be inferred because the table does not exist'):
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
def test_no_table_and_create_never(self, patched_time_sleep):
  """CREATE_NEVER with a missing table raises RuntimeError at writer open."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  create_disposition = beam.io.BigQueryDisposition.CREATE_NEVER
  with self.assertRaises(RuntimeError) as exn:
    with beam.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
  # NOTE(review): exception.message is a Python 2 idiom; the attribute does
  # not exist on Python 3 exceptions -- confirm target runtime or switch to
  # str(exn.exception).
  self.assertEqual(
      exn.exception.message,
      'Table project:dataset.table not found but create disposition is '
      'CREATE_NEVER.')
def Get(self, get_request, download=None):  # pylint: disable=invalid-name
  """Fake GCS Get: returns metadata, or wires a download range callback.

  Raises an HTTP 404 if the requested object does not exist.
  """
  f = self.get_file(get_request.bucket, get_request.object)
  if f is None:
    # Failing with an HTTP 404 if file does not exist.
    raise HttpError({'status': 404}, None, None)
  if download is None:
    return f.get_metadata()
  else:
    stream = download.stream

    def get_range_callback(start, end):
      # Validate explicitly rather than with `assert` (stripped under -O);
      # this also matches the range check in the richer Get fake elsewhere
      # in this file.
      if not 0 <= start <= end < len(f.contents):
        raise ValueError(
            'start=%d end=%d len=%s' % (start, end, len(f.contents)))
      stream.write(f.contents[start:end + 1])

    download.GetRange = get_range_callback
def test_no_table_and_create_if_needed_and_no_schema(self):
  """CREATE_IF_NEEDED without a schema cannot create the missing table."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(response={'status': '404'},
                                            url='', content='')
  create_disposition = df.io.BigQueryDisposition.CREATE_IF_NEEDED
  with self.assertRaises(RuntimeError) as exn:
    with df.io.BigQuerySink(
        'project:dataset.table',
        create_disposition=create_disposition).writer(client):
      pass
  # NOTE(review): exception.message is a Python 2 idiom; the attribute does
  # not exist on Python 3 exceptions -- confirm target runtime or switch to
  # str(exn.exception).
  self.assertEqual(
      exn.exception.message,
      'Table project:dataset.table requires a schema. None can be inferred '
      'because the table does not exist.')
def test_downloader_fail_to_get_project_number(self, mock_get):
  """Metrics fall back to project-less labels when bucket lookup 403s.

  When listing the GCS bucket raises a 403, the project number cannot be
  retrieved, so the counter carrying GCS_PROJECT_ID_LABEL stays at zero
  while the counter without that label records the requests.
  """
  # Raising an error when listing GCS Bucket so that project number fails to
  # be retrieved.
  mock_get.side_effect = HttpError({'status': 403}, None, None)
  # Clear the process wide metric container.
  MetricsEnvironment.process_wide_container().reset()
  file_name = 'gs://gcsio-metrics-test/dummy_mode_file'
  file_size = 5 * 1024 * 1024 + 100
  random_file = self._insert_random_file(self.client, file_name, file_size)
  self.gcs.open(file_name, 'r')
  resource = resource_identifiers.GoogleCloudStorageBucket(
      random_file.bucket)
  # Labels that include the project id: this counter must remain zero.
  labels = {
      monitoring_infos.SERVICE_LABEL: 'Storage',
      monitoring_infos.METHOD_LABEL: 'Objects.get',
      monitoring_infos.RESOURCE_LABEL: resource,
      monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
      monitoring_infos.GCS_PROJECT_ID_LABEL: str(DEFAULT_PROJECT_NUMBER),
      monitoring_infos.STATUS_LABEL: 'ok'
  }
  metric_name = MetricName(None, None,
                           urn=monitoring_infos.API_REQUEST_COUNT_URN,
                           labels=labels)
  metric_value = MetricsEnvironment.process_wide_container().get_counter(
      metric_name).get_cumulative()
  self.assertEqual(metric_value, 0)
  # Without the project id label, the requests are still counted.
  labels_without_project_id = {
      monitoring_infos.SERVICE_LABEL: 'Storage',
      monitoring_infos.METHOD_LABEL: 'Objects.get',
      monitoring_infos.RESOURCE_LABEL: resource,
      monitoring_infos.GCS_BUCKET_LABEL: random_file.bucket,
      monitoring_infos.STATUS_LABEL: 'ok'
  }
  metric_name = MetricName(None, None,
                           urn=monitoring_infos.API_REQUEST_COUNT_URN,
                           labels=labels_without_project_id)
  metric_value = MetricsEnvironment.process_wide_container().get_counter(
      metric_name).get_cumulative()
  self.assertEqual(metric_value, 2)
def test_no_table_and_create_if_needed(self):
  """With a schema and CREATE_IF_NEEDED, a missing table gets created."""
  client = mock.Mock()
  client.tables.Get.side_effect = HttpError(
      response={'status': '404'}, url='', content='')
  client.tables.Insert.return_value = bigquery.Table(
      tableReference=bigquery.TableReference(
          projectId='project', datasetId='dataset', tableId='table'),
      schema=bigquery.TableSchema())
  create_disposition = df.io.BigQueryDisposition.CREATE_IF_NEEDED
  sink = df.io.BigQuerySink(
      'project:dataset.table',
      schema='somefield:INTEGER',
      create_disposition=create_disposition)
  with sink.writer(client):
    pass
  self.assertTrue(client.tables.Get.called)
  self.assertTrue(client.tables.Insert.called)
def testBucketNameTaken(self):
  """Bucket creation retries with _N suffixes until the name is free."""
  self._mock_find_bucket.return_value = None
  # The first three inserts report a name collision; the fourth succeeds.
  name_taken = HttpError({'status': 409}, None, None)
  self._mock_insert_bucket.side_effect = [name_taken] * 3 + [None]
  results = self.Run('compute diagnose export-logs '
                     '--zone us-west1-a instance-1')
  # The _# suffix is added after the first failure, and counts up until the
  # bucket name is unique.
  expected_suffix = '{}_2'.format(_PROJECT_NUM)
  self.assertTrue(results['bucket'].endswith(expected_suffix))
  self.assertEqual(4, self._mock_insert_bucket.call_count)
def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path):
  """Performs a head request against a signed url to check for read access.

  Returns the HTTP status code (200, 403, or 404 are all informative probe
  outcomes); any other status is wrapped in a CommandException.
  """
  signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                             int(time.time()) + 10, gcs_path)
  try:
    h = GetNewHttp()
    req = Request(signed_url, 'HEAD')
    response = MakeRequest(h, req)
    if response.status_code not in [200, 403, 404]:
      raise HttpError(response)
    return response.status_code
  except HttpError as e:
    # Fixed: the adjacent string literals previously concatenated without a
    # space, producing "queryingobject readability" in the error message.
    raise CommandException('Unexpected response code while querying '
                           'object readability ({0})'.format(e.message))
def Get(self, path):
  """Fake bucket Get: raises per-path errors used by the tests.

  Any path other than the two sentinel names falls through and returns None.
  """
  if path == 'test-bucket-not-found':
    raise HttpNotFoundError({'status': 404}, {}, '')
  if path == 'test-bucket-not-verified':
    raise HttpError({'status': 400}, {}, '')
def http_error(self, code):
  """Raise an HttpError carrying the given status code.

  Fails fast with RuntimeError when the GCP extras are not installed, since
  HttpError is then unavailable and the test cannot be meaningful.
  """
  if HttpError is None:
    raise RuntimeError(
        "This is not a valid test as GCP is not enabled")
  raise HttpError({'status': str(code)}, '', '')
def http_error(self, code):
  """Raise an HttpError whose response status is the given code."""
  raise HttpError({'status': str(code)}, '', '')