def test__begin_explicit(self):
    """An explicit client, retry, and timeout are forwarded verbatim to _call_api."""
    from google.cloud.bigquery.retry import DEFAULT_RETRY

    job_resource = {
        "jobReference": {
            "jobId": self.JOB_ID,
            "projectId": self.PROJECT,
            "location": None,
        },
        "configuration": {"test": True},
    }
    job = self._set_properties_job()
    job.to_api_repr = mock.Mock(return_value=job_resource)

    alternate_client = _make_client(project="other-project-234")
    call_api = alternate_client._call_api = mock.Mock(return_value=job_resource)
    custom_retry = DEFAULT_RETRY.with_deadline(1)
    expected_path = "/projects/{}/jobs".format(self.PROJECT)

    job._begin(client=alternate_client, retry=custom_retry, timeout=7.5)

    call_api.assert_called_once_with(
        custom_retry,
        span_name="BigQuery.job.begin",
        span_attributes={"path": expected_path},
        job_ref=job,
        method="POST",
        path=expected_path,
        data=job_resource,
        timeout=7.5,
    )
    self.assertEqual(job._properties, job_resource)
def test__begin_defaults(self):
    """With no arguments, _begin() uses DEFAULT_RETRY, the bound client, and no timeout."""
    from google.cloud.bigquery.retry import DEFAULT_RETRY

    job_resource = {
        "jobReference": {
            "jobId": self.JOB_ID,
            "projectId": self.PROJECT,
            "location": None,
        },
        "configuration": {"test": True},
    }
    job = self._set_properties_job()
    job.to_api_repr = mock.Mock(return_value=job_resource)
    call_api = job._client._call_api = mock.Mock(return_value=job_resource)
    expected_path = "/projects/{}/jobs".format(self.PROJECT)

    job._begin()

    call_api.assert_called_once_with(
        DEFAULT_RETRY,
        span_name="BigQuery.job.begin",
        span_attributes={"path": expected_path},
        job_ref=job,
        method="POST",
        path=expected_path,
        data=job_resource,
        timeout=None,
    )
    self.assertEqual(job._properties, job_resource)
def test_begin_w_already_running(self):
    """_begin() on a job whose state is already RUNNING must raise ValueError."""
    connection = _make_connection()
    bq_client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, bq_client)
    job._properties["status"] = {"state": "RUNNING"}
    self.assertRaises(ValueError, job._begin)
def test_begin_w_alternate_client(self):
    """_begin(client=...) sends the extract config via the passed client, not the bound one."""
    from google.cloud.bigquery.dataset import DatasetReference
    from google.cloud.bigquery.job import Compression
    from google.cloud.bigquery.job import DestinationFormat
    from google.cloud.bigquery.job import ExtractJobConfig

    path = "/projects/%s/jobs" % (self.PROJECT,)
    resource = self._make_resource(ended=True)
    extract_config = {
        "sourceTable": {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.SOURCE_TABLE,
        },
        "destinationUris": [self.DESTINATION_URI],
        "compression": Compression.GZIP,
        "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON,
        "fieldDelimiter": "|",
        "printHeader": False,
    }
    resource["configuration"]["extract"] = extract_config

    bound_conn = _make_connection()
    bound_client = _make_client(project=self.PROJECT, connection=bound_conn)
    other_conn = _make_connection(resource)
    other_client = _make_client(project=self.PROJECT, connection=other_conn)

    source = DatasetReference(self.PROJECT, self.DS_ID).table(self.SOURCE_TABLE)
    config = ExtractJobConfig()
    config.compression = Compression.GZIP
    config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON
    config.field_delimiter = "|"
    config.print_header = False
    job = self._make_one(
        self.JOB_ID, source, [self.DESTINATION_URI], bound_client, config
    )

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin(client=other_client)

    final_attributes.assert_called_with({"path": path}, other_client, job)
    bound_conn.api_request.assert_not_called()
    other_conn.api_request.assert_called_once_with(
        method="POST",
        path=path,
        data={
            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
            "configuration": {"extract": extract_config},
        },
        timeout=None,
    )
    self._verifyResourceProperties(job, resource)
def test_begin_w_autodetect(self):
    """A load job configured with autodetect=True includes the flag in the POST body."""
    from google.cloud.bigquery.job import LoadJobConfig

    path = "/projects/{}/jobs".format(self.PROJECT)
    resource = self._make_resource()
    resource["configuration"]["load"]["autodetect"] = True
    # Drop server-set properties so the job reports them as None.
    del resource["statistics"]["creationTime"]
    for server_prop in ("etag", "selfLink", "user_email"):
        del resource[server_prop]

    conn = _make_connection(resource)
    client = _make_client(project=self.PROJECT, connection=conn)
    load_config = LoadJobConfig()
    load_config.autodetect = True
    job = self._make_one(
        self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, load_config
    )

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin()

    final_attributes.assert_called_with({"path": path}, client, job)
    expected_payload = {
        "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
        "configuration": {
            "load": {
                "sourceUris": [self.SOURCE1],
                "destinationTable": {
                    "projectId": self.PROJECT,
                    "datasetId": self.DS_ID,
                    "tableId": self.TABLE_ID,
                },
                "autodetect": True,
            }
        },
    }
    conn.api_request.assert_called_once_with(
        method="POST", path=path, data=expected_payload, timeout=None
    )
    self._verifyResourceProperties(job, resource)
def test_begin_w_bound_client(self):
    """_begin() with the job's own client posts the extract configuration."""
    from google.cloud.bigquery.dataset import DatasetReference

    path = "/projects/%s/jobs" % (self.PROJECT,)
    resource = self._make_resource()
    # Remove server-set properties so they read back as None.
    del resource["statistics"]["creationTime"]
    for server_prop in ("etag", "selfLink", "user_email"):
        del resource[server_prop]

    conn = _make_connection(resource)
    client = _make_client(project=self.PROJECT, connection=conn)
    source = DatasetReference(self.PROJECT, self.DS_ID).table(self.SOURCE_TABLE)
    job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client)

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin()

    final_attributes.assert_called_with({"path": path}, client, job)
    conn.api_request.assert_called_once_with(
        method="POST",
        path=path,
        data={
            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
            "configuration": {
                "extract": {
                    "sourceTable": {
                        "projectId": self.PROJECT,
                        "datasetId": self.DS_ID,
                        "tableId": self.SOURCE_TABLE,
                    },
                    "destinationUris": [self.DESTINATION_URI],
                }
            },
        },
        timeout=None,
    )
    self._verifyResourceProperties(job, resource)
def test_begin_w_bound_client(self):
    """_begin() with the bound client posts the load configuration."""
    resource = self._make_resource()
    # Remove server-set properties so they read back as None.
    del resource["statistics"]["creationTime"]
    for server_prop in ("etag", "selfLink", "user_email"):
        del resource[server_prop]

    conn = _make_connection(resource)
    client = _make_client(project=self.PROJECT, connection=conn)
    job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
    path = "/projects/{}/jobs".format(self.PROJECT)

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin()

    final_attributes.assert_called_with({"path": path}, client, job)
    conn.api_request.assert_called_once_with(
        method="POST",
        path=path,
        data={
            "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
            "configuration": {
                "load": {
                    "sourceUris": [self.SOURCE1],
                    "destinationTable": {
                        "projectId": self.PROJECT,
                        "datasetId": self.DS_ID,
                        "tableId": self.TABLE_ID,
                    },
                }
            },
        },
        timeout=None,
    )
    self._verifyResourceProperties(job, resource)
def test_begin_w_alternate_client(self):
    """A fully-configured load job begun via an alternate client sends every option."""
    from google.cloud.bigquery.job import CreateDisposition
    from google.cloud.bigquery.job import LoadJobConfig
    from google.cloud.bigquery.job import SchemaUpdateOption
    from google.cloud.bigquery.job import WriteDisposition
    from google.cloud.bigquery.schema import SchemaField

    path = "/projects/%s/jobs" % (self.PROJECT,)
    resource = self._make_resource(ended=True)
    load_config_repr = {
        "sourceUris": [self.SOURCE1],
        "destinationTable": {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.TABLE_ID,
        },
        "allowJaggedRows": True,
        "allowQuotedNewlines": True,
        "createDisposition": CreateDisposition.CREATE_NEVER,
        "encoding": "ISO-8559-1",
        "fieldDelimiter": "|",
        "ignoreUnknownValues": True,
        "maxBadRecords": 100,
        "nullMarker": r"\N",
        "quote": "'",
        "skipLeadingRows": "1",
        "sourceFormat": "CSV",
        "useAvroLogicalTypes": True,
        "writeDisposition": WriteDisposition.WRITE_TRUNCATE,
        "schema": {
            "fields": [
                {
                    "name": "full_name",
                    "type": "STRING",
                    "mode": "REQUIRED",
                    "description": None,
                },
                {
                    "name": "age",
                    "type": "INTEGER",
                    "mode": "REQUIRED",
                    "description": None,
                },
            ]
        },
        "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
    }
    resource["configuration"]["load"] = load_config_repr

    bound_conn = _make_connection()
    bound_client = _make_client(project=self.PROJECT, connection=bound_conn)
    other_conn = _make_connection(resource)
    other_client = _make_client(project=self.PROJECT, connection=other_conn)

    config = LoadJobConfig()
    config.schema = [
        SchemaField("full_name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    job = self._make_one(
        self.JOB_ID, [self.SOURCE1], self.TABLE_REF, bound_client, config
    )
    # The remaining options are set after job construction, mutating the
    # shared config object in place (same ordering as the scenario under test).
    config.allow_jagged_rows = True
    config.allow_quoted_newlines = True
    config.create_disposition = CreateDisposition.CREATE_NEVER
    config.encoding = "ISO-8559-1"
    config.field_delimiter = "|"
    config.ignore_unknown_values = True
    config.max_bad_records = 100
    config.null_marker = r"\N"
    config.quote_character = "'"
    config.skip_leading_rows = 1
    config.source_format = "CSV"
    config.use_avro_logical_types = True
    config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin(client=other_client)

    final_attributes.assert_called_with({"path": path}, other_client, job)
    bound_conn.api_request.assert_not_called()
    self.assertEqual(other_conn.api_request.call_count, 1)
    _, sent_kwargs = other_conn.api_request.call_args_list[0]
    self.assertEqual(sent_kwargs["method"], "POST")
    self.assertEqual(sent_kwargs["path"], path)
    expected_payload = {
        "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
        "configuration": {"load": load_config_repr},
    }
    self.maxDiff = None
    self.assertEqual(sent_kwargs["data"], expected_payload)
    self._verifyResourceProperties(job, resource)
def test__begin_already(self):
    """_begin() on a job whose status is already set must raise ValueError."""
    job = self._set_properties_job()
    job._properties["status"] = {"state": "WHATEVER"}
    self.assertRaises(ValueError, job._begin)