Beispiel #1
0
    def test__begin_explicit(self):
        # With an explicit client, retry and timeout, ``_begin`` is expected
        # to forward all three to ``client._call_api``. The client is built
        # for a different project, yet the asserted path uses self.PROJECT.
        from google.cloud.bigquery.retry import DEFAULT_RETRY

        job_reference = {
            "jobId": self.JOB_ID,
            "projectId": self.PROJECT,
            "location": None,
        }
        api_payload = {
            "jobReference": job_reference,
            "configuration": {"test": True},
        }
        expected_path = "/projects/{}/jobs".format(self.PROJECT)
        custom_retry = DEFAULT_RETRY.with_deadline(1)

        job = self._set_properties_job()
        # Stub out serialization so the request body is exactly api_payload.
        job.to_api_repr = mock.Mock(return_value=api_payload)
        alternate_client = _make_client(project="other-project-234")
        api_mock = mock.Mock(return_value=api_payload)
        alternate_client._call_api = api_mock

        job._begin(client=alternate_client, retry=custom_retry, timeout=7.5)

        api_mock.assert_called_once_with(
            custom_retry,
            span_name="BigQuery.job.begin",
            span_attributes={"path": expected_path},
            job_ref=job,
            method="POST",
            path=expected_path,
            data=api_payload,
            timeout=7.5,
        )
        # The API response must have replaced the job's properties.
        self.assertEqual(job._properties, api_payload)
Beispiel #2
0
    def test__begin_defaults(self):
        # Called without arguments, ``_begin`` is expected to go through the
        # job's own client using DEFAULT_RETRY and ``timeout=None``.
        from google.cloud.bigquery.retry import DEFAULT_RETRY

        job_reference = {
            "jobId": self.JOB_ID,
            "projectId": self.PROJECT,
            "location": None,
        }
        api_payload = {
            "jobReference": job_reference,
            "configuration": {"test": True},
        }
        expected_path = "/projects/{}/jobs".format(self.PROJECT)

        job = self._set_properties_job()
        # Stub out serialization so the request body is exactly api_payload.
        job.to_api_repr = mock.Mock(return_value=api_payload)
        api_mock = mock.Mock(return_value=api_payload)
        job._client._call_api = api_mock

        job._begin()

        api_mock.assert_called_once_with(
            DEFAULT_RETRY,
            span_name="BigQuery.job.begin",
            span_attributes={"path": expected_path},
            job_ref=job,
            method="POST",
            path=expected_path,
            data=api_payload,
            timeout=None,
        )
        # The API response must have replaced the job's properties.
        self.assertEqual(job._properties, api_payload)
Beispiel #3
0
    def test_begin_w_already_running(self):
        # Once the job's status state is "RUNNING", ``_begin`` is expected
        # to raise ValueError.
        connection = _make_connection()
        bound_client = _make_client(project=self.PROJECT,
                                    connection=connection)
        source_uris = [self.SOURCE1]
        job = self._make_one(self.JOB_ID, source_uris, self.TABLE_REF,
                             bound_client)
        job._properties["status"] = {"state": "RUNNING"}

        self.assertRaises(ValueError, job._begin)
Beispiel #4
0
    def test_begin_w_alternate_client(self):
        # Passing ``client=`` to ``_begin`` must route the request through
        # that client's connection; the client the job was constructed with
        # must see no API request at all.
        from google.cloud.bigquery.dataset import DatasetReference
        from google.cloud.bigquery.job import Compression
        from google.cloud.bigquery.job import DestinationFormat
        from google.cloud.bigquery.job import ExtractJobConfig

        expected_path = "/projects/%s/jobs" % (self.PROJECT, )
        api_response = self._make_resource(ended=True)
        source_table_repr = {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.SOURCE_TABLE,
        }
        extract_config = {
            "sourceTable": source_table_repr,
            "destinationUris": [self.DESTINATION_URI],
            "compression": Compression.GZIP,
            "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON,
            "fieldDelimiter": "|",
            "printHeader": False,
        }
        api_response["configuration"]["extract"] = extract_config

        bound_conn = _make_connection()
        bound_client = _make_client(project=self.PROJECT,
                                    connection=bound_conn)
        alt_conn = _make_connection(api_response)
        alt_client = _make_client(project=self.PROJECT, connection=alt_conn)

        source_table = DatasetReference(self.PROJECT, self.DS_ID).table(
            self.SOURCE_TABLE)
        job_config = ExtractJobConfig()
        job_config.compression = Compression.GZIP
        job_config.destination_format = (
            DestinationFormat.NEWLINE_DELIMITED_JSON)
        job_config.field_delimiter = "|"
        job_config.print_header = False
        job = self._make_one(self.JOB_ID, source_table,
                             [self.DESTINATION_URI], bound_client, job_config)

        patch_target = "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        with mock.patch(patch_target) as final_attributes:
            job._begin(client=alt_client)

        final_attributes.assert_called_with(
            {"path": expected_path}, alt_client, job)

        bound_conn.api_request.assert_not_called()
        expected_payload = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID,
            },
            "configuration": {"extract": extract_config},
        }
        alt_conn.api_request.assert_called_once_with(
            method="POST",
            path=expected_path,
            data=expected_payload,
            timeout=None,
        )
        self._verifyResourceProperties(job, api_response)
Beispiel #5
0
    def test_begin_w_autodetect(self):
        # A load job whose config has ``autodetect = True`` is expected to
        # post ``"autodetect": True`` inside the load configuration.
        from google.cloud.bigquery.job import LoadJobConfig

        expected_path = "/projects/{}/jobs".format(self.PROJECT)
        api_response = self._make_resource()
        api_response["configuration"]["load"]["autodetect"] = True
        # Ensure None for missing server-set props
        del api_response["statistics"]["creationTime"]
        for server_set_key in ("etag", "selfLink", "user_email"):
            del api_response[server_set_key]

        connection = _make_connection(api_response)
        bound_client = _make_client(project=self.PROJECT,
                                    connection=connection)
        job_config = LoadJobConfig()
        job_config.autodetect = True
        job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
                             bound_client, job_config)

        patch_target = "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        with mock.patch(patch_target) as final_attributes:
            job._begin()

        final_attributes.assert_called_with(
            {"path": expected_path}, bound_client, job)

        destination_repr = {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.TABLE_ID,
        }
        expected_payload = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID,
            },
            "configuration": {
                "load": {
                    "sourceUris": [self.SOURCE1],
                    "destinationTable": destination_repr,
                    "autodetect": True,
                },
            },
        }
        connection.api_request.assert_called_once_with(
            method="POST",
            path=expected_path,
            data=expected_payload,
            timeout=None,
        )
        self._verifyResourceProperties(job, api_response)
Beispiel #6
0
    def test_begin_w_bound_client(self):
        # Without an explicit client, ``_begin`` is expected to POST the
        # extract configuration through the client the job was built with.
        from google.cloud.bigquery.dataset import DatasetReference

        expected_path = "/projects/%s/jobs" % (self.PROJECT, )
        api_response = self._make_resource()
        # Ensure None for missing server-set props
        del api_response["statistics"]["creationTime"]
        for server_set_key in ("etag", "selfLink", "user_email"):
            del api_response[server_set_key]

        connection = _make_connection(api_response)
        bound_client = _make_client(project=self.PROJECT,
                                    connection=connection)
        source_table = DatasetReference(self.PROJECT, self.DS_ID).table(
            self.SOURCE_TABLE)
        job = self._make_one(self.JOB_ID, source_table,
                             [self.DESTINATION_URI], bound_client)

        patch_target = "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        with mock.patch(patch_target) as final_attributes:
            job._begin()

        final_attributes.assert_called_with(
            {"path": expected_path}, bound_client, job)

        source_table_repr = {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.SOURCE_TABLE,
        }
        expected_payload = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID,
            },
            "configuration": {
                "extract": {
                    "sourceTable": source_table_repr,
                    "destinationUris": [self.DESTINATION_URI],
                },
            },
        }
        connection.api_request.assert_called_once_with(
            method="POST",
            path=expected_path,
            data=expected_payload,
            timeout=None,
        )
        self._verifyResourceProperties(job, api_response)
Beispiel #7
0
    def test_begin_w_bound_client(self):
        # Without an explicit client, ``_begin`` is expected to POST the
        # load configuration through the client the job was built with.
        api_response = self._make_resource()
        # Ensure None for missing server-set props
        del api_response["statistics"]["creationTime"]
        for server_set_key in ("etag", "selfLink", "user_email"):
            del api_response[server_set_key]

        connection = _make_connection(api_response)
        bound_client = _make_client(project=self.PROJECT,
                                    connection=connection)
        job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
                             bound_client)
        expected_path = "/projects/{}/jobs".format(self.PROJECT)

        patch_target = "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        with mock.patch(patch_target) as final_attributes:
            job._begin()

        final_attributes.assert_called_with(
            {"path": expected_path}, bound_client, job)

        destination_repr = {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.TABLE_ID,
        }
        expected_payload = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID,
            },
            "configuration": {
                "load": {
                    "sourceUris": [self.SOURCE1],
                    "destinationTable": destination_repr,
                },
            },
        }
        connection.api_request.assert_called_once_with(
            method="POST",
            path=expected_path,
            data=expected_payload,
            timeout=None,
        )
        self._verifyResourceProperties(job, api_response)
Beispiel #8
0
    def test_begin_w_alternate_client(self):
        # Passing ``client=`` to ``_begin`` must route the request through
        # that client's connection; the client the job was constructed with
        # must see no API request at all. Every load option set on the config
        # is expected to show up in the posted payload.
        from google.cloud.bigquery.job import CreateDisposition
        from google.cloud.bigquery.job import LoadJobConfig
        from google.cloud.bigquery.job import SchemaUpdateOption
        from google.cloud.bigquery.job import WriteDisposition
        from google.cloud.bigquery.schema import SchemaField

        expected_path = "/projects/%s/jobs" % (self.PROJECT, )
        api_response = self._make_resource(ended=True)
        destination_repr = {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.TABLE_ID,
        }
        schema_repr = {
            "fields": [
                {
                    "name": "full_name",
                    "type": "STRING",
                    "mode": "REQUIRED",
                    "description": None,
                },
                {
                    "name": "age",
                    "type": "INTEGER",
                    "mode": "REQUIRED",
                    "description": None,
                },
            ]
        }
        # NOTE(review): "ISO-8559-1" is reproduced exactly as found; it looks
        # like a typo for "ISO-8859-1" -- confirm before changing it.
        load_config = {
            "sourceUris": [self.SOURCE1],
            "destinationTable": destination_repr,
            "allowJaggedRows": True,
            "allowQuotedNewlines": True,
            "createDisposition": CreateDisposition.CREATE_NEVER,
            "encoding": "ISO-8559-1",
            "fieldDelimiter": "|",
            "ignoreUnknownValues": True,
            "maxBadRecords": 100,
            "nullMarker": r"\N",
            "quote": "'",
            "skipLeadingRows": "1",
            "sourceFormat": "CSV",
            "useAvroLogicalTypes": True,
            "writeDisposition": WriteDisposition.WRITE_TRUNCATE,
            "schema": schema_repr,
            "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
        }
        api_response["configuration"]["load"] = load_config

        bound_conn = _make_connection()
        bound_client = _make_client(project=self.PROJECT,
                                    connection=bound_conn)
        alt_conn = _make_connection(api_response)
        alt_client = _make_client(project=self.PROJECT, connection=alt_conn)

        job_config = LoadJobConfig()
        job_config.schema = [
            SchemaField("full_name", "STRING", mode="REQUIRED"),
            SchemaField("age", "INTEGER", mode="REQUIRED"),
        ]
        job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
                             bound_client, job_config)
        # NOTE(review): the remaining options are assigned only after the job
        # has been constructed, yet the expected payload below includes them;
        # presumably the job keeps a reference to this config rather than a
        # copy -- confirm before reordering.
        job_config.allow_jagged_rows = True
        job_config.allow_quoted_newlines = True
        job_config.create_disposition = CreateDisposition.CREATE_NEVER
        job_config.encoding = "ISO-8559-1"
        job_config.field_delimiter = "|"
        job_config.ignore_unknown_values = True
        job_config.max_bad_records = 100
        job_config.null_marker = r"\N"
        job_config.quote_character = "'"
        job_config.skip_leading_rows = 1
        job_config.source_format = "CSV"
        job_config.use_avro_logical_types = True
        job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
        job_config.schema_update_options = [
            SchemaUpdateOption.ALLOW_FIELD_ADDITION,
        ]

        patch_target = "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        with mock.patch(patch_target) as final_attributes:
            job._begin(client=alt_client)

        final_attributes.assert_called_with(
            {"path": expected_path}, alt_client, job)

        bound_conn.api_request.assert_not_called()
        recorded_calls = alt_conn.api_request.call_args_list
        self.assertEqual(len(recorded_calls), 1)
        # Index 1 of a recorded call holds its keyword arguments.
        request_kwargs = recorded_calls[0][1]
        self.assertEqual(request_kwargs["method"], "POST")
        self.assertEqual(request_kwargs["path"], expected_path)
        expected_payload = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID,
            },
            "configuration": {"load": load_config},
        }
        # Show the full diff if the large payload comparison fails.
        self.maxDiff = None
        self.assertEqual(request_kwargs["data"], expected_payload)
        self._verifyResourceProperties(job, api_response)
Beispiel #9
0
    def test__begin_already(self):
        # Once the job's status carries a state (here "WHATEVER"),
        # ``_begin`` is expected to raise ValueError.
        job = self._set_properties_job()
        existing_status = {"state": "WHATEVER"}
        job._properties["status"] = existing_status

        self.assertRaises(ValueError, job._begin)