def import_csv(
    self,
    bucket_name: str,
    bucket_path: str,
    dataset: str,
    table: str,
    sep: str = "\t",
) -> bool:
    logging.info(
        f"DataWarehouse.import_csv {bucket_path} to {dataset}.{table} ...")
    client = self._get_client()
    config = LoadJobConfig()
    config.autodetect = True      # infer the schema from the file contents
    config.field_delimiter = sep  # "\t" for TSV, "," for plain CSV
    # NOTE: bucket_name is currently unused; the bucket is taken from
    # self.config.lake_path instead.
    bucket_url = f"gs://{self.config.lake_path}/{bucket_path}"
    load_job = client.load_table_from_uri(
        bucket_url, f"{dataset}.{table}", job_config=config)
    load_job.result()  # block until the load job finishes; raises on failure
    logging.info(
        f"DataWarehouse.import_csv {bucket_path} to {dataset}.{table} Complete!"
    )
    return True
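# For context, the method above wraps BigQuery's standard load-from-URI flow.
# Below is a minimal standalone sketch of that flow outside the DataWarehouse
# class; the function name and arguments are illustrative, not part of the
# original code.
import logging

from google.cloud import bigquery
from google.cloud.bigquery.job import LoadJobConfig


def load_csv_from_gcs(
    bucket: str, path: str, dataset: str, table: str, sep: str = "\t"
) -> None:
    """Load a delimited file from GCS into a BigQuery table, blocking until done."""
    client = bigquery.Client()      # uses application-default credentials
    config = LoadJobConfig()
    config.autodetect = True        # infer the schema from the file contents
    config.field_delimiter = sep    # "\t" for TSV, "," for plain CSV
    uri = f"gs://{bucket}/{path}"
    load_job = client.load_table_from_uri(
        uri, f"{dataset}.{table}", job_config=config)
    load_job.result()               # wait for completion; raises on job failure
    logging.info("Loaded %s into %s.%s", uri, dataset, table)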
def test_begin_w_alternate_client(self):
    from google.cloud.bigquery.job import CreateDisposition
    from google.cloud.bigquery.job import LoadJobConfig
    from google.cloud.bigquery.job import SchemaUpdateOption
    from google.cloud.bigquery.job import WriteDisposition
    from google.cloud.bigquery.schema import SchemaField

    PATH = "/projects/%s/jobs" % (self.PROJECT,)
    RESOURCE = self._make_resource(ended=True)
    LOAD_CONFIGURATION = {
        "sourceUris": [self.SOURCE1],
        "destinationTable": {
            "projectId": self.PROJECT,
            "datasetId": self.DS_ID,
            "tableId": self.TABLE_ID,
        },
        "allowJaggedRows": True,
        "allowQuotedNewlines": True,
        "createDisposition": CreateDisposition.CREATE_NEVER,
        "encoding": "ISO-8859-1",
        "fieldDelimiter": "|",
        "ignoreUnknownValues": True,
        "maxBadRecords": 100,
        "nullMarker": r"\N",
        "quote": "'",
        "skipLeadingRows": "1",
        "sourceFormat": "CSV",
        "useAvroLogicalTypes": True,
        "writeDisposition": WriteDisposition.WRITE_TRUNCATE,
        "schema": {
            "fields": [
                {
                    "name": "full_name",
                    "type": "STRING",
                    "mode": "REQUIRED",
                    "description": None,
                },
                {
                    "name": "age",
                    "type": "INTEGER",
                    "mode": "REQUIRED",
                    "description": None,
                },
            ]
        },
        "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
    }
    RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
    conn1 = _make_connection()
    client1 = _make_client(project=self.PROJECT, connection=conn1)
    conn2 = _make_connection(RESOURCE)
    client2 = _make_client(project=self.PROJECT, connection=conn2)
    full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
    age = SchemaField("age", "INTEGER", mode="REQUIRED")
    config = LoadJobConfig()
    config.schema = [full_name, age]
    job = self._make_one(
        self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config)
    # The job holds a reference to config, so these mutations are picked up
    # when the job is begun below.
    config.allow_jagged_rows = True
    config.allow_quoted_newlines = True
    config.create_disposition = CreateDisposition.CREATE_NEVER
    config.encoding = "ISO-8859-1"
    config.field_delimiter = "|"
    config.ignore_unknown_values = True
    config.max_bad_records = 100
    config.null_marker = r"\N"
    config.quote_character = "'"
    config.skip_leading_rows = 1
    config.source_format = "CSV"
    config.use_avro_logical_types = True
    config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]

    with mock.patch(
        "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
    ) as final_attributes:
        job._begin(client=client2)

    final_attributes.assert_called_with({"path": PATH}, client2, job)

    conn1.api_request.assert_not_called()
    self.assertEqual(len(conn2.api_request.call_args_list), 1)
    req = conn2.api_request.call_args_list[0]
    self.assertEqual(req[1]["method"], "POST")
    self.assertEqual(req[1]["path"], PATH)
    SENT = {
        "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
        "configuration": {"load": LOAD_CONFIGURATION},
    }
    self.maxDiff = None
    self.assertEqual(req[1]["data"], SENT)
    self._verifyResourceProperties(job, RESOURCE)
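# The test above checks that snake_case LoadJobConfig attributes serialize to
# the camelCase keys BigQuery's REST API expects. A quick way to observe that
# mapping directly (a minimal sketch, not part of the test suite) is
# LoadJobConfig.to_api_repr():
from google.cloud.bigquery.job import LoadJobConfig, WriteDisposition

config = LoadJobConfig()
config.field_delimiter = "|"
config.skip_leading_rows = 1
config.write_disposition = WriteDisposition.WRITE_TRUNCATE

# Prints the payload nested under the "load" key, e.g.
# {"load": {"fieldDelimiter": "|", "skipLeadingRows": "1",
#           "writeDisposition": "WRITE_TRUNCATE"}}.
# Note that skipLeadingRows serializes as a string, which is why the SENT
# dict in the test expects "1" rather than 1.
print(config.to_api_repr())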