def _load_table_from_cloud_storage_async(
        self, source_uri: str,
        destination_dataset_ref: bigquery.DatasetReference,
        destination_table_id: str,
        destination_table_schema: List[bigquery.SchemaField],
        write_disposition: bigquery.WriteDisposition
) -> bigquery.job.LoadJob:
    """Starts an asynchronous job loading a CSV from Cloud Storage into BigQuery.

    Ensures the destination dataset exists before launching the load. The job
    is returned immediately; callers are responsible for waiting on it.

    Args:
        source_uri: GCS URI of the CSV object to load.
        destination_dataset_ref: Dataset that will receive the table.
        destination_table_id: Name of the destination table.
        destination_table_schema: Schema to apply to the loaded table.
        write_disposition: How to treat pre-existing data in the table
            (e.g. truncate vs. append).

    Returns:
        The started LoadJob.
    """
    self.create_dataset_if_necessary(destination_dataset_ref)

    table_ref = destination_dataset_ref.table(destination_table_id)

    # CSV input with quoted newlines allowed, since exported text fields
    # may contain embedded line breaks.
    config = bigquery.LoadJobConfig()
    config.schema = destination_table_schema
    config.source_format = bigquery.SourceFormat.CSV
    config.allow_quoted_newlines = True
    config.write_disposition = write_disposition

    job = self.client.load_table_from_uri(
        source_uri, table_ref, job_config=config)

    logging.info("Started load job [%s] for table [%s.%s.%s]",
                 job.job_id,
                 table_ref.project,
                 table_ref.dataset_id,
                 table_ref.table_id)

    return job
def table_exists(self, dataset_ref: bigquery.DatasetReference,
                 table_id: str) -> bool:
    """Returns whether ``table_id`` exists within ``dataset_ref``.

    Probes via a metadata fetch rather than listing tables; a NotFound
    response is treated as "does not exist" and logged as a warning.

    Args:
        dataset_ref: Dataset to look in.
        table_id: Name of the table to check for.

    Returns:
        True if the table exists, False otherwise.
    """
    ref = dataset_ref.table(table_id)
    try:
        self.client.get_table(ref)
    except exceptions.NotFound:
        logging.warning("Table [%s] does not exist in dataset [%s]",
                        table_id, str(dataset_ref))
        return False
    return True
def copy_view(
        self, view: BigQueryView,
        destination_client: BigQueryClient,
        destination_dataset_ref: bigquery.DatasetReference
) -> bigquery.Table:
    """Copies a view definition into another client's dataset.

    The view's query template is re-rendered against the destination
    project/dataset so the copy points at data in the new location.

    Args:
        view: The view to copy.
        destination_client: Client for the project receiving the copy.
        destination_dataset_ref: Dataset that will hold the new view.

    Returns:
        The newly created view table.

    Raises:
        ValueError: If a table with the view's id already exists in the
            destination dataset.
    """
    if destination_client.table_exists(destination_dataset_ref, view.view_id):
        raise ValueError(
            f"Table [{view.view_id}] already exists in dataset!")

    # Create the destination dataset if it doesn't yet exist
    destination_client.create_dataset_if_necessary(destination_dataset_ref)

    copied_view_ref = destination_dataset_ref.table(view.view_id)
    copied_view = bigquery.Table(copied_view_ref)
    # Re-render the query template so it references the destination
    # project/dataset rather than the source.
    copied_view.view_query = view.view_query.format(
        destination_client.project_id,
        destination_dataset_ref.dataset_id,
        view.view_id)

    created = destination_client.create_table(copied_view)
    logging.info("Created %s", copied_view_ref)
    return created
def export_table_to_cloud_storage_async(
        self, source_table_dataset_ref: bigquery.DatasetReference,
        source_table_id: str, destination_uri: str,
        destination_format: bigquery.DestinationFormat
) -> Optional[bigquery.ExtractJob]:
    """Starts an asynchronous export of a BigQuery table to Cloud Storage.

    Args:
        source_table_dataset_ref: Dataset containing the table to export.
        source_table_id: Name of the table to export.
        destination_uri: GCS URI to write the exported data to.
        destination_format: Output file format for the export.

    Returns:
        The started ExtractJob, or None if the source table does not exist.
    """
    # Bail out (with an error log) rather than submit a job that is
    # guaranteed to fail.
    if not self.table_exists(source_table_dataset_ref, source_table_id):
        logging.error("Table [%s] does not exist in dataset [%s]",
                      source_table_id, str(source_table_dataset_ref))
        return None

    export_config = bigquery.ExtractJobConfig()
    export_config.destination_format = destination_format

    return self.client.extract_table(
        source_table_dataset_ref.table(source_table_id),
        destination_uri,
        # Location must match that of the source table.
        location=self.LOCATION,
        job_config=export_config)
def get_table(self, dataset_ref: bigquery.DatasetReference,
              table_id: str) -> bigquery.Table:
    """Fetches the metadata for ``table_id`` in ``dataset_ref``.

    Args:
        dataset_ref: Dataset containing the table.
        table_id: Name of the table to fetch.

    Returns:
        The bigquery.Table resource from the API.
    """
    return self.client.get_table(dataset_ref.table(table_id))