Exemple #1
0
    def _load_table_from_cloud_storage_async(
            self, source_uri: str,
            destination_dataset_ref: bigquery.DatasetReference,
            destination_table_id: str,
            destination_table_schema: List[bigquery.SchemaField],
            write_disposition: bigquery.WriteDisposition
    ) -> bigquery.job.LoadJob:
        """Submit a CSV load job from `source_uri` into a BigQuery table.

        `self.create_dataset_if_necessary` is invoked on the destination
        dataset before the job is submitted. This method only submits the
        job and logs its id; it does not wait on the returned job.

        Args:
            source_uri: Source passed to `client.load_table_from_uri`.
            destination_dataset_ref: Dataset that holds the destination table.
            destination_table_id: Id of the table to load into.
            destination_table_schema: Schema set on the load job config.
            write_disposition: Write disposition set on the load job config.

        Returns:
            The job object returned by `self.client.load_table_from_uri`.
        """
        self.create_dataset_if_necessary(destination_dataset_ref)

        target_ref = destination_dataset_ref.table(destination_table_id)

        # The source is read as CSV, with quoted newlines allowed in values.
        config = bigquery.LoadJobConfig()
        config.schema = destination_table_schema
        config.source_format = bigquery.SourceFormat.CSV
        config.allow_quoted_newlines = True
        config.write_disposition = write_disposition

        job = self.client.load_table_from_uri(
            source_uri, target_ref, job_config=config)

        logging.info(
            "Started load job [%s] for table [%s.%s.%s]",
            job.job_id,
            target_ref.project,
            target_ref.dataset_id,
            target_ref.table_id)

        return job
Exemple #2
0
    def table_exists(self, dataset_ref: bigquery.DatasetReference,
                     table_id: str) -> bool:
        """Report whether `table_id` can be fetched from `dataset_ref`.

        Existence is probed by calling `self.client.get_table`; a
        `exceptions.NotFound` from that call is logged as a warning and
        reported as False. Any other exception is not handled here.

        Args:
            dataset_ref: Dataset in which to look for the table.
            table_id: Id of the table to look up.

        Returns:
            True if the lookup succeeded, False if it raised NotFound.
        """
        candidate_ref = dataset_ref.table(table_id)

        try:
            self.client.get_table(candidate_ref)
        except exceptions.NotFound:
            logging.warning(
                "Table [%s] does not exist in dataset [%s]",
                table_id,
                str(dataset_ref))
            return False
        else:
            return True
Exemple #3
0
    def copy_view(
            self, view: BigQueryView, destination_client: BigQueryClient,
            destination_dataset_ref: bigquery.DatasetReference
    ) -> bigquery.Table:
        """Create a copy of `view` in a destination dataset.

        The copy is created through `destination_client`, under the same
        view id as the original. Its query is the source view's
        `view_query` formatted with, in order, the destination client's
        project id, the destination dataset id and the view id.

        Args:
            view: The view to copy.
            destination_client: Client used for the existence check, the
                dataset creation and the table creation.
            destination_dataset_ref: Dataset that will contain the copy.

        Returns:
            The value returned by `destination_client.create_table`.

        Raises:
            ValueError: If `destination_client.table_exists` reports that a
                table with the view's id is already in the dataset.
        """
        already_present = destination_client.table_exists(
            destination_dataset_ref, view.view_id)
        if already_present:
            raise ValueError(
                f"Table [{view.view_id}] already exists in dataset!")

        # Create the destination dataset if it doesn't yet exist
        destination_client.create_dataset_if_necessary(destination_dataset_ref)

        copy_ref = destination_dataset_ref.table(view.view_id)
        copy = bigquery.Table(copy_ref)
        rendered_query = view.view_query.format(
            destination_client.project_id,
            destination_dataset_ref.dataset_id,
            view.view_id)
        copy.view_query = rendered_query

        created = destination_client.create_table(copy)
        logging.info("Created %s", copy_ref)
        return created
Exemple #4
0
    def export_table_to_cloud_storage_async(
        self, source_table_dataset_ref: bigquery.DatasetReference,
        source_table_id: str, destination_uri: str,
        destination_format: bigquery.DestinationFormat
    ) -> Optional[bigquery.ExtractJob]:
        """Submit an extract job writing a table to `destination_uri`.

        If `self.table_exists` reports that the source table is missing,
        an error is logged and no job is submitted. This method does not
        wait on the job it returns.

        Args:
            source_table_dataset_ref: Dataset containing the source table.
            source_table_id: Id of the table to export.
            destination_uri: Destination passed to `client.extract_table`.
            destination_format: Format set on the extract job config.

        Returns:
            The value returned by `self.client.extract_table`, or None when
            the source table was not found.
        """
        source_found = self.table_exists(
            source_table_dataset_ref, source_table_id)
        if not source_found:
            logging.error(
                "Table [%s] does not exist in dataset [%s]",
                source_table_id,
                str(source_table_dataset_ref))
            return None

        extract_config = bigquery.ExtractJobConfig()
        extract_config.destination_format = destination_format

        source_ref = source_table_dataset_ref.table(source_table_id)

        # Location must match that of the source table.
        return self.client.extract_table(
            source_ref,
            destination_uri,
            location=self.LOCATION,
            job_config=extract_config)
Exemple #5
0
 def get_table(self, dataset_ref: bigquery.DatasetReference,
               table_id: str) -> bigquery.Table:
     """Fetch the table `table_id` from `dataset_ref`.

     Exceptions raised by the underlying client call are not caught here.

     Args:
         dataset_ref: Dataset containing the table.
         table_id: Id of the table to fetch.

     Returns:
         The value returned by `self.client.get_table` for that table.
     """
     return self.client.get_table(dataset_ref.table(table_id))