def process(
    self, element: 'ReadFromBigQueryRequest') -> Iterable[BoundedSource]:
  bq = bigquery_tools.BigQueryWrapper(
      temp_dataset_id=self._get_temp_dataset().datasetId)
  # TODO(BEAM-11359): Clean up temp dataset at pipeline completion.
  if element.query is not None:
    self._setup_temporary_dataset(bq, element)
    table_reference = self._execute_query(bq, element)
  else:
    assert element.table
    table_reference = bigquery_tools.parse_table_reference(
        element.table, project=self._get_project())
    if not table_reference.projectId:
      table_reference.projectId = self._get_project()

  schema, metadata_list = self._export_files(bq, element, table_reference)

  for metadata in metadata_list:
    yield self._create_source(metadata.path, schema)

  if element.query is not None:
    bq._delete_table(
        table_reference.projectId,
        table_reference.datasetId,
        table_reference.tableId)

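A DoFn like this is driven by request elements rather than by constructor arguments. A minimal sketch of feeding it requests, assuming the experimental `ReadAllFromBigQuery` transform and `ReadFromBigQueryRequest` class available in recent Beam releases (verify against your Beam version):

import apache_beam as beam
from apache_beam.io.gcp.bigquery import (
    ReadAllFromBigQuery, ReadFromBigQueryRequest)

# Hedged sketch: one query-based and one table-based read request.
with beam.Pipeline() as p:
  rows = (
      p
      | beam.Create([
          ReadFromBigQueryRequest(query='SELECT 1 AS x'),
          ReadFromBigQueryRequest(table='my-project:my_dataset.my_table'),
      ])
      | ReadAllFromBigQuery())
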
def start_bundle(self):
  self._rows_buffer = []
  self.table_schema = self.get_table_schema(self.schema)

  self.bigquery_wrapper = bigquery_tools.BigQueryWrapper(
      client=self.test_client)
  self.bigquery_wrapper.get_or_create_table(
      self.project_id,
      self.dataset_id,
      self.table_id,
      self.table_schema,
      self.create_disposition,
      self.write_disposition)

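`get_or_create_table` receives the create and write dispositions the sink was configured with. A hedged sketch of typical values, using Beam's public `BigQueryDisposition` constants:

from apache_beam.io.gcp.bigquery import BigQueryDisposition

# Hedged sketch: dispositions a caller might configure the DoFn above with.
create_disposition = BigQueryDisposition.CREATE_IF_NEEDED  # create if absent
write_disposition = BigQueryDisposition.WRITE_APPEND  # append to existing rows
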
def _matches(self, _):
  _LOGGER.info('Start verifying BigQuery table properties.')
  # Fetch the table via the BigQuery wrapper so its properties can be
  # compared against the expected ones.
  bigquery_wrapper = bigquery_tools.BigQueryWrapper()

  self.actual_table = self._get_table_with_retry(bigquery_wrapper)

  _LOGGER.info('Table proto is %s', self.actual_table)

  return all(
      self._match_property(v, self._get_or_none(self.actual_table, k))
      for k, v in self.expected_properties.items())

def start_bundle(self):
  self._reset_rows_buffer()

  self.bigquery_wrapper = bigquery_tools.BigQueryWrapper(
      client=self.test_client)

  self._observed_tables = set()

  self._backoff_calculator = iter(
      retry.FuzzedExponentialIntervals(
          initial_delay_secs=0.2, num_retries=10000, max_delay_secs=1500))

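`FuzzedExponentialIntervals` (from `apache_beam.utils.retry`) is an iterable of randomized, exponentially growing delays; each `next()` yields the number of seconds to wait before the next retry attempt. A hedged sketch of how the iterator is typically consumed (`_try_insert` is a hypothetical helper, not part of the snippet above):

import time

def _insert_with_retries(self, rows):
  while True:
    errors = self._try_insert(rows)  # hypothetical: returns the failed rows
    if not errors:
      return
    # Sleep for the next fuzzed exponential interval before retrying.
    time.sleep(next(self._backoff_calculator))
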
def delete_tables():
  klio_cfg = common.get_config()
  input_table_cfg = klio_cfg.job_config.events.inputs[0]
  output_table_cfg = klio_cfg.job_config.events.outputs[0]

  bq_client = beam_bq_tools.BigQueryWrapper()
  bq_client._delete_table(
      input_table_cfg.project,
      input_table_cfg.dataset,
      input_table_cfg.table)
  bq_client._delete_table(
      output_table_cfg.project,
      output_table_cfg.dataset,
      output_table_cfg.table)

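One way to run this cleanup automatically is from a pytest fixture, so the tables are removed after each integration test. A hedged sketch (the fixture name is illustrative):

import pytest

@pytest.fixture
def bq_test_tables():
  yield  # the test body runs here
  delete_tables()  # tear the input/output tables down afterwards
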
def setUp(self):
  self.test_pipeline = TestPipeline(is_integration_test=True)
  self.runner_name = type(self.test_pipeline.runner).__name__
  self.project = self.test_pipeline.get_option('project')

  self.dataset_id = '%s%s%d' % (
      self.BIG_QUERY_DATASET_ID,
      str(int(time.time())),
      random.randint(0, 10000))
  self.bigquery_client = bigquery_tools.BigQueryWrapper()
  self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
  self.output_table = "%s.output_table" % (self.dataset_id)
  logging.info(
      "Created dataset %s in project %s", self.dataset_id, self.project)

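A setUp like this usually pairs with a tearDown that drops the timestamped dataset. A hedged sketch following the apitools request pattern Beam's own integration-test utilities use (verify the request class against your Beam version):

from apache_beam.io.gcp.internal.clients import bigquery

def tearDown(self):
  request = bigquery.BigqueryDatasetsDeleteRequest(
      projectId=self.project,
      datasetId=self.dataset_id,
      deleteContents=True)  # also remove any tables left in the dataset
  self.bigquery_client.client.datasets.Delete(request)
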
def process(self, unused_element, unused_signal, pipeline_details):
  bq = bigquery_tools.BigQueryWrapper()
  pipeline_details = pipeline_details[0]
  if 'temp_table_ref' in pipeline_details:
    temp_table_ref = pipeline_details['temp_table_ref']
    bq._clean_up_beam_labelled_temporary_datasets(
        project_id=temp_table_ref.projectId,
        dataset_id=temp_table_ref.datasetId,
        table_id=temp_table_ref.tableId)
  elif 'project_id' in pipeline_details:
    bq._clean_up_beam_labelled_temporary_datasets(
        project_id=pipeline_details['project_id'],
        labels=pipeline_details['bigquery_dataset_labels'])

def estimate_size(self):
  bq = bigquery_tools.BigQueryWrapper()
  if self.table_reference is not None:
    table = bq.get_table(
        self.table_reference.projectId,
        self.table_reference.datasetId,
        self.table_reference.tableId)
    return int(table.numBytes)
  else:
    # A dry-run query job reports how many bytes it would process
    # without actually running the query.
    job = bq._start_query_job(
        self.project.get(),
        self.query.get(),
        self.use_legacy_sql,
        self.flatten_results,
        job_id=uuid.uuid4().hex,
        dry_run=True,
        kms_key=self.kms_key)
    size = int(job.statistics.totalBytesProcessed)
    return size

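`_start_query_job(..., dry_run=True)` is a private wrapper method; the same estimate can be obtained with the public google-cloud-bigquery client. A hedged equivalent sketch:

from google.cloud import bigquery

def estimate_query_bytes(project, sql):
  client = bigquery.Client(project=project)
  config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
  job = client.query(sql, job_config=config)  # dry run: nothing is executed
  return job.total_bytes_processed  # bytes the query would scan
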
def split(self, desired_bundle_size, start_position=None, stop_position=None):
  if self.split_result is None:
    bq = bigquery_tools.BigQueryWrapper()

    if self.query is not None:
      self._setup_temporary_dataset(bq)
      self.table_reference = self._execute_query(bq)

    schema, metadata_list = self._export_files(bq)
    self.split_result = [
        TextSource(
            metadata.path,
            0,
            CompressionTypes.UNCOMPRESSED,
            True,
            self.coder(schema)) for metadata in metadata_list
    ]

    if self.query is not None:
      bq.clean_up_temporary_dataset(self.project.get())

  for source in self.split_result:
    yield SourceBundle(0, source, None, None)

def start_bundle(self):
  self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)

def start_bundle(self):
  self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
  if not self.bq_io_metadata:
    self.bq_io_metadata = create_bigquery_io_metadata(self._step_name)

def setup(self):
  self._bq_wrapper = bigquery_tools.BigQueryWrapper(client=self._test_client)
  self._bq_io_metadata = create_bigquery_io_metadata(self._step_name)

def BigQueryWrapper(*args, **kwargs):
  return bigquery_tools.BigQueryWrapper(*args, **kwargs)

def start_bundle(self):
  self._observed_tables = set()
  self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)

def start_bundle(self):
  self._observed_tables = set()
  self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
  if not self.bq_io_metadata:
    self.bq_io_metadata = create_bigquery_io_metadata()

def BigQueryWrapper(*args, **kwargs):
  import warnings
  warnings.warn(
      "This class is deprecated and will be permanently moved "
      "to the bigquery_tools module in a future version of beam")
  return bigquery_tools.BigQueryWrapper(*args, **kwargs)
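
`warnings.warn` defaults to `UserWarning`; a deprecation shim like the two above would normally pass `DeprecationWarning` and `stacklevel=2` so the warning is attributed to the caller rather than the shim. A hedged sketch of that variant:

import warnings

def BigQueryWrapper(*args, **kwargs):
  warnings.warn(
      'BigQueryWrapper has moved to the bigquery_tools module.',
      DeprecationWarning,
      stacklevel=2)  # point the warning at the calling code
  return bigquery_tools.BigQueryWrapper(*args, **kwargs)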