Example #1
0
    def process(self,
                element: 'ReadFromBigQueryRequest') -> Iterable[BoundedSource]:
        """Turn one read request into a source per exported file.

        Resolves the request to a concrete table (running the query into a
        temporary table when needed), exports the table to files, and yields
        one bounded source per exported file. The query's temporary table is
        deleted once the sources have been produced.
        """
        wrapper = bigquery_tools.BigQueryWrapper(
            temp_dataset_id=self._get_temp_dataset().datasetId)
        # TODO(BEAM-11359): Clean up temp dataset at pipeline completion.

        is_query_request = element.query is not None
        if is_query_request:
            # Materialize the query result into a temporary table.
            self._setup_temporary_dataset(wrapper, element)
            table_reference = self._execute_query(wrapper, element)
        else:
            assert element.table
            table_reference = bigquery_tools.parse_table_reference(
                element.table, project=self._get_project())

        # Fall back to the pipeline project when the reference has none.
        if not table_reference.projectId:
            table_reference.projectId = self._get_project()

        schema, metadata_list = self._export_files(
            wrapper, element, table_reference)
        for metadata in metadata_list:
            yield self._create_source(metadata.path, schema)

        if is_query_request:
            # The materialized query result is no longer needed.
            wrapper._delete_table(
                table_reference.projectId,
                table_reference.datasetId,
                table_reference.tableId)
Example #2
0
  def start_bundle(self):
    """Reset the row buffer and ensure the destination table exists."""
    self._rows_buffer = []
    self.table_schema = self.get_table_schema(self.schema)
    self.bigquery_wrapper = bigquery_tools.BigQueryWrapper(
        client=self.test_client)
    # Create the table up front so later inserts do not race its creation.
    self.bigquery_wrapper.get_or_create_table(
        self.project_id,
        self.dataset_id,
        self.table_id,
        self.table_schema,
        self.create_disposition,
        self.write_disposition)
Example #3
0
  def _matches(self, _):
    """Return True iff every expected property matches the live table."""
    _LOGGER.info('Start verify Bigquery table properties.')
    # Run query
    bigquery_wrapper = bigquery_tools.BigQueryWrapper()
    self.actual_table = self._get_table_with_retry(bigquery_wrapper)
    _LOGGER.info('Table proto is %s', self.actual_table)

    # Fail fast on the first mismatched property.
    for prop_name, expected_value in self.expected_properties.items():
      actual_value = self._get_or_none(self.actual_table, prop_name)
      if not self._match_property(expected_value, actual_value):
        return False
    return True
Example #4
0
    def start_bundle(self):
        """Initialize per-bundle state: buffers, wrapper, cache and backoff."""
        self._reset_rows_buffer()
        self.bigquery_wrapper = bigquery_tools.BigQueryWrapper(
            client=self.test_client)
        # Tables already seen (verified/created) within this bundle.
        self._observed_tables = set()
        # Jittered exponential backoff schedule for retrying failed requests.
        self._backoff_calculator = iter(
            retry.FuzzedExponentialIntervals(
                initial_delay_secs=0.2,
                num_retries=10000,
                max_delay_secs=1500))
Example #5
0
def delete_tables():
    """Delete the BigQuery input and output tables named in the Klio config."""
    events_cfg = common.get_config().job_config.events
    bq_client = beam_bq_tools.BigQueryWrapper()

    # Drop the input table first, then the output table.
    for table_cfg in (events_cfg.inputs[0], events_cfg.outputs[0]):
        bq_client._delete_table(
            table_cfg.project, table_cfg.dataset, table_cfg.table)
Example #6
0
  def setUp(self):
    """Create a uniquely named throwaway dataset for this test run."""
    self.test_pipeline = TestPipeline(is_integration_test=True)
    self.runner_name = type(self.test_pipeline.runner).__name__
    self.project = self.test_pipeline.get_option('project')

    # Unique dataset name: fixed prefix + unix timestamp + random suffix.
    self.dataset_id = '%s%s%d' % (
        self.BIG_QUERY_DATASET_ID,
        str(int(time.time())),
        random.randint(0, 10000))
    self.bigquery_client = bigquery_tools.BigQueryWrapper()
    self.bigquery_client.get_or_create_dataset(self.project, self.dataset_id)
    self.output_table = "%s.output_table" % (self.dataset_id)
    logging.info("Created dataset %s in project %s",
                 self.dataset_id, self.project)
 def process(self, unused_element, unused_signal, pipeline_details):
   """Clean up Beam-labelled temporary BigQuery datasets after the run.

   Args:
     unused_element: ignored trigger element.
     unused_signal: ignored side-input signal.
     pipeline_details: single-element iterable holding a dict that carries
       either a 'temp_table_ref' (clean up that one table's dataset) or a
       'project_id' plus 'bigquery_dataset_labels' (clean up by labels).
   """
   bq = bigquery_tools.BigQueryWrapper()
   pipeline_details = pipeline_details[0]
   # Membership tests go directly against the dict; '.keys()' is redundant.
   if 'temp_table_ref' in pipeline_details:
     temp_table_ref = pipeline_details['temp_table_ref']
     bq._clean_up_beam_labelled_temporary_datasets(
         project_id=temp_table_ref.projectId,
         dataset_id=temp_table_ref.datasetId,
         table_id=temp_table_ref.tableId)
   elif 'project_id' in pipeline_details:
     bq._clean_up_beam_labelled_temporary_datasets(
         project_id=pipeline_details['project_id'],
         labels=pipeline_details['bigquery_dataset_labels'])
Example #8
0
 def estimate_size(self):
     """Return the source size in bytes.

     For a table source this is the table's reported byte count; for a
     query source it is the number of bytes a dry run says the query
     would scan.
     """
     bq = bigquery_tools.BigQueryWrapper()
     if self.table_reference is None:
         # No table: dry-run the query and use its estimated scan size.
         dry_run_job = bq._start_query_job(
             self.project.get(),
             self.query.get(),
             self.use_legacy_sql,
             self.flatten_results,
             job_id=uuid.uuid4().hex,
             dry_run=True,
             kms_key=self.kms_key)
         return int(dry_run_job.statistics.totalBytesProcessed)
     table = bq.get_table(
         self.table_reference.projectId,
         self.table_reference.datasetId,
         self.table_reference.tableId)
     return int(table.numBytes)
Example #9
0
    def split(self,
              desired_bundle_size,
              start_position=None,
              stop_position=None):
        """Yield one SourceBundle per exported file.

        The export (and, for query sources, the query materialization) runs
        once; its result is cached in ``self.split_result`` so repeated calls
        reuse the same sources.
        """
        if self.split_result is None:
            bq = bigquery_tools.BigQueryWrapper()

            from_query = self.query is not None
            if from_query:
                # Materialize the query into a temporary table first.
                self._setup_temporary_dataset(bq)
                self.table_reference = self._execute_query(bq)

            schema, metadata_list = self._export_files(bq)
            sources = []
            for metadata in metadata_list:
                sources.append(
                    TextSource(metadata.path, 0,
                               CompressionTypes.UNCOMPRESSED, True,
                               self.coder(schema)))
            self.split_result = sources

            if from_query:
                bq.clean_up_temporary_dataset(self.project.get())

        for source in self.split_result:
            yield SourceBundle(0, source, None, None)
Example #10
0
 def start_bundle(self):
     """Create a fresh BigQuery client wrapper for this bundle."""
     self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
 def start_bundle(self):
     """Create the BigQuery wrapper and lazily build the IO metadata."""
     self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
     # Metadata is built once and reused across bundles.
     if not self.bq_io_metadata:
         self.bq_io_metadata = create_bigquery_io_metadata(self._step_name)
 def setup(self):
     """Initialize the BigQuery client wrapper and the step's IO metadata."""
     self._bq_wrapper = bigquery_tools.BigQueryWrapper(
         client=self._test_client)
     self._bq_io_metadata = create_bigquery_io_metadata(self._step_name)
Example #13
0
def BigQueryWrapper(*args, **kwargs):
    """Deprecated alias that forwards to bigquery_tools.BigQueryWrapper.

    Kept for backward compatibility; new code should use the wrapper from
    the bigquery_tools module directly.
    """
    # Consistent with the other deprecation shim for this name: warn the
    # caller before delegating. DeprecationWarning + stacklevel=2 blame
    # the call site, not this shim.
    import warnings
    warnings.warn(
        "This class is deprecated and will be permanently moved "
        "to the bigquery_tools module in a future version of beam",
        DeprecationWarning,
        stacklevel=2)
    return bigquery_tools.BigQueryWrapper(*args, **kwargs)
 def start_bundle(self):
     """Reset the seen-table cache and build a per-bundle BQ wrapper."""
     # Tables already verified within this bundle.
     self._observed_tables = set()
     self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
Example #15
0
 def start_bundle(self):
   """Prepare per-bundle state: wrapper, IO metadata and the table cache."""
   self.bq_wrapper = bigquery_tools.BigQueryWrapper(client=self.test_client)
   # Build the IO metadata only on the first bundle; reuse it afterwards.
   self.bq_io_metadata = (
       self.bq_io_metadata or create_bigquery_io_metadata())
   self._observed_tables = set()
Example #16
0
def BigQueryWrapper(*args, **kwargs):
    """Deprecated alias that forwards to bigquery_tools.BigQueryWrapper.

    Emits a DeprecationWarning directing callers to the bigquery_tools
    module before delegating construction.
    """
    import warnings
    # Use DeprecationWarning (not the implicit UserWarning) so deprecation
    # filters and linters see it; stacklevel=2 blames the caller's line.
    warnings.warn(
        "This class is deprecated and will be permanently moved "
        "to the bigquery_tools module in a future version of beam",
        DeprecationWarning,
        stacklevel=2)
    return bigquery_tools.BigQueryWrapper(*args, **kwargs)