def test_mutate_row(self):
    """Batching four whole-row mutations and flushing issues exactly one RPC."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    keys = (b"row_key", b"row_key_2", b"row_key_3", b"row_key_4")
    batcher.mutate_rows([DirectRow(row_key=key) for key in keys])
    batcher.flush()

    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row_with_max_mutations(self):
    """One row carrying three cell mutations is sent in a single mutate call.

    NOTE(review): despite the name, no max-mutations limit is configured on
    the batcher here — confirm against the MutationsBatcher API.
    """
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    row = DirectRow(row_key=b"row_key")
    for column, value in ((b"c1", 1), (b"c2", 2), (b"c3", 3)):
        row.set_cell("cf1", column, value)

    batcher.mutate(row)
    batcher.flush()
    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row_with_max_mutations(self):
    """Flushing a batcher that holds one three-cell row makes one mutate call.

    NOTE(review): this redefines a test of the same name earlier in the file,
    so only this definition is collected — confirm and drop the duplicate.
    """
    table = _Table(self.TABLE_NAME)
    mutation_batcher = MutationsBatcher(table=table)

    direct_row = DirectRow(row_key=b"row_key")
    direct_row.set_cell("cf1", b"c1", 1)
    direct_row.set_cell("cf1", b"c2", 2)
    direct_row.set_cell("cf1", b"c3", 3)

    mutation_batcher.mutate(direct_row)
    mutation_batcher.flush()

    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row(self):
    """mutate_rows() buffers several rows; flush() sends them in one call.

    NOTE(review): this redefines a test of the same name earlier in the file,
    so only this definition is collected — confirm and drop the duplicate.
    """
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    rows = []
    for suffix in ("", "_2", "_3", "_4"):
        rows.append(DirectRow(row_key=("row_key" + suffix).encode()))

    batcher.mutate_rows(rows)
    batcher.flush()
    self.assertEqual(table.mutation_calls, 1)
def test_flush_with_no_rows(self):
    """Flushing a batcher that holds nothing must not issue any mutate RPC."""
    empty_table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=empty_table)

    batcher.flush()

    self.assertEqual(empty_table.mutation_calls, 0)
class WriteToBigtable(beam.DoFn):
    """A ``DoFn`` that writes each row of a Beam pipeline to Cloud Bigtable.

    Rows received in ``process`` are buffered in a ``MutationsBatcher`` and
    sent to the service in batches; any rows still buffered are flushed at
    the end of each bundle.

    :type beam_options: class:`~bigtable_configuration.BigtableConfiguration`
    :param beam_options: Class
        `~bigtable_configuration.BigtableConfiguration`.

    :type flush_count: int
    :param flush_count: (Optional) Max number of rows to flush. If it
        reaches the max number of rows it calls finish_batch() to mutate
        the current row batch. Default is FLUSH_COUNT (1000 rows).

    :type max_row_bytes: int
    :param max_row_bytes: (Optional) Max number of row mutations size to
        flush. If it reaches the max number of row mutations size it
        calls finish_batch() to mutate the current row batch. Default is
        MAX_ROW_BYTES (5 MB).

    :type app_profile_id: str
    :param app_profile_id: (Optional) The unique name of the AppProfile.
    """

    def __init__(self, beam_options, flush_count=None, max_row_bytes=None,
                 app_profile_id=None):
        # NOTE(review): DoFn.__init__ normally takes no arguments — confirm
        # that forwarding beam_options to the base class is intended.
        super(WriteToBigtable, self).__init__(beam_options)
        self.beam_options = beam_options
        # The client, table handle and batcher are created per bundle in
        # start_bundle(); they are not built here so the DoFn stays
        # serializable for distribution to workers.
        self.client = None
        self.instance = None
        self.table = None
        self.batcher = None
        self._app_profile_id = app_profile_id
        self.flush_count = flush_count
        self.max_row_bytes = max_row_bytes

    def start_bundle(self):
        """Create the Bigtable client, table handle, and mutation batcher."""
        if self.beam_options.credentials is None:
            self.client = bigtable.Client(
                project=self.beam_options.project_id, admin=True)
        else:
            self.client = bigtable.Client(
                project=self.beam_options.project_id,
                credentials=self.beam_options.credentials,
                admin=True)
        self.instance = self.client.instance(self.beam_options.instance_id)
        self.table = self.instance.table(self.beam_options.table_id,
                                         self._app_profile_id)
        # Only forward limits the caller actually set; passing None through
        # would override the batcher's documented defaults (FLUSH_COUNT,
        # MAX_ROW_BYTES) with None and break its threshold checks.
        batcher_kwargs = {}
        if self.flush_count is not None:
            batcher_kwargs['flush_count'] = self.flush_count
        if self.max_row_bytes is not None:
            batcher_kwargs['max_row_bytes'] = self.max_row_bytes
        self.batcher = MutationsBatcher(self.table, **batcher_kwargs)

    def process(self, row):
        """Buffer one row; the batcher flushes itself when a limit is hit."""
        self.batcher.mutate(row)

    def finish_bundle(self):
        """Flush any rows still buffered at the end of the bundle."""
        return self.batcher.flush()