def test_mutate_row_with_max_mutations(self):
    """A row holding the maximum allowed mutations is accepted and flushed once."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    row = DirectRow(row_key=b'row_key')
    # Three cells -> three mutations on a single row.
    for value, column in enumerate((b'c1', b'c2', b'c3'), start=1):
        row.set_cell('cf1', column, value)

    batcher.mutate(row)
    batcher.flush()
    self.assertEqual(table.mutation_calls, 1)
def test_add_row_with_max_flush_count(self):
    """Reaching flush_count rows triggers exactly one automatic flush."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table, flush_count=3)

    # The third mutate() hits flush_count and flushes the batch.
    for key in (b'row_key_1', b'row_key_2', b'row_key_3'):
        batcher.mutate(DirectRow(row_key=key))

    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row_with_max_mutations(self):
    """Verify a row with exactly the allowed number of mutations flushes once."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    row = DirectRow(row_key=b"row_key")
    cells = {b"c1": 1, b"c2": 2, b"c3": 3}
    for column, value in cells.items():
        row.set_cell("cf1", column, value)

    batcher.mutate(row)
    batcher.flush()
    self.assertEqual(table.mutation_calls, 1)
def test_add_row_with_max_flush_count(self):
    """Adding flush_count rows triggers exactly one automatic flush."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table, flush_count=3)

    rows = [
        DirectRow(row_key=b"row_key_1"),
        DirectRow(row_key=b"row_key_2"),
        DirectRow(row_key=b"row_key_3"),
    ]
    for row in rows:
        batcher.mutate(row)

    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row_with_max_mutations_failure(self):
    """A row exceeding the mutation limit makes mutate() raise MaxMutationsError."""
    from google.cloud.bigtable.batcher import MaxMutationsError

    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    # Four cells -> one mutation over the limit exercised by this suite.
    row = DirectRow(row_key=b'row_key')
    for value, column in enumerate((b'c1', b'c2', b'c3', b'c4'), start=1):
        row.set_cell('cf1', column, value)

    with self.assertRaises(MaxMutationsError):
        batcher.mutate(row)
def test_mutate_row_with_max_mutations_failure(self):
    """mutate() rejects a row whose mutation count is over the limit."""
    from google.cloud.bigtable.batcher import MaxMutationsError

    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table)

    row = DirectRow(row_key=b"row_key")
    cells = [(b"c1", 1), (b"c2", 2), (b"c3", 3), (b"c4", 4)]
    for column, value in cells:
        row.set_cell("cf1", column, value)

    with self.assertRaises(MaxMutationsError):
        batcher.mutate(row)
def test_mutate_row_with_max_row_bytes(self):
    """A row whose byte size reaches max_row_bytes triggers a flush."""
    megabyte = 1024 * 1024
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table, max_row_bytes=3 * megabyte)

    payload = b"1" * megabyte
    row = DirectRow(row_key=b"row_key")
    # Three 1 MiB cells hit the 3 MiB threshold on a single mutate().
    for column in (b"c1", b"c2", b"c3"):
        row.set_cell("cf1", column, payload)

    batcher.mutate(row)
    self.assertEqual(table.mutation_calls, 1)
def test_mutate_row_with_max_row_bytes(self):
    """Filling a row up to max_row_bytes causes exactly one flush."""
    table = _Table(self.TABLE_NAME)
    batcher = MutationsBatcher(table=table, max_row_bytes=3 * 1024 * 1024)

    cell_value = b'1' * (1 * 1024 * 1024)
    row = DirectRow(row_key=b'row_key')
    columns = [b'c1', b'c2', b'c3']
    for column in columns:
        row.set_cell('cf1', column, cell_value)

    batcher.mutate(row)
    self.assertEqual(table.mutation_calls, 1)
class WriteToBigtable(beam.DoFn):
    """A ``DoFn`` that writes each incoming row to a Bigtable table.

    Rows received in :meth:`process` are buffered in a
    ``MutationsBatcher`` created per bundle; the batch is flushed when the
    bundle finishes.

    :type beam_options: class:`~bigtable_configuration.BigtableConfiguration`
    :param beam_options: Configuration providing ``project_id``,
        ``instance_id``, ``table_id`` and (optionally) ``credentials``.

    :type flush_count: int
    :param flush_count: (Optional) Max number of rows to flush. If it
        reaches the max number of rows it calls finish_batch() to mutate
        the current row batch. Default is FLUSH_COUNT (1000 rows).

    :type max_row_bytes: int
    :param max_row_bytes: (Optional) Max number of row mutations size to
        flush. If it reaches the max number of row mutations size it calls
        finish_batch() to mutate the current row batch. Default is
        MAX_ROW_BYTES (5 MB).

    :type app_profile_id: str
    :param app_profile_id: (Optional) The unique name of the AppProfile.
    """

    def __init__(self, beam_options, flush_count=None, max_row_bytes=None,
                 app_profile_id=None):
        super(WriteToBigtable, self).__init__(beam_options)
        self.beam_options = beam_options
        # Connection objects are created lazily in start_bundle() so the
        # DoFn stays picklable between construction and worker execution.
        self.client = None
        self.instance = None
        self.table = None
        self.batcher = None
        self._app_profile_id = app_profile_id
        self.flush_count = flush_count
        self.max_row_bytes = max_row_bytes

    def start_bundle(self):
        """Create the Bigtable client, table handle and a fresh batcher."""
        if self.beam_options.credentials is None:
            # No explicit credentials: let the client pick up ambient ones.
            self.client = bigtable.Client(
                project=self.beam_options.project_id, admin=True)
        else:
            self.client = bigtable.Client(
                project=self.beam_options.project_id,
                credentials=self.beam_options.credentials,
                admin=True)
        self.instance = self.client.instance(self.beam_options.instance_id)
        self.table = self.instance.table(self.beam_options.table_id,
                                         self._app_profile_id)
        self.batcher = MutationsBatcher(
            self.table,
            flush_count=self.flush_count,
            max_row_bytes=self.max_row_bytes)

    def process(self, row):
        """Buffer one row; the batcher flushes when its thresholds are hit."""
        self.batcher.mutate(row)

    def finish_bundle(self):
        """Flush any buffered mutations at the end of the bundle."""
        return self.batcher.flush()