def test_batch_max_cells(self, mock_batch_snapshot_class, mock_batch_checkout):
  """Verify _BatchFn splits mutation groups by the max-cells limit."""
  # 50 identical groups; each group holds one insert of two rows with two
  # columns each, i.e. 4 cells per group and 200 cells overall.
  two_row_insert = WriteMutation.insert(
      "roles",
      ("key", "rolename"),
      [
          ('1234', "mutations-inset-1234"),
          ('1235', "mutations-inset-1235"),
      ])
  groups = [MutationGroup([two_row_insert])] * 50
  with TestPipeline() as p:
    # With a 50-cell cap a batch fits at most 12 groups (12 * 4 = 48 cells),
    # so the 50 groups come out as four batches of 12 plus a final batch
    # holding the remaining 2 groups:
    #   groups per batch = max cells // cells per group
    #   total batches    = ceil(total cells / max cells)
    res = (
        p
        | beam.Create(groups)
        | beam.ParDo(
            _BatchFn(
                max_batch_size_bytes=1048576,
                max_number_rows=500,
                max_number_cells=50))
        | beam.Map(len))
    assert_that(res, equal_to([12, 12, 12, 12, 2]))
def make_insert_mutations(element):
  """Wrap *element* in a one-row Spanner insert mutation list.

  Imports are kept function-local so the callable can be shipped to
  remote Beam workers without relying on module-level state.
  """
  import uuid  # pylint: disable=reimported
  from apache_beam.io.gcp.experimental.spannerio import WriteMutation
  row_id = str(uuid.uuid1())
  mutation = WriteMutation.insert(
      table='test', columns=('id', 'data'), values=[(row_id, element)])
  return [mutation]
def test_write_batches(self):
  """End-to-end write: four inserts pushed through small byte-size batches."""
  prefix = 'test_write_batches'
  # Four rows keyed prefix+'1' .. prefix+'4' (values match the keys the
  # _count_data helper queries by).
  mutations = [
      WriteMutation.insert(
          'Users', ('UserId', 'Key'),
          [(prefix + str(i), prefix + 'inset-' + str(i))])
      for i in range(1, 5)
  ]
  p = beam.Pipeline(argv=self.args)
  _ = (
      p
      | beam.Create(mutations)
      | WriteToSpanner(
          project_id=self.project,
          instance_id=self.instance,
          database_id=self.TEST_DATABASE,
          # Tiny cap so the four mutations are split across batches.
          max_batch_size_bytes=250))
  result = p.run()
  result.wait_until_finish()
  # Every mutation must have landed regardless of how it was batched.
  self.assertEqual(self._count_data(prefix), len(mutations))
def test_metrics_ok_call(self):
  """Check the 'ok' write-call metric after a successful Spanner write."""
  if 'DirectRunner' not in self.runner_name:
    raise unittest.SkipTest('This test only runs with DirectRunner.')
  # Reset the process-wide metrics container so the count below is exact.
  MetricsEnvironment.process_wide_container().reset()
  prefix = 'test_write_batches'
  mutations = [
      WriteMutation.insert(
          'Albums', ('AlbumId', 'Name'),
          [(prefix + str(i), prefix + 'inset-' + str(i))])
      for i in (1, 2)
  ]
  p = beam.Pipeline(argv=self.args)
  _ = (
      p
      | beam.Create(mutations)
      | WriteToSpanner(
          project_id=self.project,
          instance_id=self.instance,
          database_id=self.TEST_DATABASE))
  result = p.run()
  result.wait_until_finish()
  # A single successful commit should register exactly one 'ok' call.
  self.verify_write_call_metric(
      self.project, self.TEST_DATABASE, 'Albums', 'ok', 1)
def test_batch_disable(self, mock_batch_snapshot_class, mock_batch_checkout):
  """Setting max_number_rows to zero disables batching entirely."""
  single_insert = WriteMutation.insert(
      "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
  groups = [MutationGroup([single_insert])] * 4
  with TestPipeline() as p:
    # Batching is disabled by driving any one of the limits to zero or
    # below a single group's size; here max_number_rows=0 forces each of
    # the four mutation groups into its own batch.
    res = (
        p
        | beam.Create(groups)
        | beam.ParDo(
            _BatchFn(
                max_batch_size_bytes=1450,
                max_number_rows=0,
                max_number_cells=500))
        | beam.Map(len))
    assert_that(res, equal_to([1] * 4))
def test_batch_byte_size(self, mock_batch_snapshot_class, mock_batch_checkout):
  """Verify _BatchFn splits mutation groups by the byte-size limit."""
  # Each mutation group serializes to 58 bytes, so the 50 groups total
  # 2900 bytes.
  single_insert = WriteMutation.insert(
      "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
  groups = [MutationGroup([single_insert])] * 50
  with TestPipeline() as p:
    # A 1450-byte cap (half of 2900) yields exactly two batches of 25
    # mutation groups each.
    res = (
        p
        | beam.Create(groups)
        | beam.ParDo(
            _BatchFn(
                max_batch_size_bytes=1450,
                max_number_rows=50,
                max_number_cells=500))
        | beam.Map(len))
    assert_that(res, equal_to([25] * 2))
def test_batch_max_rows(self, mock_batch_snapshot_class, mock_batch_checkout):
  """Verify _BatchFn splits mutation groups by the max-rows limit."""
  # 50 groups, each inserting two rows: 100 rows in total.
  two_row_insert = WriteMutation.insert(
      "roles",
      ("key", "rolename"),
      [
          ('1234', "mutations-inset-1234"),
          ('1235', "mutations-inset-1235"),
      ])
  groups = [MutationGroup([two_row_insert])] * 50
  with TestPipeline() as p:
    # A 10-row cap fits 5 groups per batch, so the 50 groups are emitted
    # as 10 batches of 5 groups each.
    res = (
        p
        | beam.Create(groups)
        | beam.ParDo(
            _BatchFn(
                max_batch_size_bytes=1048576,
                max_number_rows=10,
                max_number_cells=500))
        | beam.Map(len))
    assert_that(res, equal_to([5] * 10))
def test_spanner_bundles_size(self, mock_batch_snapshot_class, mock_batch_checkout):
  """Check the SpannerBatches counter when writes are split into batches."""
  key_set = spanner.KeySet(keys=[[1233], [1234]])
  # 100 mutations total: 50 delete/insert pairs over the same table.
  mutations = [
      WriteMutation.delete("roles", key_set),
      WriteMutation.insert(
          "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")]),
  ] * 50
  p = TestPipeline()
  _ = (
      p
      | beam.Create(mutations)
      | WriteToSpanner(
          project_id=TEST_PROJECT_ID,
          instance_id=TEST_INSTANCE_ID,
          database_id=_generate_database_name(),
          # Small cap so the mutations are spread over many batches.
          max_batch_size_bytes=1024))
  result = p.run()
  result.wait_until_finish()
  metric_results = result.metrics().query(
      MetricsFilter().with_name('SpannerBatches'))
  batches_counter = metric_results['counters'][0]
  # The 1024-byte cap splits the 100 mutations into 53 commit batches,
  # counted both as attempted and committed.
  self.assertEqual(batches_counter.committed, 53)
  self.assertEqual(batches_counter.attempted, 53)