def test_batch_max_cells(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Check that ``_BatchFn`` splits batches on ``max_number_cells``.

    The two mock arguments are not used in the body; presumably they are
    injected by patch decorators outside this view.
    """
    # A single insert of two rows into two columns, i.e. 4 cells per
    # mutation group.
    two_row_insert = WriteMutation.insert(
        "roles",
        ("key", "rolename"),
        [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ])
    groups = [MutationGroup([two_row_insert])] * 50

    # 50 groups * 4 cells = 200 cells in total. With a limit of 50 cells
    # per batch, 12 groups (48 cells) fit into one batch, so the expected
    # output is four batches of 12 groups followed by a fifth batch holding
    # the remaining 2 groups. The byte and row limits are set high enough
    # that only the cell limit is exercised.
    expected_sizes = [12] * 4 + [2]

    with TestPipeline() as p:
        batcher = _BatchFn(
            max_batch_size_bytes=1048576,
            max_number_rows=500,
            max_number_cells=50)
        batch_sizes = (
            p
            | beam.Create(groups)
            | beam.ParDo(batcher)
            | beam.Map(lambda batch: len(batch)))
        assert_that(batch_sizes, equal_to(expected_sizes))
def test_batch_disable(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Check that ``_BatchFn`` emits one group per batch when batching is off.

    The two mock arguments are not used in the body; presumably they are
    injected by patch decorators outside this view.
    """
    single_row_insert = WriteMutation.insert(
        "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
    groups = [MutationGroup([single_row_insert])] * 4

    # Batching is disabled by setting any one of the batching parameters to
    # a low value or to zero; here max_number_rows is 0. Every one of the
    # four mutation groups is therefore expected to come out on its own.
    expected_sizes = [1] * 4

    with TestPipeline() as p:
        batcher = _BatchFn(
            max_batch_size_bytes=1450,
            max_number_rows=0,
            max_number_cells=500)
        batch_sizes = (
            p
            | beam.Create(groups)
            | beam.ParDo(batcher)
            | beam.Map(lambda batch: len(batch)))
        assert_that(batch_sizes, equal_to(expected_sizes))
def test_batch_byte_size(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Check that ``_BatchFn`` splits batches on ``max_batch_size_bytes``.

    The two mock arguments are not used in the body; presumably they are
    injected by patch decorators outside this view.
    """
    # Per the original author's note, each mutation group built below has a
    # byte size of 58 bytes.
    single_row_insert = WriteMutation.insert(
        "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
    groups = [MutationGroup([single_row_insert])] * 50

    # All 50 groups together weigh 2900 bytes (58 * 50). Capping a batch at
    # 1450 bytes (2900 / 2) should yield exactly two batches, each
    # containing 25 mutation groups. The row and cell limits are high
    # enough that only the byte limit is exercised.
    expected_sizes = [25] * 2

    with TestPipeline() as p:
        batcher = _BatchFn(
            max_batch_size_bytes=1450,
            max_number_rows=50,
            max_number_cells=500)
        batch_sizes = (
            p
            | beam.Create(groups)
            | beam.ParDo(batcher)
            | beam.Map(lambda batch: len(batch)))
        assert_that(batch_sizes, equal_to(expected_sizes))
def test_batch_max_rows(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Check that ``_BatchFn`` splits batches on the ``max_number_rows`` limit.

    The two mock arguments are not used in the body; presumably they are
    injected by patch decorators outside this view.
    """
    # Each mutation group is a single insert carrying two rows.
    two_row_insert = WriteMutation.insert(
        "roles",
        ("key", "rolename"),
        [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ])
    groups = [MutationGroup([two_row_insert])] * 50

    # 50 groups * 2 rows = 100 rows in total. With at most 10 rows per
    # batch, each batch holds 5 mutation groups, giving 10 batches. The
    # byte and cell limits are high enough that only the row limit is
    # exercised.
    expected_sizes = [5] * 10

    with TestPipeline() as p:
        batcher = _BatchFn(
            max_batch_size_bytes=1048576,
            max_number_rows=10,
            max_number_cells=500)
        batch_sizes = (
            p
            | beam.Create(groups)
            | beam.ParDo(batcher)
            | beam.Map(lambda batch: len(batch)))
        assert_that(batch_sizes, equal_to(expected_sizes))