Example #1
0
    def test_batch_max_cells(self, mock_batch_snapshot_class,
                             mock_batch_checkout):
        # Exercises the max_number_cells limit of _BatchFn.
        # The two mock parameters are not referenced in this body; presumably
        # they are supplied by patch decorators outside this view.
        rows = [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ]
        two_row_group = MutationGroup(
            [WriteMutation.insert("roles", ("key", "rolename"), rows)])
        # 50 references to the same two-row group.
        mutation_group = [two_row_group] * 50

        with TestPipeline() as p:
            # Each group holds 2 rows over 2 columns, i.e. 4 cells, so the
            # whole input is 200 cells (50 groups * 4 cells).
            # With at most 50 cells per batch, 12 groups (48 cells) fit and a
            # 13th (52 cells) would not. The expected output is therefore
            # four batches of 12 groups followed by one batch holding the
            # remaining 2 groups.
            groups = p | beam.Create(mutation_group)
            batches = groups | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1048576,
                    max_number_rows=500,
                    max_number_cells=50,
                ))
            # Reduce every emitted batch to the number of groups it contains.
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([12] * 4 + [2]))
Example #2
0
    def test_batch_disable(self, mock_batch_snapshot_class,
                           mock_batch_checkout):
        # Exercises _BatchFn with batching effectively switched off.
        # The two mock parameters are not referenced in this body; presumably
        # they are supplied by patch decorators outside this view.
        single_row_group = MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('1234', "mutations-inset-1234")])
        ])
        # 4 references to the same single-row group.
        mutation_group = [single_row_group] * 4

        with TestPipeline() as p:
            # Batching is disabled by setting any one of the batching limits
            # to zero (or to a very low value); here the row limit is 0, so
            # every mutation group is expected to come out in a batch of its
            # own.
            groups = p | beam.Create(mutation_group)
            batches = groups | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1450,
                    max_number_rows=0,
                    max_number_cells=500,
                ))
            # Reduce every emitted batch to the number of groups it contains.
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([1, 1, 1, 1]))
Example #3
0
    def test_batch_byte_size(self, mock_batch_snapshot_class,
                             mock_batch_checkout):
        # Exercises the max_batch_size_bytes limit of _BatchFn.
        # The two mock parameters are not referenced in this body; presumably
        # they are supplied by patch decorators outside this view.

        # This test assumes one such single-row group accounts for 58 bytes.
        single_row_group = MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('1234', "mutations-inset-1234")])
        ])
        # 50 references to the same single-row group.
        mutation_group = [single_row_group] * 50

        with TestPipeline() as p:
            # 50 groups of 58 bytes add up to 2900 bytes. Capping a batch at
            # 1450 bytes (2900 / 2) should split the input into exactly two
            # batches of 25 mutation groups each; the row and cell limits are
            # high enough not to interfere.
            groups = p | beam.Create(mutation_group)
            batches = groups | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1450,
                    max_number_rows=50,
                    max_number_cells=500,
                ))
            # Reduce every emitted batch to the number of groups it contains.
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([25, 25]))
Example #4
0
    def test_batch_max_rows(self, mock_batch_snapshot_class,
                            mock_batch_checkout):
        # Exercises the max_number_rows limit of _BatchFn.
        # The two mock parameters are not referenced in this body; presumably
        # they are supplied by patch decorators outside this view.
        rows = [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ]
        two_row_group = MutationGroup(
            [WriteMutation.insert("roles", ("key", "rolename"), rows)])
        # 50 references to the same two-row group.
        mutation_group = [two_row_group] * 50

        with TestPipeline() as p:
            # 50 groups of 2 rows each give 100 rows in total. With at most
            # 10 rows per batch, every batch should hold 5 mutation groups,
            # which yields 10 batches overall.
            groups = p | beam.Create(mutation_group)
            batches = groups | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1048576,
                    max_number_rows=10,
                    max_number_cells=500,
                ))
            # Reduce every emitted batch to the number of groups it contains.
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([5] * 10))