예제 #1
0
    def test_batch_max_cells(self, mock_batch_snapshot_class,
                             mock_batch_checkout):
        # Checks how _BatchFn splits its input when max_number_cells is the
        # limit being hit. The two mock arguments are not used in this body
        # (presumably injected by patch decorators outside this view).

        # One mutation group inserting two rows into two columns (4 cells).
        two_row_insert = WriteMutation.insert("roles", ("key", "rolename"), [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ])
        # The same group object repeated 50 times: 50 groups * 4 cells = 200.
        mutation_group = [MutationGroup([two_row_insert])] * 50

        with TestPipeline() as pipeline:
            groups = pipeline | beam.Create(mutation_group)
            # With at most 50 cells per batch and 4 cells per group, the
            # expected result is 12 whole groups (48 cells) per batch, since a
            # 13th group would bring the batch to 52 cells. 48 groups therefore
            # fill four batches and the remaining 2 groups form a fifth one.
            batches = groups | beam.ParDo(
                _BatchFn(max_batch_size_bytes=1048576,
                         max_number_rows=500,
                         max_number_cells=50))
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([12] * 4 + [2]))
 def make_insert_mutations(element):
     """Wrap ``element`` in a single-row insert mutation for table 'test'.

     The row's 'id' column is a freshly generated ``uuid.uuid1()`` rendered as
     a string and its 'data' column is ``element`` itself.

     Returns:
       A one-element list containing the insert ``WriteMutation``.
     """
     # Imports are kept function-local, as in the original (presumably so the
     # function is self-contained when used inside a pipeline -- confirm).
     import uuid  # pylint: disable=reimported
     from apache_beam.io.gcp.experimental.spannerio import WriteMutation

     row = (str(uuid.uuid1()), element)
     return [
         WriteMutation.insert(table='test',
                              columns=('id', 'data'),
                              values=[row])
     ]
    def test_write_batches(self):
        # Writes four single-row inserts into 'Users' through WriteToSpanner
        # with a 250-byte batch size limit, then checks that the number of
        # rows counted for this test's prefix equals the number of mutations.
        prefix = 'test_write_batches'
        rows = [
            (prefix + '1', prefix + 'inset-1'),
            (prefix + '2', prefix + 'inset-2'),
            (prefix + '3', prefix + 'inset-3'),
            (prefix + '4', prefix + 'inset-4'),
        ]
        # One insert mutation per row, in the same order as the rows above.
        mutations = [
            WriteMutation.insert('Users', ('UserId', 'Key'), [row])
            for row in rows
        ]

        pipeline = beam.Pipeline(argv=self.args)
        source = pipeline | beam.Create(mutations)
        _ = source | WriteToSpanner(project_id=self.project,
                                    instance_id=self.instance,
                                    database_id=self.TEST_DATABASE,
                                    max_batch_size_bytes=250)

        # Block until the pipeline has finished before counting the rows.
        result = pipeline.run()
        result.wait_until_finish()
        self.assertEqual(self._count_data(prefix), len(mutations))
예제 #4
0
    def test_metrics_ok_call(self):
        # Runs a two-mutation write into 'Albums' and then checks the write
        # call metric through verify_write_call_metric. Skipped unless the
        # configured runner name contains 'DirectRunner'.
        if 'DirectRunner' not in self.runner_name:
            raise unittest.SkipTest('This test only runs with DirectRunner.')

        # Reset the process-wide metrics container before running the pipeline.
        MetricsEnvironment.process_wide_container().reset()

        prefix = 'test_write_batches'
        rows = [
            (prefix + '1', prefix + 'inset-1'),
            (prefix + '2', prefix + 'inset-2'),
        ]
        mutations = [
            WriteMutation.insert('Albums', ('AlbumId', 'Name'), [row])
            for row in rows
        ]

        pipeline = beam.Pipeline(argv=self.args)
        source = pipeline | beam.Create(mutations)
        _ = source | WriteToSpanner(project_id=self.project,
                                    instance_id=self.instance,
                                    database_id=self.TEST_DATABASE)

        result = pipeline.run()
        result.wait_until_finish()

        # Presumably: one write call with status 'ok' for table 'Albums'
        # (argument meaning is defined by the helper -- confirm there).
        self.verify_write_call_metric(self.project, self.TEST_DATABASE,
                                      'Albums', 'ok', 1)
예제 #5
0
    def test_batch_disable(self, mock_batch_snapshot_class,
                           mock_batch_checkout):
        # Checks that _BatchFn emits every mutation group on its own when
        # batching is disabled. The two mock arguments are not used in this
        # body (presumably injected by patch decorators outside this view).

        # A single-row insert group, with the same object repeated four times.
        single_row_insert = WriteMutation.insert(
            "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
        mutation_group = [MutationGroup([single_row_insert])] * 4

        with TestPipeline() as pipeline:
            groups = pipeline | beam.Create(mutation_group)
            # Batching is disabled by setting any one of the batching
            # parameters to a low value or zero; here it is max_number_rows=0.
            batches = groups | beam.ParDo(
                _BatchFn(max_batch_size_bytes=1450,
                         max_number_rows=0,
                         max_number_cells=500))
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            # Four input groups are expected to yield four batches of size one.
            assert_that(batch_sizes, equal_to([1] * 4))
예제 #6
0
    def test_batch_byte_size(self, mock_batch_snapshot_class,
                             mock_batch_checkout):
        # Checks how _BatchFn splits its input when max_batch_size_bytes is
        # the limit being hit. The two mock arguments are not used in this
        # body (presumably injected by patch decorators outside this view).

        # Per the original author's note, each mutation group is 58 bytes.
        single_row_insert = WriteMutation.insert(
            "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
        mutation_group = [MutationGroup([single_row_insert])] * 50

        with TestPipeline() as pipeline:
            groups = pipeline | beam.Create(mutation_group)
            # 50 groups * 58 bytes = 2900 bytes in total. A byte limit of
            # 1450 (2900 / 2) should therefore give two batches holding
            # 25 mutation groups each.
            batches = groups | beam.ParDo(
                _BatchFn(max_batch_size_bytes=1450,
                         max_number_rows=50,
                         max_number_cells=500))
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([25] * 2))
예제 #7
0
    def test_batch_max_rows(self, mock_batch_snapshot_class,
                            mock_batch_checkout):
        # Checks how _BatchFn splits its input when max_number_rows is the
        # limit being hit. The two mock arguments are not used in this body
        # (presumably injected by patch decorators outside this view).

        # One mutation group inserting two rows, repeated 50 times
        # (the same object each time), i.e. 100 rows in total.
        two_row_insert = WriteMutation.insert("roles", ("key", "rolename"), [
            ('1234', "mutations-inset-1234"),
            ('1235', "mutations-inset-1235"),
        ])
        mutation_group = [MutationGroup([two_row_insert])] * 50

        with TestPipeline() as pipeline:
            groups = pipeline | beam.Create(mutation_group)
            # At most 10 rows per batch and 2 rows per group means 5 groups
            # per batch, so the 50 groups should come out as 10 batches.
            batches = groups | beam.ParDo(
                _BatchFn(max_batch_size_bytes=1048576,
                         max_number_rows=10,
                         max_number_cells=500))
            batch_sizes = batches | beam.Map(lambda batch: len(batch))
            assert_that(batch_sizes, equal_to([5] * 10))
예제 #8
0
    def test_spanner_bundles_size(self, mock_batch_snapshot_class,
                                  mock_batch_checkout):
        # Runs 100 mutations through WriteToSpanner with a 1024-byte batch
        # limit and checks the 'SpannerBatches' counter. The two mock
        # arguments are not used in this body (presumably injected by patch
        # decorators outside this view).
        key_set = spanner.KeySet(keys=[[1233], [1234]])
        delete_roles = WriteMutation.delete("roles", key_set)
        insert_role = WriteMutation.insert(
            "roles", ("key", "rolename"), [('1234', "mutations-inset-1234")])
        # Alternating delete/insert pair repeated 50 times (100 mutations).
        mutations = [delete_roles, insert_role] * 50

        pipeline = TestPipeline()
        source = pipeline | beam.Create(mutations)
        _ = source | WriteToSpanner(project_id=TEST_PROJECT_ID,
                                    instance_id=TEST_INSTANCE_ID,
                                    database_id=_generate_database_name(),
                                    max_batch_size_bytes=1024)
        result = pipeline.run()
        result.wait_until_finish()

        # Look up the single counter published under the name 'SpannerBatches'.
        metrics = result.metrics()
        query_result = metrics.query(
            MetricsFilter().with_name('SpannerBatches'))
        batches_counter = query_result['counters'][0]

        # Both the committed and attempted values are expected to be 53.
        self.assertEqual(batches_counter.committed, 53)
        self.assertEqual(batches_counter.attempted, 53)