def test_spanner_update(self):
    """Update two seeded rows and delete a third through WriteToSpanner."""
    prefix = 'test_update'

    # Seed three rows directly via the Spanner client so the pipeline has
    # something to mutate.
    instance = self._SPANNER_INSTANCE
    database = instance.database(self.TEST_DATABASE)
    seed_rows = [
        (prefix + '1', prefix + 'inset-1'),
        (prefix + '2', prefix + 'inset-2'),
        (prefix + '3', prefix + 'inset-3'),
    ]
    with database.batch() as batch:
        batch.insert(table='Users', columns=('UserId', 'Key'), values=seed_rows)

    # Two updates plus one delete -> exactly two rows should remain.
    mutations = [
        WriteMutation.update(
            'Users', ('UserId', 'Key'), [(prefix + '1', prefix + 'update-1')]),
        WriteMutation.update(
            'Users', ('UserId', 'Key'), [(prefix + '2', prefix + 'update-2')]),
        WriteMutation.delete('Users', spanner.KeySet(keys=[[prefix + '3']])),
    ]

    pipeline = beam.Pipeline(argv=self.args)
    _ = (
        pipeline
        | beam.Create(mutations)
        | WriteToSpanner(
            project_id=self.project,
            instance_id=self.instance,
            database_id=self.TEST_DATABASE))
    result = pipeline.run()
    result.wait_until_finish()

    self.assertEqual(self._count_data(prefix), 2)
def test_spanner_write_mutation_groups(
        self, mock_batch_snapshot_class, mock_batch_checkout):
    """Write three mutation groups under a tiny byte cap -> three batches."""
    key_set = spanner.KeySet(keys=[[1233], [1234]])
    groups = [
        MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('9001233', "mutations-inset-1233")]),
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('9001234', "mutations-inset-1234")]),
        ]),
        MutationGroup([
            WriteMutation.update(
                "roles", ("key", "rolename"),
                [('9001234', "mutations-inset-9001233-updated")]),
        ]),
        MutationGroup([WriteMutation.delete("roles", key_set)]),
    ]

    pipeline = TestPipeline()
    _ = (
        pipeline
        | beam.Create(groups)
        | WriteToSpanner(
            project_id=TEST_PROJECT_ID,
            instance_id=TEST_INSTANCE_ID,
            database_id=_generate_database_name(),
            max_batch_size_bytes=100))
    result = pipeline.run()
    result.wait_until_finish()

    # max_batch_size_bytes=100 forces each mutation group into its own
    # batch, so the SpannerBatches counter must read 3.
    metric_results = result.metrics().query(
        MetricsFilter().with_name('SpannerBatches'))
    batches_counter = metric_results['counters'][0]
    self.assertEqual(batches_counter.committed, 3)
    self.assertEqual(batches_counter.attempted, 3)
def test_metrics_error_call(self):
    """A failing write should record a service-call metric with code 400."""
    if 'DirectRunner' not in self.runner_name:
        raise unittest.SkipTest('This test only runs with DirectRunner.')

    # Reset process-wide metrics so only this test's calls are counted.
    MetricsEnvironment.process_wide_container().reset()

    prefix = 'test_write_batches'
    # Two inserts with the same primary key: the commit is expected to fail.
    mutations = [
        WriteMutation.insert(
            'Albums', ('AlbumId', 'Name'),
            [(prefix + '3', prefix + 'inset-3')]),
        WriteMutation.insert(
            'Albums', ('AlbumId', 'Name'),
            [(prefix + '3', prefix + 'inset-3')]),
    ]
    with self.assertRaises(Exception):
        pipeline = beam.Pipeline(argv=self.args)
        _ = (
            pipeline
            | beam.Create(mutations)
            | WriteToSpanner(
                project_id=self.project,
                instance_id=self.instance,
                database_id=self.TEST_DATABASE))
        result = pipeline.run()
        result.wait_until_finish()

    self.verify_write_call_metric(
        self.project, self.TEST_DATABASE, 'Albums', '400', 1)
def test_batch_max_cells(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Batch by cell count: 50 groups x 4 cells under a 50-cell cap -> 5 batches."""
    # Each group holds one insert of two rows with two columns = 4 cells.
    groups = [
        MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"), [
                    ('1234', "mutations-inset-1234"),
                    ('1235', "mutations-inset-1235"),
                ])
        ])
    ] * 50

    with TestPipeline() as pipeline:
        # Total cells = 50 * 4 = 200. At most 50 cells fit per batch, i.e.
        # 12 groups (48 cells); the first four batches take 12 groups each
        # and the fifth takes the remaining 2.
        batch_sizes = (
            pipeline
            | beam.Create(groups)
            | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1048576,
                    max_number_rows=500,
                    max_number_cells=50))
            | beam.Map(len))
        assert_that(batch_sizes, equal_to([12, 12, 12, 12, 2]))
def make_insert_mutations(element):
    """Wrap *element* in a single-element list holding one Spanner insert.

    A fresh ``uuid1`` string serves as the primary key, so every call
    produces a row with a unique id in table ``test``.
    """
    import uuid  # pylint: disable=reimported
    from apache_beam.io.gcp.experimental.spannerio import WriteMutation
    mutation = WriteMutation.insert(
        table='test',
        columns=('id', 'data'),
        values=[(str(uuid.uuid1()), element)])
    return [mutation]
def test_spanner_error(self):
    """Updating a row with a bogus key must make the pipeline raise."""
    bad_update = [
        WriteMutation.update(
            'Users', ('UserId', 'Key'), [('INVALD_ID', 'Error-error')]),
    ]
    with self.assertRaises(Exception):
        pipeline = beam.Pipeline(argv=self.args)
        _ = (
            pipeline
            | beam.Create(bad_update)
            | WriteToSpanner(
                project_id=self.project,
                instance_id=self.instance,
                database_id=self.TEST_DATABASE))
        pipeline.run()
def test_write_batches(self):
    """Insert four rows under a 250-byte batch cap; all rows must land."""
    prefix = 'test_write_batches'
    # Four single-row inserts; the small byte cap forces multiple batches,
    # but every row must still be committed.
    mutations = [
        WriteMutation.insert(
            'Users', ('UserId', 'Key'),
            [(prefix + str(i), prefix + 'inset-' + str(i))])
        for i in range(1, 5)
    ]

    pipeline = beam.Pipeline(argv=self.args)
    _ = (
        pipeline
        | beam.Create(mutations)
        | WriteToSpanner(
            project_id=self.project,
            instance_id=self.instance,
            database_id=self.TEST_DATABASE,
            max_batch_size_bytes=250))
    result = pipeline.run()
    result.wait_until_finish()

    self.assertEqual(self._count_data(prefix), len(mutations))
def test_batch_disable(self, mock_batch_snapshot_class, mock_batch_checkout):
    """max_number_rows=0 disables batching: each group flushes on its own."""
    groups = [
        MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('1234', "mutations-inset-1234")])
        ])
    ] * 4

    with TestPipeline() as pipeline:
        # Setting any batching limit to zero (here the row limit) turns
        # batching off, so each of the four outputs holds a single group.
        batch_sizes = (
            pipeline
            | beam.Create(groups)
            | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1450,
                    max_number_rows=0,
                    max_number_cells=500))
            | beam.Map(len))
        assert_that(batch_sizes, equal_to([1] * 4))
def test_batch_byte_size(
        self, mock_batch_snapshot_class, mock_batch_checkout):
    """Batch by byte size: 50 groups of 58 bytes, 1450-byte cap -> 2 x 25."""
    # Each mutation group serializes to 58 bytes.
    groups = [
        MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"),
                [('1234', "mutations-inset-1234")])
        ])
    ] * 50

    with TestPipeline() as pipeline:
        # Total bytes = 58 * 50 = 2900; a 1450-byte cap (half the total)
        # splits the input into two batches of 25 groups each.
        batch_sizes = (
            pipeline
            | beam.Create(groups)
            | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1450,
                    max_number_rows=50,
                    max_number_cells=500))
            | beam.Map(len))
        assert_that(batch_sizes, equal_to([25] * 2))
def test_batch_max_rows(self, mock_batch_snapshot_class, mock_batch_checkout):
    """Batch by row count: 50 groups x 2 rows under a 10-row cap -> 10 batches."""
    # Each group holds one insert covering two rows.
    groups = [
        MutationGroup([
            WriteMutation.insert(
                "roles", ("key", "rolename"), [
                    ('1234', "mutations-inset-1234"),
                    ('1235', "mutations-inset-1235"),
                ])
        ])
    ] * 50

    with TestPipeline() as pipeline:
        # Total rows = 50 * 2 = 100; capping each batch at 10 rows yields
        # 10 batches of 5 mutation groups apiece.
        batch_sizes = (
            pipeline
            | beam.Create(groups)
            | beam.ParDo(
                _BatchFn(
                    max_batch_size_bytes=1048576,
                    max_number_rows=10,
                    max_number_cells=500))
            | beam.Map(len))
        assert_that(batch_sizes, equal_to([5] * 10))
def test_write_mutation_error(self, *args):
    """WriteMutation rejects being constructed with two operations at once."""
    # Supplying both `insert` and `update` is ambiguous and must raise.
    with self.assertRaises(ValueError):
        WriteMutation(insert="table-name", update="table-name")