def test_create_do_with_collection_side_bigquery_write(self):
    # Executes a read -> DoFn -> in-memory write map task in which the DoFn
    # receives a BigQuery source as an iterable side input. BigQueryReader is
    # patched so the side values come from 'side_values' below.
    main_values = ['aa', 'bb']
    side_values = ['x', 'y']
    collected = []

    with mock.patch(
            target='google.cloud.dataflow.io.bigquery.BigQueryReader'
    ) as reader_cls:
        # The patched reader is returned from its own __enter__, so the
        # object bound by a 'with' statement is the same mock.
        fake_reader = reader_cls.return_value
        fake_reader.__enter__.return_value = fake_reader
        # side_effect produces a brand-new iterator on every __iter__ call,
        # so several readers can each walk the full list of side values.
        fake_reader.__iter__.side_effect = lambda: iter(side_values)

        # NOTE(review): end_index=3 exceeds the two main values; presumably
        # the source just stops at the end of the list -- confirm.
        read_op = maptask.WorkerRead(
            inmemory.InMemorySource(
                elements=list(map(pickler.dumps, main_values)),
                start_index=0,
                end_index=3),
            output_coders=[self.OUTPUT_CODER])

        # Pairs the main element with each value of the side input.
        pairing_fn = pickle_with_side_inputs(
            ptransform.CallableWrapperDoFn(
                lambda x, side: ['%s:%s' % (x, s) for s in side]),
            tag_and_type=('bigquery', pvalue.IterablePCollectionView, ()))

        side_source = maptask.WorkerSideInputSource(
            bigquery.BigQuerySource(
                project='project',
                dataset='dataset',
                table='table',
                coder=get_bigquery_source_coder()),
            tag='bigquery')

        do_op = maptask.WorkerDoFn(
            serialized_fn=pairing_fn,
            output_tags=['out'],
            input=(0, 0),
            side_inputs=[side_source],
            output_coders=[self.OUTPUT_CODER])

        write_op = maptask.WorkerInMemoryWrite(
            output_buffer=collected,
            input=(1, 0),
            output_coders=(self.OUTPUT_CODER, ))

        executor.MapTaskExecutor().execute(
            make_map_task([read_op, do_op, write_op]))

    # The side source was requested as a collection, so every main value
    # must show up combined with every side value.
    self.assertEqual(['aa:x', 'aa:y', 'bb:x', 'bb:y'], sorted(collected))
def test_create_do_with_singleton_side_bigquery_write(self):
    # Executes a read -> DoFn -> in-memory write map task in which the DoFn
    # receives a BigQuery source through a SingletonPCollectionView side
    # input. BigQueryReader is patched so the side values come from
    # 'side_elements' rather than from a real table.
    elements = ['abc', 'def', 'ghi']
    side_elements = ['x', 'y', 'z']
    output_buffer = []
    patch_target = 'google.cloud.dataflow.io.bigquery.BigQueryReader'

    with mock.patch(target=patch_target) as mock_class:
        # Setup the reader so it will yield the values in 'side_elements'.
        reader_mock = mock_class.return_value
        reader_mock.__enter__.return_value = reader_mock
        # Use side_effect with a lambda (not return_value with a single
        # generator object): a generator is exhausted after one pass, so any
        # later iteration of the reader would silently yield nothing. A
        # fresh generator per __iter__ call keeps every pass complete.
        reader_mock.__iter__.side_effect = lambda: (x for x in side_elements)

        pickled_elements = [pickler.dumps(e) for e in elements]
        executor.MapTaskExecutor().execute(make_map_task([
            maptask.WorkerRead(
                inmemory.InMemorySource(
                    elements=pickled_elements,
                    start_index=0,
                    end_index=3),
                output_coders=[self.OUTPUT_CODER]),
            maptask.WorkerDoFn(
                serialized_fn=pickle_with_side_inputs(
                    # Emits one '<element>:<side>' string per main element.
                    ptransform.CallableWrapperDoFn(
                        lambda x, side: ['%s:%s' % (x, side)]),
                    tag_and_type=(
                        'bigquery',
                        pvalue.SingletonPCollectionView,
                        (False, None))),
                output_tags=['out'],
                input=(0, 0),
                side_inputs=[
                    maptask.WorkerSideInputSource(
                        bigquery.BigQuerySource(
                            project='project',
                            dataset='dataset',
                            table='table',
                            coder=get_bigquery_source_coder()),
                        tag='bigquery'),
                ],
                output_coders=[self.OUTPUT_CODER]),
            maptask.WorkerInMemoryWrite(
                output_buffer=output_buffer,
                input=(1, 0),
                output_coders=(self.OUTPUT_CODER,)),
        ]))

    # The side source was specified as singleton therefore we should see
    # only the first element appended.
    self.assertEqual(['abc:x', 'def:x', 'ghi:x'], output_buffer)