import apache_beam as beam
from apache_beam.testing.test_pipeline import TestPipeline


def test_simple_pipeline():
    """Round-trip test: write known lines to a file, read them through a
    Beam pipeline, write them back out, and verify the output.

    Fixes over the original version:
      * the input file is actually created before the pipeline reads it
        (previously ``input_data`` was never written anywhere);
      * ``WriteToText`` is given ``shard_name_template=''`` so the output
        lands in exactly ``output_file`` instead of the sharded name
        ``output.txt-00000-of-00001``, which made ``open(output_file)`` fail;
      * the assertion runs AFTER the ``with`` block, because a
        ``TestPipeline`` only executes when its context manager exits;
      * the output file handle is closed via ``with``.
    """
    input_file = 'input.txt'
    output_file = 'output.txt'
    input_data = ['Hello', 'World']

    # Materialize the input on disk; ReadFromText needs a real file.
    with open(input_file, 'w') as f:
        f.write('\n'.join(input_data))

    with TestPipeline() as p:
        input_pc = p | 'Read Input' >> beam.io.ReadFromText(input_file)
        # shard_name_template='' suppresses the '-00000-of-00001' suffix
        # so the output is written to exactly `output_file`.
        input_pc | 'Write Output' >> beam.io.WriteToText(
            output_file, shard_name_template='')

    # The pipeline has only run once the context manager above exits.
    with open(output_file) as f:
        assert set(f.read().strip().split()) == set(input_data)
import apache_beam as beam
from apache_beam.testing.test_pipeline import TestPipeline
from apache_beam.testing.util import assert_that, equal_to


class FilterElements(beam.DoFn):
    """DoFn that keeps only elements whose length is greater than 3."""

    def process(self, element):
        # Yield (keep) the element only when it is longer than 3 characters;
        # shorter elements are silently dropped.
        if len(element) > 3:
            yield element


def test_filter_pipeline():
    """Test a pipeline that filters elements with a ParDo transform.

    Fixes over the original version:
      * a ``PCollection`` cannot be iterated directly, so
        ``set(filtered_pc)`` would fail; the idiomatic Beam assertion is
        ``assert_that(pcoll, equal_to(expected))`` from
        ``apache_beam.testing.util``, which runs inside the pipeline;
      * the original expected set ``{'World', 'Beam', 'Pipeline'}`` was
        inconsistent with the filter — every original input element has
        length > 3, so nothing would be dropped. A short element ('to')
        is added to the input so the filter is actually exercised, and
        the expected output lists exactly the elements longer than 3.
    """
    input_data = ['Hello', 'World', 'Beam', 'Test', 'Pipeline', 'to']
    expected = ['Hello', 'World', 'Beam', 'Test', 'Pipeline']

    with TestPipeline() as p:
        # Create a PCollection from the in-memory input data.
        input_pc = p | 'Create Input' >> beam.Create(input_data)

        # Apply a ParDo transform to drop elements of length <= 3.
        filtered_pc = input_pc | 'Filter Elements' >> beam.ParDo(FilterElements())

        # The assertion is itself a transform; it is checked when the
        # pipeline runs on exit from the `with` block.
        assert_that(filtered_pc, equal_to(expected))

# In this example, we use the TestPipeline class to test a pipeline that
# applies a ParDo transform keeping only elements longer than three
# characters. FilterElements subclasses beam.DoFn and implements process()
# to yield matching elements. The pipeline is defined within the `with`
# statement, and assert_that/equal_to verify that the filtered PCollection
# contains exactly the expected elements.
# Package library: apache_beam.testing