def test_implicit_payload_builder_with_bytes(self):
    """Compare the payload built from ``PayloadBase.bytes_values``.

    On Python 2 the expected payload is assembled by hand from the
    ``ConfigValue`` entries below; on Python 3 it is derived from
    ``PayloadBase.args``.
    """
    values = PayloadBase.bytes_values
    result = ImplicitSchemaPayloadBuilder(values).build()

    if sys.version_info[0] >= 3:
        expected_args = PayloadBase.args
    else:
        # in python 2.x bytes coder will be inferred
        def encode(coder, field):
            # Nested-encode the configured value of `field` with `coder`.
            return coder.get_impl().encode_nested(values[field])

        expected_args = {
            'integer_example': ConfigValue(
                coder_urn=['beam:coder:varint:v1'],
                payload=encode(VarIntCoder(), 'integer_example')),
            'string_example': ConfigValue(
                coder_urn=['beam:coder:bytes:v1'],
                payload=encode(StrUtf8Coder(), 'string_example')),
            'list_of_strings': ConfigValue(
                coder_urn=['beam:coder:iterable:v1', 'beam:coder:bytes:v1'],
                payload=encode(
                    IterableCoder(StrUtf8Coder()), 'list_of_strings')),
            'optional_kv': ConfigValue(
                coder_urn=[
                    'beam:coder:kv:v1',
                    'beam:coder:bytes:v1',
                    'beam:coder:double:v1',
                ],
                payload=encode(
                    TupleCoder([StrUtf8Coder(), FloatCoder()]),
                    'optional_kv')),
        }

    self.assertEqual(result, get_payload(expected_args))
def test_implicit_payload_builder(self):
    """Round-trip ``PayloadBase.values`` through the builder and RowCoder."""
    payload = ImplicitSchemaPayloadBuilder(PayloadBase.values).build()
    row = RowCoder(payload.schema).decode(payload.payload)

    for field_name, expected in PayloadBase.values.items():
        # getattr is deliberately given a None default:
        # ImplicitSchemaPayloadBuilder omits fields with value=None since
        # their type cannot be inferred.
        actual = getattr(row, field_name, None)
        self.assertEqual(actual, expected)
def test_external_empty_spec_translation(self):
    """Check that a cleared transform spec is absent after translation.

    Runs a pipeline containing an external prefix transform, clears the
    spec of the expanded transform labelled ``.../TestLabel``, converts the
    pipeline to its runner API proto and asserts that the matching proto
    transform is present and its text form has no ``spec {`` section.
    """
    pipeline = beam.Pipeline()
    external_transform = beam.ExternalTransform(
        'beam:transforms:xlang:test:prefix',
        ImplicitSchemaPayloadBuilder({'data': u'0'}),
        expansion_service.ExpansionServiceServicer())
    _ = (pipeline | beam.Create(['a', 'b']) | external_transform)
    pipeline.run().wait_until_finish()

    # Single source of truth for the label used both when clearing the spec
    # and when locating the translated transform.
    external_transform_label = (
        'ExternalTransform(beam:transforms:xlang:test:prefix)/TestLabel')

    expanded_transforms = external_transform._expanded_components.transforms
    for transform in expanded_transforms.values():
        # We clear the spec of one of the external transforms.
        if transform.unique_name == external_transform_label:
            transform.spec.Clear()

    context = pipeline_context.PipelineContext()
    proto_pipeline = pipeline.to_runner_api(context=context)

    proto_transform = None
    for transform in proto_pipeline.components.transforms.values():
        if transform.unique_name == external_transform_label:
            proto_transform = transform

    self.assertIsNotNone(proto_transform)
    # assertNotIn reports the offending text on failure, unlike
    # assertTrue(... .find(...) == -1).
    self.assertNotIn('spec {', str(proto_transform))
def test_xlang_parquetio_write(self):
    """Write three Avro records through the external Parquet write transform.

    The expansion jar and port are read from the ``EXPANSION_JAR`` and
    ``EXPANSION_PORT`` environment variables. A ``RuntimeError`` whose
    message mentions ``PARQUET_WRITE_URN`` is reported and swallowed; any
    other ``RuntimeError`` propagates.
    """
    expansion_jar = os.environ.get('EXPANSION_JAR')
    port = os.environ.get('EXPANSION_PORT')
    address = 'localhost:%s' % port

    try:
        with TestPipeline() as p:
            p.get_pipeline_options().view_as(
                DebugOptions).experiments.append(
                    'jar_packages=' + expansion_jar)
            p.not_use_test_runner_api = True
            _ = (
                p
                | beam.Create([
                    AvroRecord({"name": "abc"}),
                    AvroRecord({"name": "def"}),
                    AvroRecord({"name": "ghi"}),
                ])
                | beam.ExternalTransform(
                    PARQUET_WRITE_URN,
                    ImplicitSchemaPayloadBuilder(
                        {'data': u'/tmp/test.parquet'}),
                    address))
    except RuntimeError as e:
        # The URN is a literal identifier, not a pattern: match it as a
        # plain substring so regex metacharacters in it cannot widen or
        # break the match.
        if PARQUET_WRITE_URN not in str(e):
            # Bare raise keeps the original traceback intact.
            raise
        print("looks like URN not implemented in expansion service, skipping.")
def test_implicit_payload_builder_with_bytes(self):
    """Build from ``PayloadBase.bytes_values`` and compare the decoded row.

    The decoded row is compared field by field against
    ``PayloadBase.values``; afterwards the ``typing.List[bytes]`` conversion
    is checked to still yield ``typehints.List[bytes]``.
    """
    payload = ImplicitSchemaPayloadBuilder(PayloadBase.bytes_values).build()
    row = RowCoder(payload.schema).decode(payload.payload)

    for field_name, expected in PayloadBase.values.items():
        # getattr is deliberately given a None default:
        # ImplicitSchemaPayloadBuilder omits fields with value=None since
        # their type cannot be inferred.
        actual = getattr(row, field_name, None)
        self.assertEqual(actual, expected)

    # Verify we have not modified a cached type (BEAM-10766)
    # TODO(BEAM-7372): Remove when bytes coercion code is removed.
    converted = convert_to_beam_type(typing.List[bytes])
    self.assertEqual(typehints.List[bytes], converted)
def run_pipeline(pipeline_options, expansion_service, wait_until_finish=True):
    """Run a count-filtered-letters pipeline through two external transforms.

    Args:
      pipeline_options: options handed to ``TestPipeline``.
      expansion_service: passed to both ``ExternalTransform`` instances; an
        ``int`` is treated as a port and rewritten to ``'localhost:<port>'``.
      wait_until_finish: when true, block on the pipeline result.
    """
    # The actual definitions of these transforms is in
    # org.apache.beam.runners.core.construction.TestExpansionService.
    TEST_COUNT_URN = "beam:transforms:xlang:count"
    TEST_FILTER_URN = "beam:transforms:xlang:filter_less_than_eq"

    if isinstance(expansion_service, int):
        # Only the port was specified.
        expansion_service = 'localhost:%s' % expansion_service

    # Run a simple count-filtered-letters pipeline.
    p = TestPipeline(options=pipeline_options)
    letters = p | beam.Create(list('aaabccxyyzzz')) | beam.Map(unicode)
    filtered = letters | beam.ExternalTransform(
        TEST_FILTER_URN,
        ImplicitSchemaPayloadBuilder({'data': u'middle'}),
        expansion_service)
    counted = filtered | beam.ExternalTransform(
        TEST_COUNT_URN, None, expansion_service)
    res = counted | beam.Map(lambda kv: '%s: %s' % kv)

    assert_that(res, equal_to(['a: 3', 'b: 1', 'c: 2']))

    result = p.run()
    if not wait_until_finish:
        return
    result.wait_until_finish()
def run_prefix(self, pipeline):
    """Apply the external prefix transform to ``['a', 'b']`` and verify it.

    The transform is configured with ``{'data': u'0'}`` and resolved through
    ``self.expansion_service``; the expected output is ``['0a', '0b']``.
    """
    with pipeline as p:
        elements = p | beam.Create(['a', 'b']).with_output_types(unicode)
        res = elements | beam.ExternalTransform(
            TEST_PREFIX_URN,
            ImplicitSchemaPayloadBuilder({'data': u'0'}),
            self.expansion_service)
        assert_that(res, equal_to(['0a', '0b']))
def test_implicit_payload_builder_with_bytes(self):
    """Build from ``PayloadBase.bytes_values`` and compare the decoded row.

    On Python 2 the decoded fields are compared with
    ``PayloadBase.bytes_values``; on Python 3 with ``PayloadBase.values``.
    """
    values = PayloadBase.bytes_values
    result = ImplicitSchemaPayloadBuilder(values).build()
    decoded = RowCoder(result.schema).decode(result.payload)

    # Only the reference dictionary differs between interpreter versions.
    if sys.version_info[0] < 3:
        expected_fields = PayloadBase.bytes_values
    else:
        expected_fields = PayloadBase.values

    for field_name, expected in expected_fields.items():
        # getattr is deliberately given a None default:
        # ImplicitSchemaPayloadBuilder omits fields with value=None since
        # their type cannot be inferred.
        self.assertEqual(getattr(decoded, field_name, None), expected)
def test_external_transform_finder_leaf(self):
    """Check ``contains_external_transforms`` after running a pipeline.

    The pipeline applies the ``beam:transforms:xlang:test:nooutput``
    external transform to a two-element ``Create``.
    """
    pipeline = beam.Pipeline()
    elements = pipeline | beam.Create(['a', 'b'])
    _ = elements | beam.ExternalTransform(
        'beam:transforms:xlang:test:nooutput',
        ImplicitSchemaPayloadBuilder({'data': u'0'}),
        expansion_service.ExpansionServiceServicer())

    pipeline.run().wait_until_finish()

    self.assertTrue(pipeline.contains_external_transforms)
def test_prefix(self):
    """Run the external prefix transform with the expansion jar staged.

    The jar from ``ValidateRunnerXlangTest.expansion_jar`` is appended to the
    pipeline's debug experiments as a ``jar_packages=`` entry before the
    pipeline is executed.
    """
    test_pipeline = TestPipeline()
    debug_options = test_pipeline.get_pipeline_options().view_as(DebugOptions)
    debug_options.experiments.append(
        'jar_packages=' + ValidateRunnerXlangTest.expansion_jar)
    test_pipeline.not_use_test_runner_api = True

    with test_pipeline as p:
        elements = p | beam.Create(['a', 'b']).with_output_types(unicode)
        res = elements | beam.ExternalTransform(
            TEST_PREFIX_URN,
            ImplicitSchemaPayloadBuilder({'data': u'0'}),
            ValidateRunnerXlangTest.expansion_service)
        assert_that(res, equal_to(['0a', '0b']))
def __init__(
        self,
        start,
        stop=None,
        elements_per_period=None,
        max_read_time=None,
        expansion_service=None):
    """Configure the external transform identified by ``self.URN``.

    ``start``, ``stop``, ``elements_per_period`` and ``max_read_time`` are
    packed, under those same keys, into an ``ImplicitSchemaPayloadBuilder``;
    ``expansion_service`` is forwarded unchanged to the parent constructor.
    """
    config = {
        'start': start,
        'stop': stop,
        'elements_per_period': elements_per_period,
        'max_read_time': max_read_time,
    }
    super(GenerateSequence, self).__init__(
        self.URN, ImplicitSchemaPayloadBuilder(config), expansion_service)
def test_as_external_transform_no_kwargs(self):
    """Invoke ``_TestTransform`` as an external transform with only ``args``.

    The payload names the constructor and supplies positional arguments via
    a ``beam.Row``; no ``kwargs`` entry is provided.
    """
    with FullyQualifiedNamedTransform.with_filter('*'), beam.Pipeline() as p:
        transformed = (
            p
            | beam.Create(['a', 'b', 'c'])
            | beam.ExternalTransform(
                PYTHON_FULLY_QUALIFIED_NAMED_TRANSFORM_URN,
                ImplicitSchemaPayloadBuilder({
                    'constructor': 'apache_beam.transforms'
                    '.fully_qualified_named_transform_test._TestTransform',
                    'args': beam.Row(arg0='x', arg1='y'),
                }),
                expansion_service.ExpansionServiceServicer()))
        assert_that(transformed, equal_to(['xay', 'xby', 'xcy']))
def run(input_path, output_path, expansion_service_port, pipeline_args):
    """Read text, prefix each record in Java and then Python, write text.

    Args:
      input_path: location passed to ``ReadFromText``.
      output_path: location passed to ``WriteToText``.
      expansion_service_port: port of the expansion service on localhost.
      pipeline_args: arguments used to build ``PipelineOptions``.
    """
    def python_prefix(record):
        return 'python:%s' % record

    expansion_address = 'localhost:%s' % expansion_service_port
    pipeline_options = PipelineOptions(pipeline_args)

    with beam.Pipeline(options=pipeline_options) as p:
        lines = p | 'Read' >> ReadFromText(input_path).with_output_types(str)
        java_output = lines | 'JavaPrefix' >> beam.ExternalTransform(
            'beam:transform:org.apache.beam:javaprefix:v1',
            ImplicitSchemaPayloadBuilder({'prefix': 'java:'}),
            expansion_address)
        output = java_output | 'PythonPrefix' >> beam.Map(python_prefix)
        output | 'Write' >> WriteToText(output_path)
def test_pipeline_generation_with_runner_overrides(self):
    """Check that the PubSub read feeds the external transform in the proto.

    Builds a streaming pipeline with Dataflow dry-run options, converts it
    to a runner API proto and asserts that the single output of the
    ``ReadFromPubSub`` transform is exactly the input of the external
    transform.
    """
    pipeline_properties = [
        '--dataflow_endpoint=ignored',
        '--job_name=test-job',
        '--project=test-project',
        '--staging_location=ignored',
        '--temp_location=/dev/null',
        '--no_auth',
        '--dry_run=True',
        '--sdk_location=container',
        '--runner=DataflowRunner',
        '--streaming',
    ]

    with beam.Pipeline(options=PipelineOptions(pipeline_properties)) as p:
        messages = p | beam.io.ReadFromPubSub(
            subscription=
            'projects/dummy-project/subscriptions/dummy-subscription')
        _ = messages | beam.ExternalTransform(
            'beam:transforms:xlang:test:prefix',
            ImplicitSchemaPayloadBuilder({'data': u'0'}),
            expansion_service.ExpansionServiceServicer())

    pipeline_proto, _ = p.to_runner_api(return_context=True)

    pubsub_read_transform = None
    external_transform = None
    # Scan every transform; a later match overwrites an earlier one.
    for transform in pipeline_proto.components.transforms.values():
        unique_name = transform.unique_name
        if 'beam:transforms:xlang:test:prefix' in unique_name:
            external_transform = transform
        if 'ReadFromPubSub' in unique_name:
            pubsub_read_transform = transform

    if not pubsub_read_transform or not external_transform:
        raise ValueError(
            'Could not find an external transform and the PubSub read transform '
            'in the pipeline')

    pubsub_outputs = list(pubsub_read_transform.outputs.values())
    self.assertEqual(1, len(pubsub_outputs))
    self.assertEqual(
        pubsub_outputs, list(external_transform.inputs.values()))
def run_prefix(self, pipeline):
    """Exercise the external prefix transform.

    Target transform - ParDo
    (https://beam.apache.org/documentation/programming-guide/#pardo)

    Test scenario - Mapping elements from a single input collection to a
    single output collection

    Boundary conditions checked -
     - PCollection<?> to external transforms
     - PCollection<?> from external transforms
    """
    with pipeline as p:
        elements = p | beam.Create(['a', 'b']).with_output_types(unicode)
        res = elements | beam.ExternalTransform(
            TEST_PREFIX_URN,
            ImplicitSchemaPayloadBuilder({'data': u'0'}),
            self.expansion_service)
        assert_that(res, equal_to(['0a', '0b']))
def test_pipeline_generation(self):
    """Compare external-transform parts before and after a proto round trip.

    The pipeline is converted to a runner API proto and rebuilt with
    ``Pipeline.from_runner_api``; only the rebuilt pipeline is expected to
    carry the expanded sub-transform labelled ``.../TestLabel``.
    """
    pipeline = beam.Pipeline()
    elements = pipeline | beam.Create(['a', 'b'])
    _ = elements | beam.ExternalTransform(
        'beam:transforms:xlang:test:prefix',
        ImplicitSchemaPayloadBuilder({'data': u'0'}),
        expansion_service.ExpansionServiceServicer())

    proto, _ = pipeline.to_runner_api(return_context=True)
    pipeline_from_proto = Pipeline.from_runner_api(
        proto, pipeline.runner, pipeline._options)

    # Original pipeline has the un-expanded external transform
    self.assertEqual([], pipeline.transforms_stack[0].parts[1].parts)

    # new pipeline has the expanded external transform
    expanded_parts = pipeline_from_proto.transforms_stack[0].parts[1].parts
    self.assertNotEqual([], expanded_parts)
    self.assertEqual(
        u'ExternalTransform(beam:transforms:xlang:test:prefix)/TestLabel',
        expanded_parts[0].full_label)
def to_runner_api_parameter(self, unused_context):
    """Return ``(TEST_PREFIX_URN, payload)`` for this transform.

    The payload is produced by an ``ImplicitSchemaPayloadBuilder`` wrapping
    ``self._payload`` under the ``'data'`` key. The context argument is not
    used.
    """
    urn = TEST_PREFIX_URN
    builder = ImplicitSchemaPayloadBuilder({'data': self._payload})
    return urn, builder.payload()
def test_implicit_payload_builder(self):
    """The built payload equals ``get_payload(PayloadBase.args)``."""
    actual = ImplicitSchemaPayloadBuilder(PayloadBase.values).build()
    self.assertEqual(actual, get_payload(PayloadBase.args))