Esempio n. 1
0
        def test_external_transforms(self):
            """Verify external transforms expand via the expansion service.

            Three scenarios are covered:

            * ``GenerateSequence`` is expanded and run; the output must equal
              the integers 1 through 9.
            * ``ReadFromKafka`` is given unresolvable bootstrap servers;
              running the pipeline must raise, and the error text must
              mention the invalid ``bootstrap.servers`` setting.
            * ``WriteToKafka`` is applied to a pipeline that is never run.
            """
            # TODO: Move the expansion service address into PipelineOptions.
            expansion_service = "localhost:" + str(self.expansion_port)

            with self.create_pipeline() as p:
                res = p | GenerateSequence(
                    start=1, stop=10, expansion_service=expansion_service)

                assert_that(res, equal_to(list(range(1, 10))))

            # We expect to fail here because we do not have a Kafka cluster
            # handy. Nevertheless, we check that the transform is expanded by
            # the ExpansionService and that the pipeline fails during
            # execution.
            with self.assertRaises(Exception) as ctx:
                with self.create_pipeline() as p:
                    _ = p | ReadFromKafka(
                        consumer_config={
                            'bootstrap.servers':
                                'notvalid1:7777, notvalid2:3531',
                        },
                        topics=['topic1', 'topic2'],
                        key_deserializer=(
                            'org.apache.kafka.common.serialization.'
                            'ByteArrayDeserializer'),
                        value_deserializer=(
                            'org.apache.kafka.common.serialization.'
                            'LongDeserializer'),
                        expansion_service=expansion_service)
            # assertIn reports both operands on failure; the extra message
            # keeps the original explanation of what was expected.
            self.assertIn(
                'No resolvable bootstrap urls given in bootstrap.servers',
                str(ctx.exception),
                'Expected to fail due to invalid bootstrap.servers, but '
                'failed due to:\n%s' % str(ctx.exception))

            # We just test the expansion but do not execute.
            _ = (
                self.create_pipeline()
                | Impulse()
                | Map(lambda element: (1, element))
                | WriteToKafka(
                    producer_config={
                        'bootstrap.servers':
                            'localhost:9092, notvalid2:3531',
                    },
                    topic='topic1',
                    key_serializer=(
                        'org.apache.kafka.common.serialization.'
                        'LongSerializer'),
                    value_serializer=(
                        'org.apache.kafka.common.serialization.'
                        'ByteArraySerializer'),
                    expansion_service=expansion_service))
Esempio n. 2
0
def run_pipeline():
  """Write two JSON-encoded dicts to Kafka as (key, value) byte pairs.

  Each dict is serialized with ``json.dumps`` and UTF-8 encoded to form the
  value; the key is always the empty byte string. The broker list and the
  topic come from the names ``brokers`` and ``kafka_topic``, which are
  defined outside this function.
  """
  with beam.Pipeline() as p:
    _ = (
        p
        | beam.Create([{'a': 'alpha'}, {'b': 'beta'}])
        # The output type hint is attached directly to the converting Map
        # instead of to a separate identity Map.
        # NOTE(review): presumably the hint is what lets the Kafka sink pick
        # a bytes coder for keys and values -- confirm against the Beam docs.
        | 'Convert dict to byte string' >> beam.Map(
            lambda record: (b'', json.dumps(record).encode('utf-8'))
        ).with_output_types(typing.Tuple[bytes, bytes])
        | WriteToKafka(
            producer_config={'bootstrap.servers': brokers},
            topic=kafka_topic))
Esempio n. 3
0
 def build_write_pipeline(self, pipeline):
     """Attach the Kafka write stages to ``pipeline``.

     The Impulse output is expanded by a FlatMap into ``range(1000)``,
     reshuffled, mapped to ``(b'', str(n).encode())`` pairs typed as
     ``Tuple[bytes, bytes]``, and handed to ``WriteToKafka`` configured from
     ``self.bootstrap_servers``, ``self.topic`` and
     ``self.expansion_service``. Nothing is returned.
     """
     seed = pipeline | 'Impulse' >> beam.Impulse()
     # The impulse element itself is ignored; it only triggers the range.
     numbers = seed | 'Generate' >> beam.FlatMap(
         lambda _: range(1000))  # pylint: disable=range-builtin-not-iterating
     shuffled = numbers | 'Reshuffle' >> beam.Reshuffle()

     to_kv = beam.Map(lambda n: (b'', str(n).encode()))
     keyed = shuffled | 'MakeKV' >> to_kv.with_output_types(
         typing.Tuple[bytes, bytes])

     kafka_sink = WriteToKafka(
         producer_config={'bootstrap.servers': self.bootstrap_servers},
         topic=self.topic,
         expansion_service=self.expansion_service)
     _ = keyed | 'WriteToKafka' >> kafka_sink