def main():
    """Build and run the streaming pipeline over the sample events.

    Creates pipeline options with ``streaming`` switched on, reads the
    events produced by ``GenerateEvent.sample_data()``, passes them through
    ``apply_transform`` and hands the result to
    ``LogElements(with_window=True)``.
    """
    pipeline_options = PipelineOptions()
    standard_view = pipeline_options.view_as(StandardOptions)
    standard_view.streaming = True

    with beam.Pipeline(options=pipeline_options) as pipeline:
        sample_events = pipeline | GenerateEvent.sample_data()
        transformed = apply_transform(sample_events)
        transformed | LogElements(with_window=True)
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# Feed the integers 1..10 through Top.Largest(1) and pass the result to
# LogElements.
with beam.Pipeline() as pipeline:
    numbers = pipeline | beam.Create(range(1, 11))
    largest = numbers | beam.combiners.Top.Largest(1)
    largest | LogElements()
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# Two separately created word collections are merged with Flatten and the
# merged collection is passed to LogElements.
with beam.Pipeline() as pipeline:
    a_words = pipeline | 'Words starting with A' >> beam.Create(
        ['apple', 'ant', 'arrow'])
    b_words = pipeline | 'Words starting with B' >> beam.Create(
        ['ball', 'book', 'bow'])

    merged = (a_words, b_words) | beam.Flatten()
    merged | LogElements()
import apache_beam as beam
from generate_event import GenerateEvent
from apache_beam.transforms.window import FixedWindows
from apache_beam.transforms.trigger import AfterWatermark
from apache_beam.transforms.trigger import AfterCount
from apache_beam.transforms.trigger import AccumulationMode
from apache_beam.utils.timestamp import Duration
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from log_elements import LogElements


class CountEventsWithAccumulating(beam.PTransform):
    """Window events into one-day fixed windows and count them.

    The windowing uses an ``AfterWatermark`` trigger with an early
    ``AfterCount(1)`` trigger, ``ACCUMULATING`` accumulation mode and zero
    allowed lateness. The count is a global combine without defaults.
    """

    def expand(self, events):
        """Apply the windowing and the count to ``events``."""
        one_day_seconds = 1 * 24 * 60 * 60  # 1 Day Window
        windowed = events | beam.WindowInto(
            FixedWindows(one_day_seconds),
            trigger=AfterWatermark(early=AfterCount(1)),
            accumulation_mode=AccumulationMode.ACCUMULATING,
            allowed_lateness=Duration(seconds=0))
        count_all = beam.CombineGlobally(
            beam.combiners.CountCombineFn()).without_defaults()
        return windowed | count_all


# The pipeline is run with the streaming option enabled.
options = PipelineOptions()
streaming_view = options.view_as(StandardOptions)
streaming_view.streaming = True

with beam.Pipeline(options=options) as pipeline:
    sample_events = pipeline | GenerateEvent.sample_data()
    counts = sample_events | CountEventsWithAccumulating()
    counts | LogElements(with_window=True)
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements


def partition_fn(number, num_partitions):
    """Return 0 when ``number`` is greater than 100, otherwise 1.

    ``num_partitions`` is part of the signature but is not consulted.
    """
    return 0 if number > 100 else 1


with beam.Pipeline() as pipeline:
    numbers = pipeline | beam.Create([1, 2, 3, 4, 5, 100, 110, 150, 250])
    partitions = numbers | beam.Partition(partition_fn, 2)

    # Index 0 holds what partition_fn mapped to 0 (> 100), index 1 the rest.
    partitions[0] | 'Log numbers > 100' >> LogElements(
        prefix='Number > 100: ')
    partitions[1] | 'Log numbers <= 100' >> LogElements(
        prefix='Number <= 100: ')
def apply_transforms(fruits, countries):
    """Join fruits and countries that share the same first character.

    Args:
        fruits: collection of words, tagged 'fruits' in the join.
        countries: collection of words, tagged 'countries' in the join.

    Returns:
        The result of mapping every CoGroupByKey entry to a
        ``WordsAlphabet(key, first fruit, first country)``.
    """

    def map_to_alphabet_kv(word):
        # Key each word by its first character.
        return (word[0], word)

    def cogbk_result_to_wordsalphabet(cgbk_result):
        (alphabet, words) = cgbk_result
        # NOTE(review): the [0] lookups assume every key has at least one
        # fruit and one country; a key missing on one side would fail here.
        return WordsAlphabet(
            alphabet, words['fruits'][0], words['countries'][0])

    fruits_kv = fruits | 'Fruit to KV' >> beam.Map(map_to_alphabet_kv)
    countries_kv = countries | 'Country to KV' >> beam.Map(map_to_alphabet_kv)

    grouped = {
        'fruits': fruits_kv,
        'countries': countries_kv,
    } | beam.CoGroupByKey()
    return grouped | beam.Map(cogbk_result_to_wordsalphabet)


# The context manager runs the pipeline on exit and waits for it to finish,
# instead of calling p.run() and never waiting on the returned result.
with beam.Pipeline() as p:
    fruits = p | 'Fruits' >> beam.Create(['apple', 'banana', 'cherry'])
    countries = p | 'Countries' >> beam.Create(
        ['australia', 'brazil', 'canada'])

    (apply_transforms(fruits, countries) | LogElements())
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# The context manager runs the pipeline on exit and waits for it to finish,
# instead of calling p.run() and never waiting on the returned result.
with beam.Pipeline() as p:
    numbers = p | beam.Create([1, 2, 3, 4, 5])

    # The same input feeds two independent branches. The two unlabeled Map
    # steps are kept on separate source lines, as in the original.
    mult5_results = numbers | beam.Map(lambda num: num * 5)
    mult10_results = numbers | beam.Map(lambda num: num * 10)

    mult5_results | 'Log multiply 5' >> LogElements(
        prefix='Multiplied by 5: ')
    mult10_results | 'Log multiply 10' >> LogElements(
        prefix='Multiplied by 10: ')
# NOTE(review): this span opens inside a method body whose `def`/`class`
# header is not visible here (presumably Event.__init__ - confirm).
# The visible code stores id, event and timestamp on self and formats them in
# __str__. AddTimestampDoFn re-emits each element as a window.TimestampedValue
# stamped with element.timestamp.timestamp(). The pipeline creates five Event
# values dated 2020-03-04 through 2020-03-08 (UTC) and sends them through
# AddTimestampDoFn into LogElements(with_timestamp=True).
self.id = id self.event = event self.timestamp = timestamp def __str__(self) -> str: return f'Event({self.id}, {self.event}, {self.timestamp})' class AddTimestampDoFn(beam.DoFn): def process(self, element, **kwargs): unix_timestamp = element.timestamp.timestamp() yield window.TimestampedValue(element, unix_timestamp) with beam.Pipeline() as p: (p | beam.Create([ Event('1', 'book-order', datetime.datetime(2020, 3, 4, 0, 0, 0, 0, tzinfo=pytz.UTC)), Event('2', 'pencil-order', datetime.datetime(2020, 3, 5, 0, 0, 0, 0, tzinfo=pytz.UTC)), Event('3', 'paper-order', datetime.datetime(2020, 3, 6, 0, 0, 0, 0, tzinfo=pytz.UTC)), Event('4', 'pencil-order', datetime.datetime(2020, 3, 7, 0, 0, 0, 0, tzinfo=pytz.UTC)), Event('5', 'book-order', datetime.datetime(2020, 3, 8, 0, 0, 0, 0, tzinfo=pytz.UTC)), ]) | beam.ParDo(AddTimestampDoFn()) | LogElements(with_timestamp=True))
# - Multiple Outputs

import apache_beam as beam
from apache_beam import pvalue

from log_elements import LogElements

num_below_100_tag = 'num_below_100'
num_above_100_tag = 'num_above_100'


class ProcessNumbersDoFn(beam.DoFn):
    """Route each number to the main output or to the above-100 output."""

    def process(self, element):
        """Yield ``element`` itself when it is <= 100, else a tagged output.

        Elements that are not <= 100 are wrapped in a
        ``pvalue.TaggedOutput`` carrying ``num_above_100_tag``.
        """
        if element <= 100:
            routed = element
        else:
            routed = pvalue.TaggedOutput(num_above_100_tag, element)
        yield routed


with beam.Pipeline() as pipeline:
    numbers = pipeline | beam.Create([10, 50, 120, 20, 200, 0])
    split_numbers = numbers | beam.ParDo(ProcessNumbersDoFn()).with_outputs(
        num_above_100_tag, main=num_below_100_tag)

    split_numbers[num_below_100_tag] | 'Log numbers <= 100' >> LogElements(
        prefix='Number <= 100: ')
    split_numbers[num_above_100_tag] | 'Log numbers > 100' >> LogElements(
        prefix='Number > 100: ')
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# beam-playground:
#   name: HelloBeam
#   description: Task from katas to create a simple pipeline that takes a hardcoded input element "Hello Beam".
#   multifile: false
#   pipeline_options:
#   categories:
#     - Testing

import apache_beam as beam

from log_elements import LogElements

# A single hardcoded element is created and passed to LogElements.
with beam.Pipeline() as pipeline:
    greeting = pipeline | beam.Create(['Hello Beam'])
    greeting | LogElements()
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# The context manager runs the pipeline on exit and waits for it to finish,
# instead of calling p.run() and never waiting on the returned result.
with beam.Pipeline() as p:
    # Feed the integers 1..10 through Top.Largest(1) and pass the result to
    # LogElements.
    (p | beam.Create(range(1, 11))
       | beam.combiners.Top.Largest(1)
       | LogElements())
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# beam-playground:
#   name: AggregationSum
#   description: Task from katas to compute the sum of all elements.
#   multifile: false
#   context_line: 29
#   categories:
#     - Combiners

import apache_beam as beam

from log_elements import LogElements

# The integers 1..10 are combined globally with sum and the result is passed
# to LogElements.
with beam.Pipeline() as pipeline:
    numbers = pipeline | beam.Create(range(1, 11))
    total = numbers | beam.CombineGlobally(sum)
    total | LogElements()
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import apache_beam as beam

from log_elements import LogElements


def sum(numbers):
    """Return the total of ``numbers``, starting from 0.

    NOTE: this deliberately keeps the name ``sum``, which shadows the builtin
    for the rest of this module; the pipeline below passes this function to
    ``beam.CombineGlobally``.
    """
    total = 0

    for num in numbers:
        total += num

    return total


# The context manager runs the pipeline on exit and waits for it to finish,
# instead of calling p.run() and never waiting on the returned result.
with beam.Pipeline() as p:
    (p | beam.Create([1, 2, 3, 4, 5])
       | beam.CombineGlobally(sum)
       | LogElements())