Esempio n. 1
0
def main():
    """Run the sample-event pipeline with the streaming option enabled.

    Events from ``GenerateEvent.sample_data()`` are passed through
    ``apply_transform`` and the result is logged via
    ``LogElements(with_window=True)``.
    """
    streaming_options = PipelineOptions()
    standard = streaming_options.view_as(StandardOptions)
    standard.streaming = True

    with beam.Pipeline(options=streaming_options) as pipeline:
        transformed = apply_transform(
            pipeline | GenerateEvent.sample_data())
        transformed | LogElements(with_window=True)
Esempio n. 2
0
#   Licensed to the Apache Software Foundation (ASF) under one
#   or more contributor license agreements.  See the NOTICE file
#   distributed with this work for additional information
#   regarding copyright ownership.  The ASF licenses this file
#   to you under the Apache License, Version 2.0 (the
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import apache_beam as beam

from log_elements import LogElements

with beam.Pipeline() as p:
    # Source collection: the integers 1 through 10.
    numbers = p | beam.Create(range(1, 11))

    # Apply Top.Largest(1) to the numbers and log what it produces.
    largest = numbers | beam.combiners.Top.Largest(1)
    largest | LogElements()
Esempio n. 3
0
#   Licensed to the Apache Software Foundation (ASF) under one
#   or more contributor license agreements.  See the NOTICE file
#   distributed with this work for additional information
#   regarding copyright ownership.  The ASF licenses this file
#   to you under the Apache License, Version 2.0 (the
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import apache_beam as beam

from log_elements import LogElements

with beam.Pipeline() as p:
    # Two separate input collections created on the same pipeline.
    wordsStartingWithA = (
        p | 'Words starting with A' >> beam.Create(['apple', 'ant', 'arrow']))
    wordsStartingWithB = (
        p | 'Words starting with B' >> beam.Create(['ball', 'book', 'bow']))

    # Flatten the pair of collections and log every element of the result.
    all_words = (wordsStartingWithA, wordsStartingWithB) | beam.Flatten()
    all_words | LogElements()
Esempio n. 4
0
import apache_beam as beam
from generate_event import GenerateEvent
from apache_beam.transforms.window import FixedWindows
from apache_beam.transforms.trigger import AfterWatermark
from apache_beam.transforms.trigger import AfterCount
from apache_beam.transforms.trigger import AccumulationMode
from apache_beam.utils.timestamp import Duration
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import StandardOptions
from log_elements import LogElements


class CountEventsWithAccumulating(beam.PTransform):
    """Window events into fixed one-day windows and count them.

    The windowing uses ``AfterWatermark`` with an early ``AfterCount(1)``
    trigger, ``AccumulationMode.ACCUMULATING`` and zero allowed lateness.
    """

    def expand(self, events):
        """Apply the windowing and the count combine to ``events``.

        Args:
            events: the input collection of events.

        Returns:
            The collection produced by ``CombineGlobally`` with
            ``CountCombineFn`` (``without_defaults``) over the windowed
            input.
        """
        one_day_in_seconds = 1 * 24 * 60 * 60
        windowed = events | beam.WindowInto(
            FixedWindows(one_day_in_seconds),
            trigger=AfterWatermark(early=AfterCount(1)),
            accumulation_mode=AccumulationMode.ACCUMULATING,
            allowed_lateness=Duration(seconds=0))

        count_events = beam.CombineGlobally(beam.combiners.CountCombineFn())
        return windowed | count_events.without_defaults()


# Pipeline options with the streaming flag switched on.
options = PipelineOptions()
standard_options = options.view_as(StandardOptions)
standard_options.streaming = True

with beam.Pipeline(options=options) as p:
    events = p | GenerateEvent.sample_data()
    counts = events | CountEventsWithAccumulating()
    # Log each result using LogElements' with_window option.
    counts | LogElements(with_window=True)
Esempio n. 5
0
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import apache_beam as beam

from log_elements import LogElements


def partition_fn(number, num_partitions):
    """Pick the partition index for ``number``.

    Args:
        number: the value to classify.
        num_partitions: total number of partitions; not used here.

    Returns:
        0 when ``number`` is greater than 100, otherwise 1.
    """
    return 0 if number > 100 else 1


with beam.Pipeline() as p:
    numbers = p | beam.Create([1, 2, 3, 4, 5, 100, 110, 150, 250])
    results = numbers | beam.Partition(partition_fn, 2)

    # partition_fn returns 0 for values above 100 and 1 for everything else.
    above_100 = results[0]
    up_to_100 = results[1]

    above_100 | 'Log numbers > 100' >> LogElements(prefix='Number > 100: ')
    up_to_100 | 'Log numbers <= 100' >> LogElements(prefix='Number <= 100: ')
Esempio n. 6
0
def apply_transforms(fruits, countries):
    """Join fruits and countries on their first letter.

    Both inputs are keyed by the first character of each word, grouped with
    ``CoGroupByKey`` under the tags ``'fruits'`` and ``'countries'``, and
    each grouped result is turned into a ``WordsAlphabet``.

    Args:
        fruits: collection of fruit names.
        countries: collection of country names.

    Returns:
        A collection of ``WordsAlphabet(letter, fruit, country)`` values,
        built from the first fruit and the first country of each group.
    """
    def map_to_alphabet_kv(word):
        # Key each word by its first character.
        first_letter = word[0]
        return (first_letter, word)

    def cogbk_result_to_wordsalphabet(cgbk_result):
        alphabet, grouped = cgbk_result
        first_fruit = grouped['fruits'][0]
        first_country = grouped['countries'][0]
        return WordsAlphabet(alphabet, first_fruit, first_country)

    to_kv = beam.Map(map_to_alphabet_kv)
    fruits_kv = fruits | 'Fruit to KV' >> to_kv
    countries_kv = countries | 'Country to KV' >> beam.Map(map_to_alphabet_kv)

    tagged_inputs = {'fruits': fruits_kv, 'countries': countries_kv}
    grouped_by_letter = tagged_inputs | beam.CoGroupByKey()
    return grouped_by_letter | beam.Map(cogbk_result_to_wordsalphabet)


# Use the pipeline as a context manager so that it is run and waited on when
# the block exits, instead of relying on a bare p.run() whose result is never
# awaited.
with beam.Pipeline() as p:
    fruits = p | 'Fruits' >> beam.Create(['apple', 'banana', 'cherry'])
    countries = p | 'Countries' >> beam.Create(
        ['australia', 'brazil', 'canada'])

    # Join the two collections by first letter and log each joined record.
    (apply_transforms(fruits, countries) | LogElements())
Esempio n. 7
0
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# Use the pipeline as a context manager so that it is run and waited on when
# the block exits, instead of relying on a bare p.run() whose result is never
# awaited.
with beam.Pipeline() as p:
    numbers = p | beam.Create([1, 2, 3, 4, 5])

    # Two independent branches consume the same input collection.
    mult5_results = numbers | beam.Map(lambda num: num * 5)
    mult10_results = numbers | beam.Map(lambda num: num * 10)

    mult5_results | 'Log multiply 5' >> LogElements(
        prefix='Multiplied by 5: ')
    mult10_results | 'Log multiply 10' >> LogElements(
        prefix='Multiplied by 10: ')
Esempio n. 8
0
        self.id = id
        self.event = event
        self.timestamp = timestamp

    def __str__(self) -> str:
        """Return the text ``Event(<id>, <event>, <timestamp>)``."""
        return f'Event({self.id}, {self.event}, {self.timestamp})'


class AddTimestampDoFn(beam.DoFn):
    """DoFn that wraps each element in a ``TimestampedValue``."""

    def process(self, element, **kwargs):
        """Yield ``element`` paired with its own timestamp.

        Args:
            element: an object whose ``timestamp`` attribute provides a
                ``timestamp()`` method; the value returned by that call is
                used as the timestamp.
            **kwargs: accepted but not used.

        Yields:
            ``window.TimestampedValue(element, <timestamp>)``.
        """
        seconds = element.timestamp.timestamp()
        stamped = window.TimestampedValue(element, seconds)
        yield stamped


with beam.Pipeline() as p:
    # Five sample events, one per day from 4 to 8 March 2020 (midnight UTC).
    utc = pytz.UTC
    sample_events = [
        Event('1', 'book-order', datetime.datetime(2020, 3, 4, tzinfo=utc)),
        Event('2', 'pencil-order', datetime.datetime(2020, 3, 5, tzinfo=utc)),
        Event('3', 'paper-order', datetime.datetime(2020, 3, 6, tzinfo=utc)),
        Event('4', 'pencil-order', datetime.datetime(2020, 3, 7, tzinfo=utc)),
        Event('5', 'book-order', datetime.datetime(2020, 3, 8, tzinfo=utc)),
    ]

    events = p | beam.Create(sample_events)
    timestamped = events | beam.ParDo(AddTimestampDoFn())
    timestamped | LogElements(with_timestamp=True)
Esempio n. 9
0
#     - Multiple Outputs

import apache_beam as beam
from apache_beam import pvalue

from log_elements import LogElements

# Output tags: the first is passed as the main output tag to with_outputs()
# below; the second is the tag ProcessNumbersDoFn puts on its TaggedOutput
# elements.
num_below_100_tag = 'num_below_100'
num_above_100_tag = 'num_above_100'


class ProcessNumbersDoFn(beam.DoFn):
    """DoFn that splits numbers around 100 using a tagged output."""

    def process(self, element):
        """Emit ``element`` untagged or tagged depending on its value.

        Args:
            element: the number to route.

        Yields:
            ``element`` itself when it is less than or equal to 100;
            otherwise a ``TaggedOutput`` carrying ``num_above_100_tag``.
        """
        yield (element if element <= 100
               else pvalue.TaggedOutput(num_above_100_tag, element))


with beam.Pipeline() as p:
    # ParDo with a tagged output plus a named main output.
    split_numbers = beam.ParDo(ProcessNumbersDoFn()).with_outputs(
        num_above_100_tag, main=num_below_100_tag)

    numbers = p | beam.Create([10, 50, 120, 20, 200, 0])
    results = numbers | split_numbers

    below_or_equal = results[num_below_100_tag]
    below_or_equal | 'Log numbers <= 100' >> LogElements(
        prefix='Number <= 100: ')

    above = results[num_above_100_tag]
    above | 'Log numbers > 100' >> LogElements(
        prefix='Number > 100: ')
Esempio n. 10
0
#   or more contributor license agreements.  See the NOTICE file
#   distributed with this work for additional information
#   regarding copyright ownership.  The ASF licenses this file
#   to you under the Apache License, Version 2.0 (the
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

# beam-playground:
#   name: HelloBeam
#   description: Task from katas to create a simple pipeline that takes a hardcoded input element "Hello Beam".
#   multifile: false
#   pipeline_options:
#   categories:
#     - Testing

import apache_beam as beam

from log_elements import LogElements

with beam.Pipeline() as p:
    # A single hard-coded input element, logged as-is.
    greeting = p | beam.Create(['Hello Beam'])
    greeting | LogElements()
Esempio n. 11
0
#   Licensed to the Apache Software Foundation (ASF) under one
#   or more contributor license agreements.  See the NOTICE file
#   distributed with this work for additional information
#   regarding copyright ownership.  The ASF licenses this file
#   to you under the Apache License, Version 2.0 (the
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import apache_beam as beam

from log_elements import LogElements

# Use the pipeline as a context manager so that it is run and waited on when
# the block exits, instead of relying on a bare p.run() whose result is never
# awaited.
with beam.Pipeline() as p:
    # Integers 1 through 10, reduced with Top.Largest(1), then logged.
    (p | beam.Create(range(1, 11))
     | beam.combiners.Top.Largest(1)
     | LogElements())
Esempio n. 12
0
#   or more contributor license agreements.  See the NOTICE file
#   distributed with this work for additional information
#   regarding copyright ownership.  The ASF licenses this file
#   to you under the Apache License, Version 2.0 (the
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

# beam-playground:
#   name: AggregationSum
#   description: Task from katas to compute the sum of all elements.
#   multifile: false
#   context_line: 29
#   categories:
#     - Combiners

import apache_beam as beam

from log_elements import LogElements

with beam.Pipeline() as p:
    numbers = p | beam.Create(range(1, 11))

    # Combine the whole collection with `sum` and log the result.
    total = numbers | beam.CombineGlobally(sum)
    total | LogElements()
Esempio n. 13
0
#   "License"); you may not use this file except in compliance
#   with the License.  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

import apache_beam as beam

from log_elements import LogElements


def sum(numbers):
    """Add up every value in ``numbers``, starting from 0.

    Note that this module-level name shadows the builtin ``sum`` for the
    code that follows it.

    Args:
        numbers: an iterable of values to add together.

    Returns:
        The accumulated total; 0 when ``numbers`` is empty.
    """
    running_total = 0
    for value in numbers:
        running_total += value
    return running_total


# Use the pipeline as a context manager so that it is run and waited on when
# the block exits, instead of relying on a bare p.run() whose result is never
# awaited.
with beam.Pipeline() as p:
    # Combine the five inputs with the `sum` function and log the result.
    (p | beam.Create([1, 2, 3, 4, 5])
     | beam.CombineGlobally(sum)
     | LogElements())