Python WindowedValue 예제들, apache_beam.transforms.window.WindowedValue Python 예제들

예제 #1

0

파일 보기

    def run_trigger_simple(self, window_fn, trigger_fn, accumulation_mode,
                           timestamped_data, expected_panes, *groupings,
                           **kwargs):
        """Run ``self.run_trigger`` once for every bundle size in ``groupings``.

        Each ``(timestamp, element)`` pair is assigned windows through
        ``window_fn`` and wrapped in a ``WindowedValue``; the wrapped values
        are handed to ``run_trigger`` in bundles of the requested size.

        Args:
          window_fn: Object whose ``assign`` maps an assign-context to windows.
          trigger_fn: Forwarded unchanged to ``run_trigger``.
          accumulation_mode: Forwarded unchanged to ``run_trigger``.
          timestamped_data: Iterable of ``(timestamp, element)`` pairs.
          expected_panes: Forwarded unchanged to ``run_trigger``.
          *groupings: Bundle sizes to try; defaults to a single size of 1.
          **kwargs: Only ``late_data`` (iterable of ``(timestamp, element)``
            pairs, default empty) is accepted; anything else fails the
            assertion below.
        """
        late_data = kwargs.pop('late_data', [])
        assert not kwargs

        def bundle_data(data, size):
            """Lazily yield lists of at most ``size`` windowed values."""
            bundle = []
            for timestamp, elem in data:
                windows = window_fn.assign(
                    WindowFn.AssignContext(timestamp, elem))
                bundle.append(WindowedValue(elem, timestamp, windows))
                if len(bundle) == size:
                    yield bundle
                    bundle = []
            # Flush the trailing, partially filled bundle (if any).
            if bundle:
                yield bundle

        if not groupings:
            groupings = [1]
        for group_by in groupings:
            # The bundles are produced lazily by bundle_data(); an eager copy
            # used to be built here and then discarded, which did redundant
            # work and consumed the input one extra time per grouping.
            self.run_trigger(window_fn, trigger_fn, accumulation_mode,
                             bundle_data(timestamped_data, group_by),
                             bundle_data(late_data, group_by), expected_panes)

예제 #2

0

파일 보기

 def process(self,
             e,
             w=beam.DoFn.WindowParam,
             p=beam.DoFn.PaneInfoParam,
             t=beam.DoFn.TimestampParam):
     """Re-emit the element together with its window, timestamp and pane.

     The element is rebuilt as a ``WindowedValue`` placed in the single
     window ``w`` and then wrapped in a ``WindowedValueHolder``.
     """
     rebuilt = WindowedValue(e, t, [w], p)
     holder = test_stream.WindowedValueHolder(rebuilt)
     yield holder

예제 #3

0

파일 보기

 def process_elements(self, state, windowed_values,
                      unused_output_watermark):
     """Yield a single windowed value holding all the unwrapped inputs.

     A list input is unwrapped eagerly into a list of ``.value`` entries;
     any other iterable is wrapped in ``_UnwindowedValues``. The result is
     emitted at ``MIN_TIMESTAMP`` in ``self.GLOBAL_WINDOW_TUPLE``.
     """
     if not isinstance(windowed_values, list):
         payload = _UnwindowedValues(windowed_values)
     else:
         payload = [item.value for item in windowed_values]
     yield WindowedValue(payload, MIN_TIMESTAMP, self.GLOBAL_WINDOW_TUPLE)

예제 #4

0

파일 보기

def _windowed_value_info_map_fn(k,
                                vs,
                                window=beam.DoFn.WindowParam,
                                t=beam.DoFn.TimestampParam,
                                p=beam.DoFn.PaneInfoParam):
    """Pair key ``k`` with ``_windowed_value_info`` of its grouped values.

    The values ``vs`` are first wrapped in a ``WindowedValue`` carrying the
    current window, timestamp and pane info.
    """
    wrapped = WindowedValue(vs, windows=[window], timestamp=t, pane_info=p)
    info = _windowed_value_info(wrapped)
    return (k, info)

예제 #5

0

파일 보기

 def process_elements(self, state, windowed_values,
                      unused_output_watermark,
                      unused_input_watermark=MIN_TIMESTAMP):
   """Yield one windowed value wrapping every input element.

   The inputs are wrapped in ``_UnwindowedValues`` and emitted at
   ``MIN_TIMESTAMP`` in ``self.GLOBAL_WINDOW_TUPLE`` with the pane info
   ``self.ONLY_FIRING``.
   """
   payload = _UnwindowedValues(windowed_values)
   yield WindowedValue(
       payload, MIN_TIMESTAMP, self.GLOBAL_WINDOW_TUPLE, self.ONLY_FIRING)

예제 #6

0

파일 보기

 def finish_bundle(self):
     """Flush whatever is left in ``self._buffer`` at the end of a bundle.

     When the buffer is non-empty its length is logged, the buffer is
     emitted as one value at ``MIN_TIMESTAMP`` in a ``GlobalWindow``, and
     the buffer is then replaced by a fresh empty list.
     """
     from apache_beam.utils import timestamp
     from apache_beam.transforms.window import WindowedValue, GlobalWindow
     if len(self._buffer) == 0:
         return
     message = "Final Buffer Length: {}".format(len(self._buffer))
     logging.info(message)
     yield WindowedValue(
         self._buffer, timestamp.MIN_TIMESTAMP, [GlobalWindow()])
     # Only reached once the consumer resumes the generator after the yield.
     self._buffer = []

예제 #7

0

파일 보기

파일: bundle_processor.py 프로젝트: junaidsaiyed/beam

 def process(self,
             element,
             timestamp=beam.DoFn.TimestampParam,
             window=beam.DoFn.WindowParam):
     """Re-window ``element`` using ``self.windowing.windowfn``.

     Yields the element, with its timestamp unchanged, in the windows the
     window function assigns for it.
     """
     assign_context = WindowFn.AssignContext(
         timestamp, element=element, window=window)
     assigned = self.windowing.windowfn.assign(assign_context)
     yield WindowedValue(element, timestamp, assigned)

예제 #8

0

파일 보기

 def test_picklable_output(self):
     """Driver output must survive a pickle round trip.

     The input is a generator, which is itself expected to be rejected by
     ``pickle.dumps`` with a ``TypeError``; every value produced by the
     driver from it must nevertheless pickle and unpickle to 0..9.
     """
     windows = (trigger.GlobalWindow(), )
     driver = trigger.DiscardingGlobalTriggerDriver()
     lazy_input = (WindowedValue(n, 0, windows) for n in range(10))
     with self.assertRaises(TypeError):
         pickle.dumps(lazy_input)
     for produced in driver.process_elements(None, lazy_input, None):
         restored = pickle.loads(pickle.dumps(produced))
         self.assertEqual(restored.value, list(range(10)))

예제 #9

0

파일 보기

 def bundle_data(data, size):
   """Yield lists of at most ``size`` windowed values built from ``data``.

   ``data`` is an iterable of ``(timestamp, element)`` pairs; each pair is
   assigned windows via the enclosing scope's ``window_fn``. A final,
   partially filled list is yielded only when it is non-empty.
   """
   pending = []
   for ts, value in data:
     assign_context = WindowFn.AssignContext(ts, value)
     assigned = window_fn.assign(assign_context)
     pending.append(WindowedValue(value, ts, assigned))
     if len(pending) != size:
       continue
     yield pending
     pending = []
   if pending:
     yield pending

예제 #10

0

파일 보기

파일: trigger.py 프로젝트: espv/beam-plus-wrapper

  def _output(
      self,
      window,
      finished,
      state,
      input_watermark,
      output_watermark,
      maybe_ontime):
    """Output window and clean up if appropriate.

    Reads the pane counters and buffered elements for ``window`` from
    ``state``, builds the pane metadata, clears state that is no longer
    needed and returns the pane as a single-window ``WindowedValue``.

    Args:
      window: The window whose pane is being emitted.
      finished: Truthy when the window is done; ELEMENTS is then cleared and
        a TOMBSTONE entry is added.
      state: State accessor used via get_state/add_state/clear_state.
      input_watermark: Compared with ``window.end`` when deciding whether the
        watermark hold is kept.
      output_watermark: Compared with ``window.max_timestamp()`` to choose
        between EARLY timing and ON_TIME/LATE timing.
      maybe_ontime: The pane is ON_TIME only when this is truthy and the
        nonspeculative index read from state equals 0; otherwise it is LATE.

    Returns:
      ``WindowedValue(values, timestamp, (window,), pane_info)``.
    """
    # Read the pane index before adding 1 to the INDEX state for next time.
    index = state.get_state(window, self.INDEX)
    state.add_state(window, self.INDEX, 1)
    if output_watermark <= window.max_timestamp():
      # Output watermark has not passed the window's max timestamp: EARLY.
      nonspeculative_index = -1
      timing = windowed_value.PaneInfoTiming.EARLY
      if state.get_state(window, self.NONSPECULATIVE_INDEX):
        # A nonspeculative pane was already recorded for this window, which
        # is unexpected while still EARLY; keep counting and warn.
        nonspeculative_index = state.get_state(
            window, self.NONSPECULATIVE_INDEX)
        state.add_state(window, self.NONSPECULATIVE_INDEX, 1)
        _LOGGER.warning(
            'Watermark moved backwards in time '
            'or late data moved window end forward.')
    else:
      nonspeculative_index = state.get_state(window, self.NONSPECULATIVE_INDEX)
      state.add_state(window, self.NONSPECULATIVE_INDEX, 1)
      timing = (
          windowed_value.PaneInfoTiming.ON_TIME if maybe_ontime and
          nonspeculative_index == 0 else windowed_value.PaneInfoTiming.LATE)
    # Positional PaneInfo arguments: (index == 0), finished, timing, index,
    # nonspeculative_index.
    pane_info = windowed_value.PaneInfo(
        index == 0, finished, timing, index, nonspeculative_index)

    # Fetch the buffered elements before any clearing below.
    values = state.get_state(window, self.ELEMENTS)
    if finished:
      # TODO(robertwb): allowed lateness
      state.clear_state(window, self.ELEMENTS)
      state.add_state(window, self.TOMBSTONE, 1)
    elif self.accumulation_mode == AccumulationMode.DISCARDING:
      # Discarding mode drops the elements after every firing.
      state.clear_state(window, self.ELEMENTS)

    timestamp = state.get_state(window, self.WATERMARK_HOLD)
    if timestamp is None:
      # If no watermark hold was set, output at end of window.
      timestamp = window.max_timestamp()
    elif input_watermark < window.end and self.trigger_fn.has_ontime_pane():
      # Hold the watermark in case there is an empty pane that needs to be fired
      # at the end of the window.
      pass
    else:
      state.clear_state(window, self.WATERMARK_HOLD)

    return WindowedValue(values, timestamp, (window, ), pane_info)

예제 #11

0

파일 보기

파일: transform_evaluator.py 프로젝트: will-lauer/beam

 def process_element(self, element):
     """Apply one scripted event carried in ``element.value``.

     A ``WatermarkEvent`` updates ``self._watermark``. An ``ElementEvent``
     emits its timestamped values into a new bundle on the first output and
     appends that bundle to ``self.bundles``. Any other event is ignored.
     """
     # Elements are emitted here so that their order in the pipeline matches
     # the order in which they appear in the script.
     event = element.value
     if isinstance(event, WatermarkEvent):
         self._watermark = event.new_watermark
         return
     if not isinstance(event, ElementEvent):
         return

     first_output = list(self._outputs)[0]
     bundle = self._evaluation_context.create_bundle(first_output)
     for timestamped in event.timestamped_values:
         # Unreify the value into the correct window; on TypeError fall back
         # to the global window.
         try:
             bundle.output(WindowedValue(**timestamped.value))
         except TypeError:
             fallback = GlobalWindows.windowed_value(
                 timestamped.value, timestamp=timestamped.timestamp)
             bundle.output(fallback)
     self.bundles.append(bundle)

예제 #12

0

파일 보기

파일: trigger.py 프로젝트: sbilac/incubator-beam

  def _output(self, window, finished, state):
    """Emit the buffered elements of ``window`` and tidy up its state.

    ELEMENTS is cleared when the window is finished or the accumulation mode
    is DISCARDING; a finished window additionally gets a TOMBSTONE entry.
    The output timestamp is the watermark hold when one is set (the hold is
    then cleared), otherwise ``window.end``.
    """
    buffered = state.get_state(window, self.ELEMENTS)
    # TODO(robertwb): allowed lateness
    if finished or self.accumulation_mode == AccumulationMode.DISCARDING:
      state.clear_state(window, self.ELEMENTS)
    if finished:
      state.add_state(window, self.TOMBSTONE, 1)

    hold = state.get_state(window, self.WATERMARK_HOLD)
    if hold is not None:
      state.clear_state(window, self.WATERMARK_HOLD)
      output_timestamp = hold
    else:
      # No watermark hold was set, so output at the end of the window.
      output_timestamp = window.end

    return WindowedValue(buffered, output_timestamp, (window,))

예제 #13

0

파일 보기

    def _execute(self, window_fn, trigger_fn, accumulation_mode,
                 timestamp_combiner, transcript, unused_spec):
        """Replay ``transcript`` against an in-memory trigger driver.

        ``transcript`` is an iterable of ``(action, params)`` pairs. 'input'
        feeds elements, 'watermark' advances the watermark, 'expect' checks
        and consumes pending outputs, and 'state' is accepted but ignored.
        Any output still pending when a non-'expect' action (or the end of
        the transcript) is reached fails the test.
        """
        windowing = Windowing(window_fn, trigger_fn, accumulation_mode,
                              timestamp_combiner)
        driver = GeneralTriggerDriver(windowing, TestClock())
        state = InMemoryUnmergedState()
        output = []
        watermark = MIN_TIMESTAMP

        def describe(wvalue, window):
            """Summarize a fired pane in the transcript's dict format."""
            return {
                'window': [window.start, window.end - 1],
                'values': sorted(wvalue.value),
                'timestamp': wvalue.timestamp
            }

        def fire_timers():
            """Fire timers at the current watermark until none remain."""
            while True:
                pending = state.get_and_clear_timers(watermark)
                if not pending:
                    break
                for timer_window, (name, time_domain, t_timestamp) in pending:
                    fired = driver.process_timer(timer_window, name,
                                                 time_domain, t_timestamp,
                                                 state)
                    for wvalue in fired:
                        # Timer output must live in exactly one window.
                        window, = wvalue.windows
                        output.append(describe(wvalue, window))

        for action, params in transcript:

            if action != 'expect':
                # Fail if we have output that was not expected in the transcript.
                self.assertEqual([],
                                 output,
                                 msg='Unexpected output: %s before %s: %s' %
                                 (output, action, params))

            if action == 'input':
                bundle = [
                    WindowedValue(
                        t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
                    for t in params
                ]
                produced = driver.process_elements(state, bundle, watermark)
                # Rebinding is deliberate: fire_timers() appends to whatever
                # list ``output`` currently names.
                output = [
                    describe(wvalue, wvalue.windows[0]) for wvalue in produced
                ]
                fire_timers()

            elif action == 'watermark':
                watermark = params
                fire_timers()

            elif action == 'expect':
                for expected_output in params:
                    # An output matches when it agrees on every shared key.
                    matched = next(
                        (candidate for candidate in output
                         if all(candidate[k] == expected_output[k]
                                for k in candidate if k in expected_output)),
                        None)
                    if matched is None:
                        self.fail('Unmatched output %s in %s' %
                                  (expected_output, output))
                    output.remove(matched)

            elif action == 'state':
                # TODO(robertwb): Implement once we support allowed lateness.
                pass

            else:
                self.fail('Unknown action: ' + action)

        # Fail if we have output that was not expected in the transcript.
        self.assertEqual([], output, msg='Unexpected output: %s' % output)

예제 #14

0

파일 보기

파일: trigger_test.py 프로젝트: ziel/beam

    def _execute(self, window_fn, trigger_fn, accumulation_mode,
                 timestamp_combiner, transcript, spec):
        """Run ``transcript`` through a streaming pipeline and verify outputs.

        The transcript is translated into a ``TestStream`` carrying two kinds
        of tagged elements: ``('input', value)`` elements that are windowed
        and grouped, and ``('expect', panes)`` elements that list the panes
        expected so far. Both branches are flattened into a stateful ``Check``
        DoFn that compares them.

        Args:
          window_fn: Window function passed to ``beam.WindowInto``.
          trigger_fn: Trigger passed to ``beam.WindowInto``.
          accumulation_mode: Accumulation mode passed to ``beam.WindowInto``.
          timestamp_combiner: Timestamp combiner passed to
            ``beam.WindowInto``.
          transcript: Iterable of ``(action, params)`` pairs; the actions
            handled are 'expect', 'input', 'watermark', 'clock' and 'state'.
          spec: Mapping; its optional 'broken_on' entry lists runner class
            names on which the test is skipped.

        Raises:
          ValueError: If the transcript contains an unknown action.
        """

        # Skip on runners the spec declares as broken.
        runner_name = TestPipeline().runner.__class__.__name__
        if runner_name in spec.get('broken_on', ()):
            self.skipTest('Known to be broken on %s' % runner_name)

        test_stream = TestStream()
        for action, params in transcript:
            if action == 'expect':
                test_stream.add_elements([('expect', params)])
            else:
                # Before any non-expect action, assert that nothing is
                # pending by expecting an empty list of outputs.
                test_stream.add_elements([('expect', [])])
                if action == 'input':
                    test_stream.add_elements([('input', e) for e in params])
                elif action == 'watermark':
                    test_stream.advance_watermark_to(params)
                elif action == 'clock':
                    test_stream.advance_processing_time(params)
                elif action == 'state':
                    pass  # Requires inspection of implementation details.
                else:
                    raise ValueError('Unexpected action: %s' % action)
        # Final check that no unexpected output remains at the end.
        test_stream.add_elements([('expect', [])])

        class Check(beam.DoFn):
            """A StatefulDoFn that verifies outputs are produced as expected.

      This DoFn takes in two kinds of inputs, actual outputs and
      expected outputs.  When an actual output is received, it is buffered
      into state, and when an expected output is received, this buffered
      state is retrieved and compared against the expected value(s) to ensure
      they match.

      The key is ignored, but all items must be on the same key to share state.
      """
            def process(self,
                        element,
                        seen=beam.DoFn.StateParam(
                            beam.transforms.userstate.BagStateSpec(
                                'seen', beam.coders.FastPrimitivesCoder()))):
                # element is (key, (action, data)); the key is discarded.
                _, (action, data) = element
                if action == 'actual':
                    seen.add(data)

                elif action == 'expect':
                    # Take everything buffered since the previous 'expect'.
                    actual = list(seen.read())
                    seen.clear()

                    if len(actual) > len(data):
                        raise AssertionError(
                            'Unexpected output: expected %s but got %s' %
                            (data, actual))
                    elif len(data) > len(actual):
                        raise AssertionError(
                            'Unmatched output: expected %s but got %s' %
                            (data, actual))
                    else:

                        def diff(actual, expected):
                            # Return the first key (in reverse-sorted order)
                            # present in both dicts whose values differ;
                            # implicitly None when there is no such key.
                            for key in sorted(expected.keys(), reverse=True):
                                if key in actual:
                                    if actual[key] != expected[key]:
                                        return key

                        for output in actual:
                            diffs = [
                                diff(output, expected) for expected in data
                            ]
                            # Every expectation differs somewhere, so this
                            # output matches none of them.
                            if all(diffs):
                                raise AssertionError(
                                    'Unmatched output: %s not found in %s (diffs in %s)'
                                    % (output, data, diffs))

                else:
                    raise ValueError('Unexpected action: %s' % action)

        with TestPipeline(options=PipelineOptions(streaming=True)) as p:
            # Split the test stream into a branch of to-be-processed elements, and
            # a branch of expected results.
            inputs, expected = (
                p
                | test_stream
                | beam.MapTuple(lambda tag, value: beam.pvalue.TaggedOutput(
                    tag, ('key', value))).with_outputs('input', 'expect'))
            # Process the inputs with the given windowing to produce actual outputs.
            outputs = (
                inputs
                | beam.MapTuple(lambda key, value: TimestampedValue(
                    (key, value), value))
                | beam.WindowInto(window_fn,
                                  trigger=trigger_fn,
                                  accumulation_mode=accumulation_mode,
                                  timestamp_combiner=timestamp_combiner)
                | beam.GroupByKey()
                | beam.MapTuple(
                    lambda k, vs, window=beam.DoFn.WindowParam, t=beam.DoFn.
                    TimestampParam, p=beam.DoFn.PaneInfoParam:
                    (k,
                     self._windowed_value_info(
                         WindowedValue(
                             vs, windows=[window], timestamp=t, pane_info=p))))
                # Place outputs back into the global window to allow flattening
                # and share a single state in Check.
                | 'Global' >> beam.WindowInto(
                    beam.transforms.window.GlobalWindows()))
            # Feed both the expected and actual outputs to Check() for comparison.
            tagged_expected = (
                expected
                | beam.MapTuple(lambda key, value: (key, ('expect', value))))
            tagged_outputs = (
                outputs
                | beam.MapTuple(lambda key, value: (key, ('actual', value))))
            # pylint: disable=expression-not-assigned
            (tagged_expected, tagged_outputs) | beam.Flatten() | beam.ParDo(
                Check())

예제 #15

0

파일 보기

파일: trigger_test.py 프로젝트: tneymanov/beam

    def _execute(self, window_fn, trigger_fn, accumulation_mode,
                 timestamp_combiner, transcript, spec):
        """Run ``transcript`` through a streaming pipeline and verify outputs.

        The transcript is translated into a ``TestStream`` of JSON-encoded
        ``(tag, payload)`` pairs: 'input' elements are windowed and grouped,
        'expect' elements list the panes expected so far. Both branches are
        flattened into a stateful ``Check`` DoFn that compares them.

        Args:
          window_fn: Window function passed to ``beam.WindowInto``.
          trigger_fn: Trigger passed to ``beam.WindowInto``.
          accumulation_mode: Accumulation mode passed to ``beam.WindowInto``.
          timestamp_combiner: Timestamp combiner passed to
            ``beam.WindowInto``.
          transcript: Iterable of ``(action, params)`` pairs; the actions
            handled are 'expect', 'input', 'watermark', 'clock' and 'state'.
          spec: Mapping; its optional 'broken_on' entry lists runner class
            names on which the test is skipped.

        Raises:
          ValueError: If the transcript contains an unknown action.
        """

        # Skip on runners the spec declares as broken.
        runner_name = TestPipeline().runner.__class__.__name__
        if runner_name in spec.get('broken_on', ()):
            self.skipTest('Known to be broken on %s' % runner_name)

        # Elements are encoded as a json strings to allow other languages to
        # decode elements while executing the test stream.
        # TODO(BEAM-8600): Eliminate these gymnastics.
        test_stream = TestStream(
            coder=coders.StrUtf8Coder()).with_output_types(str)
        for action, params in transcript:
            if action == 'expect':
                test_stream.add_elements([json.dumps(('expect', params))])
            else:
                # Before any non-expect action, assert that nothing is
                # pending by expecting an empty list of outputs.
                test_stream.add_elements([json.dumps(('expect', []))])
                if action == 'input':
                    test_stream.add_elements(
                        [json.dumps(('input', e)) for e in params])
                elif action == 'watermark':
                    test_stream.advance_watermark_to(params)
                elif action == 'clock':
                    test_stream.advance_processing_time(params)
                elif action == 'state':
                    pass  # Requires inspection of implementation details.
                else:
                    raise ValueError('Unexpected action: %s' % action)
        # Final check that no unexpected output remains at the end.
        test_stream.add_elements([json.dumps(('expect', []))])

        # Decode the JSON strings back into (tag, payload) pairs.
        read_test_stream = test_stream | beam.Map(json.loads)

        class Check(beam.DoFn):
            """A StatefulDoFn that verifies outputs are produced as expected.

      This DoFn takes in two kinds of inputs, actual outputs and
      expected outputs.  When an actual output is received, it is buffered
      into state, and when an expected output is received, this buffered
      state is retrieved and compared against the expected value(s) to ensure
      they match.

      The key is ignored, but all items must be on the same key to share state.
      """
            def __init__(self, allow_out_of_order=True):
                # Some runners don't support cross-stage TestStream semantics.
                self.allow_out_of_order = allow_out_of_order

            def process(self,
                        element,
                        seen=beam.DoFn.StateParam(
                            beam.transforms.userstate.BagStateSpec(
                                'seen', beam.coders.FastPrimitivesCoder())),
                        expected=beam.DoFn.StateParam(
                            beam.transforms.userstate.BagStateSpec(
                                'expected',
                                beam.coders.FastPrimitivesCoder()))):
                # element is (key, (action, data)); the key is discarded.
                _, (action, data) = element

                if self.allow_out_of_order:
                    if action == 'expect' and not list(seen.read()):
                        # Expectation arrived before any actual output:
                        # queue it (when non-empty) and wait.
                        if data:
                            expected.add(data)
                        return
                    elif action == 'actual' and list(expected.read()):
                        # Actual output arrived while expectations are
                        # queued: buffer it, and once as many outputs as the
                        # oldest expectation lists are buffered, pop that
                        # expectation and fall through to compare.
                        seen.add(data)
                        all_data = list(seen.read())
                        all_expected = list(expected.read())
                        if len(all_data) == len(all_expected[0]):
                            expected.clear()
                            for expect in all_expected[1:]:
                                expected.add(expect)
                            action, data = 'expect', all_expected[0]
                        else:
                            return

                if action == 'actual':
                    seen.add(data)

                elif action == 'expect':
                    # Take everything buffered since the previous comparison.
                    actual = list(seen.read())
                    seen.clear()

                    if len(actual) > len(data):
                        raise AssertionError(
                            'Unexpected output: expected %s but got %s' %
                            (data, actual))
                    elif len(data) > len(actual):
                        raise AssertionError(
                            'Unmatched output: expected %s but got %s' %
                            (data, actual))
                    else:

                        def diff(actual, expected):
                            # Return the first key (in reverse-sorted order)
                            # present in both dicts whose values differ;
                            # implicitly None when there is no such key.
                            for key in sorted(expected.keys(), reverse=True):
                                if key in actual:
                                    if actual[key] != expected[key]:
                                        return key

                        for output in actual:
                            diffs = [
                                diff(output, expected) for expected in data
                            ]
                            # Every expectation differs somewhere, so this
                            # output matches none of them.
                            if all(diffs):
                                raise AssertionError(
                                    'Unmatched output: %s not found in %s (diffs in %s)'
                                    % (output, data, diffs))

                else:
                    raise ValueError('Unexpected action: %s' % action)

        with TestPipeline() as p:
            # TODO(BEAM-8601): Pass this during pipeline construction.
            p.options.view_as(StandardOptions).streaming = True
            # Split the test stream into a branch of to-be-processed elements, and
            # a branch of expected results.
            inputs, expected = (
                p
                | read_test_stream
                | beam.MapTuple(lambda tag, value: beam.pvalue.TaggedOutput(
                    tag, ('key', value))).with_outputs('input', 'expect'))
            # Process the inputs with the given windowing to produce actual outputs.
            outputs = (
                inputs
                | beam.MapTuple(lambda key, value: TimestampedValue(
                    (key, value), value))
                | beam.WindowInto(window_fn,
                                  trigger=trigger_fn,
                                  accumulation_mode=accumulation_mode,
                                  timestamp_combiner=timestamp_combiner)
                | beam.GroupByKey()
                | beam.MapTuple(
                    lambda k, vs, window=beam.DoFn.WindowParam, t=beam.DoFn.
                    TimestampParam, p=beam.DoFn.PaneInfoParam:
                    (k,
                     _windowed_value_info(
                         WindowedValue(
                             vs, windows=[window], timestamp=t, pane_info=p))))
                # Place outputs back into the global window to allow flattening
                # and share a single state in Check.
                | 'Global' >> beam.WindowInto(
                    beam.transforms.window.GlobalWindows()))
            # Feed both the expected and actual outputs to Check() for comparison.
            tagged_expected = (
                expected
                | beam.MapTuple(lambda key, value: (key, ('expect', value))))
            tagged_outputs = (
                outputs
                | beam.MapTuple(lambda key, value: (key, ('actual', value))))
            # pylint: disable=expression-not-assigned
            ([tagged_expected, tagged_outputs]
             | beam.Flatten()
             | beam.ParDo(Check(self.allow_out_of_order)))

예제 #16

0

파일 보기

파일: trigger_test.py 프로젝트: horvathaa/beamTest

    def _run_log(self, spec):
        """Replay the transcript in ``spec`` against an in-memory trigger driver.

        Args:
          spec: Mapping with a required 'transcript' entry (a list of
            single-entry ``{action: params}`` mappings) and the optional
            string entries 'window_fn' (default 'GlobalWindows'),
            'trigger_fn' (default 'Default'), 'accumulation_mode' (default
            'ACCUMULATING') and 'timestamp_combiner' (default
            'OUTPUT_AT_EOW').

        Supported actions are 'input', 'watermark', 'expect' and 'state' (the
        last is accepted but ignored). Output still pending when a
        non-'expect' action, or the end of the transcript, is reached fails
        the test.
        """
        def parse_int_list(s):
            """Parses strings like '[1, 2, 3]'."""
            s = s.strip()
            # startswith/endswith also cope with an empty string.
            assert s.startswith('[') and s.endswith(']'), s
            if not s[1:-1].strip():
                return []
            return [int(x) for x in s[1:-1].split(',')]

        def split_args(s):
            """Splits 'a, b, [c, d]' into ['a', 'b', '[c, d]']."""
            if not s.strip():
                # An empty argument list, as in 'Foo()'.
                return []
            args = []
            start = 0
            depth = 0
            for ix, c in enumerate(s):
                if c in '({[':
                    depth += 1
                elif c in ')}]':
                    depth -= 1
                elif c == ',' and depth == 0:
                    # Only commas outside any bracket separate arguments.
                    args.append(s[start:ix].strip())
                    start = ix + 1
            assert depth == 0, s
            args.append(s[start:].strip())
            return args

        def parse(s, names):
            """Parse (recursive) 'Foo(arg, kw=arg)' for Foo in the names dict."""
            s = s.strip()
            if s in names:
                return names[s]
            elif s.startswith('['):
                return parse_int_list(s)
            elif '(' in s:
                assert s[-1] == ')', s
                open_paren = s.index('(')
                callee = parse(s[:open_paren], names)
                posargs = []
                kwargs = {}
                for arg in split_args(s[open_paren + 1:-1]):
                    if '=' in arg:
                        kw, value = arg.split('=', 1)
                        kwargs[kw] = parse(value, names)
                    else:
                        posargs.append(parse(arg, names))
                return callee(*posargs, **kwargs)
            else:
                try:
                    return int(s)
                except ValueError:
                    raise ValueError('Unknown function: %s' % s)

        def parse_fn(s, names):
            """Like parse(), but implicitly calls no-arg constructors."""
            fn = parse(s, names)
            if isinstance(fn, type):
                return fn()
            return fn

        # pylint: disable=wrong-import-order, wrong-import-position
        from apache_beam.transforms import window as window_module
        # pylint: enable=wrong-import-order, wrong-import-position
        window_fn_names = dict(window_module.__dict__)
        window_fn_names.update({
            'CustomTimestampingFixedWindowsWindowFn':
            CustomTimestampingFixedWindowsWindowFn
        })
        trigger_names = {'Default': DefaultTrigger}
        trigger_names.update(trigger.__dict__)

        window_fn = parse_fn(spec.get('window_fn', 'GlobalWindows'),
                             window_fn_names)
        trigger_fn = parse_fn(spec.get('trigger_fn', 'Default'), trigger_names)
        accumulation_mode = getattr(
            AccumulationMode,
            spec.get('accumulation_mode', 'ACCUMULATING').upper())
        timestamp_combiner = getattr(
            TimestampCombiner,
            spec.get('timestamp_combiner', 'OUTPUT_AT_EOW').upper())

        driver = GeneralTriggerDriver(
            Windowing(window_fn, trigger_fn, accumulation_mode,
                      timestamp_combiner), TestClock())
        state = InMemoryUnmergedState()
        output = []
        watermark = MIN_TIMESTAMP

        def fire_timers():
            """Fire timers at the current watermark until none remain."""
            to_fire = state.get_and_clear_timers(watermark)
            while to_fire:
                for timer_window, (name, time_domain, t_timestamp) in to_fire:
                    for wvalue in driver.process_timer(timer_window, name,
                                                       time_domain,
                                                       t_timestamp, state):
                        # Timer output must live in exactly one window.
                        window, = wvalue.windows
                        output.append({
                            'window': [window.start, window.end - 1],
                            'values': sorted(wvalue.value),
                            'timestamp': wvalue.timestamp
                        })
                to_fire = state.get_and_clear_timers(watermark)

        for line in spec['transcript']:

            # Each line is a single-entry mapping. dict views cannot be
            # indexed on Python 3, so take the first item via an iterator.
            action, params = next(iter(line.items()))

            if action != 'expect':
                # Fail if we have output that was not expected in the transcript.
                self.assertEqual([],
                                 output,
                                 msg='Unexpected output: %s before %s' %
                                 (output, line))

            if action == 'input':
                bundle = [
                    WindowedValue(
                        t, t, window_fn.assign(WindowFn.AssignContext(t, t)))
                    for t in params
                ]
                # Rebinding is deliberate: fire_timers() appends to whatever
                # list ``output`` currently names.
                output = [{
                    'window':
                    [wvalue.windows[0].start, wvalue.windows[0].end - 1],
                    'values':
                    sorted(wvalue.value),
                    'timestamp':
                    wvalue.timestamp
                } for wvalue in driver.process_elements(
                    state, bundle, watermark)]
                fire_timers()

            elif action == 'watermark':
                watermark = params
                fire_timers()

            elif action == 'expect':
                for expected_output in params:
                    for candidate in output:
                        # A candidate matches when it agrees on every key it
                        # shares with the expectation.
                        if all(candidate[k] == expected_output[k]
                               for k in candidate if k in expected_output):
                            output.remove(candidate)
                            break
                    else:
                        self.fail('Unmatched output %s in %s' %
                                  (expected_output, output))

            elif action == 'state':
                # TODO(robertwb): Implement once we support allowed lateness.
                pass

            else:
                self.fail('Unknown action: ' + action)

        # Fail if we have output that was not expected in the transcript.
        self.assertEqual([], output, msg='Unexpected output: %s' % output)

예제 #17

0

파일 보기

파일: window_test.py 프로젝트: ziel/beam

 def timestamped_key_values(self, pipeline, key, *timestamps):
   """Create ``(key, ts)`` pairs, each stamped at ``ts`` in a GlobalWindow.

   One element is produced for every entry in ``timestamps``.
   """
   created = pipeline | 'start' >> Create(timestamps)
   return created | Map(
       lambda x: WindowedValue((key, x), x, [GlobalWindow()]))

예제 #18

0

파일 보기

 def process_element(self, element):
     """Output ``element`` with ``self.timing_info`` paired onto its value.

     Timestamp, windows and pane info are carried over unchanged.
     """
     paired_value = (element.value, self.timing_info)
     self.bundle.output(
         WindowedValue(paired_value, element.timestamp, element.windows,
                       element.pane_info))

예제 #19

0

파일 보기

    def finish_bundle(self):
        """Emit an 'ask' engagement record for each qualifying pair.

        ``self.batch`` holds ``(uid, data)`` groups where ``data`` maps
        'questions', 'engagements' and 'users' to lists. For every group
        that has all three, each (question, engagement) pair qualifies when
        the push notification was sent before the question was created and
        less than ``self.engagement_range`` days before it. Qualifying pairs
        are yielded as ``WindowedValue`` records at timestamp 0 in
        ``self.window``.

        Groups whose 'questions', 'engagements' or 'users' entry is missing
        or empty are skipped.
        """
        user_id_list = [
            int(group[0]) for group in self.batch
            if group[1].get('engagements')
        ]

        # Index the open-PN events by user once instead of rescanning the
        # whole event list for every group.
        events_by_uid = {}
        if user_id_list:
            # Get open PN events that happened from (from_ts - engagement_range)
            # to (to_ts). This makes sure that if a question is created right
            # after from_ts, we still get the open PN event that happened up
            # to engagement_range days before it and led to that question.
            events = get_open_pn_events_of_user_list(
                user_id_list,
                from_ts=self.from_ts - self.engagement_range * 24 * 60 * 60,
                to_ts=self.to_ts,
                giap_es_username=self.giap_es_username,
                giap_es_password=self.giap_es_password,
                giap_es_index=self.giap_es_index)
            for event in events:
                events_by_uid.setdefault(event['_source']['uid'],
                                         []).append(event)

        new_engagement_records = []

        for group in self.batch:
            uid = int(group[0])
            questions = group[1].get('questions')
            engagements = group[1].get('engagements')
            users = group[1].get('users')

            if not users or not questions or not engagements:
                continue

            user = users[0]
            user_events = events_by_uid.get(uid, ())

            for question in questions:
                question_created = int(question.get('created'))
                # Start of the look-back period; it depends only on the
                # question, so compute it once per question.
                question_created_date = datetime.fromtimestamp(
                    question_created)
                n_days_ago_date = question_created_date - timedelta(
                    days=self.engagement_range)
                n_days_ago_ts = n_days_ago_date.timestamp()

                for engagement in engagements:
                    send_push_noti_time = int(
                        engagement.get('send_push_noti_time'))

                    if not (question_created > send_push_noti_time >
                            n_days_ago_ts):
                        continue

                    # Campaign of the first event (in the order the events
                    # were returned) that falls strictly between the push
                    # notification and the question; event times are in ms.
                    # NOTE(review): the previous comment said "last event";
                    # confirm the ordering of the returned events.
                    pn_campaign = next(
                        (event['_source'].get('campaign')
                         for event in user_events
                         if send_push_noti_time * 1000 <
                         event['_source']['$time'] < question_created * 1000),
                        None)

                    new_engagement_records.append({
                        'created': int(datetime.now().timestamp()),
                        'action': 'ask',
                        'question_id': question['qid'],
                        'uid': uid,
                        'type': engagement.get('type'),
                        'inactive_days': engagement.get('inactive_days'),
                        'balance': user.get('balance', 0),
                        'grade': engagement.get('grade'),
                        'send_push_noti_time':
                        engagement.get('send_push_noti_time'),
                        'action_time': question['created'],
                        'campaign': pn_campaign,
                    })

        for record in new_engagement_records:
            yield WindowedValue(
                value=record,
                timestamp=0,
                windows=[self.window],
            )