Python EmulatedIterable Exemples, apache_beam.runners.worker.sideinputs.EmulatedIterable Python Exemples

Exemple #1

0

Afficher le fichier

  def _read_side_inputs(self, tags_and_types):
    # type: (...) -> Iterator[apache_sideinputs.SideInputMap]

    """Generator reading side inputs in the order prescribed by tags_and_types.

    Args:
      tags_and_types: Iterable of (side_tag, view_class, view_options)
        triples. side_tag is compared against the ``tag`` attribute of each
        entry in ``self.spec.side_inputs`` to select the sources to read.
        view_class and view_options are forwarded to the yielded
        SideInputMap. A view_options given as a tuple is treated as the
        legacy (has_default, default) format and converted to a dict.

    Yields:
      One apache_sideinputs.SideInputMap per entry of tags_and_types, in the
      same order, wrapping a re-iterable view over all sources that carry
      the entry's tag.

    Raises:
      NotImplementedError: If a side input matching a tag is not an
        operation_specs.WorkerSideInputSource.
    """
    # Only call this on the old path where side_input_maps was not
    # provided directly.
    assert self.side_input_maps is None

    # We will read the side inputs in the order prescribed by the
    # tags_and_types argument because this is exactly the order needed to
    # replace the ArgumentPlaceholder objects in the args/kwargs of the DoFn
    # getting the side inputs.
    #
    # Note that for each tag there could be several read operations in the
    # specification. This can happen for instance if the source has been
    # sharded into several files.
    for i, (side_tag, view_class, view_options) in enumerate(tags_and_types):
      sources = []
      for si in self.spec.side_inputs:
        if si.tag != side_tag:
          continue
        if not isinstance(si, operation_specs.WorkerSideInputSource):
          raise NotImplementedError('Unknown side input type: %r' % si)
        sources.append(si.source)

      # Build exactly one read counter per side input, after all of its
      # sources have been collected. Creating it inside the loop above would
      # construct one counter per source and, when no source matches the tag,
      # leave the name unbound (or bound to the previous tag's counter, which
      # carries the wrong input_index).
      si_counter = opcounters.SideInputReadCounter(
          self.counter_factory,
          self.state_sampler,
          declaring_step=self.name_context.step_name,
          # Inputs are 1-indexed, so we add 1 to i in the side input id
          input_index=i + 1)
      iterator_fn = sideinputs.get_iterator_fn_for_sources(
          sources, read_counter=si_counter)

      # Backwards compatibility for pre BEAM-733 SDKs.
      if isinstance(view_options, tuple):
        if view_class == pvalue.AsSingleton:
          has_default, default = view_options
          view_options = {'default': default} if has_default else {}
        else:
          view_options = {}

      yield apache_sideinputs.SideInputMap(
          view_class, view_options, sideinputs.EmulatedIterable(iterator_fn))

Exemple #2

0

Afficher le fichier

Fichier : sideinputs_test.py Projet : jeganbaskar/GAE-Data-Flow

 def test_emulated_iterable(self):
   """Check that an EmulatedIterable can be iterated over repeatedly."""
   def _first_ten_ints():
     # Generator function producing 0 through 9 in order.
     value = 0
     while value < 10:
       yield value
       value += 1

   emulated = sideinputs.EmulatedIterable(_first_ten_ints)
   # Every one of the five passes must see the full sequence again, with
   # each value equal to its position.
   for _ in range(5):
     for position, value in enumerate(emulated):
       self.assertEqual(position, value)

Exemple #3

0

Afficher le fichier

Fichier : sideinputs_test.py Projet : jeganbaskar/GAE-Data-Flow

 def test_large_iterable_values(self):
   """Check repeated iteration over an EmulatedIterable of very large values."""
   # The whole collection would be too big for memory-constrained test
   # environments, but should stay under the memory limit as long as the
   # values are materialized one at a time.
   repeat_count = 200 * 1024 * 1024

   def _large_value(index):
     # The decimal digits of `index`, repeated `repeat_count` times.
     return ('%d' % index) * repeat_count

   def _iterable_fn():
     for index in range(10):
       yield _large_value(index)

   large_values = sideinputs.EmulatedIterable(_iterable_fn)
   # Check that multiple iterations are supported.
   for _ in range(3):
     for index, value in enumerate(large_values):
       self.assertEqual(_large_value(index), value)

Exemple #4

0

Afficher le fichier

    def _read_side_inputs(self, tags_and_types):
        """Generator reading side inputs in the order prescribed by tags_and_types.

        Args:
          tags_and_types: Iterable of (side_tag, view_class, view_options)
            triples. side_tag is compared against the ``tag`` attribute of
            each entry in ``self.spec.side_inputs`` to select the sources to
            read. view_class and view_options are forwarded to the yielded
            SideInputMap. A view_options given as a tuple is treated as the
            legacy (has_default, default) format and converted to a dict.

        Yields:
          One apache_sideinputs.SideInputMap per entry of tags_and_types, in
          the same order, wrapping a re-iterable view over all sources that
          carry the entry's tag.

        Raises:
          NotImplementedError: If a side input matching a tag is not an
            operation_specs.WorkerSideInputSource.
        """
        # We will read the side inputs in the order prescribed by the
        # tags_and_types argument because this is exactly the order needed to
        # replace the ArgumentPlaceholder objects in the args/kwargs of the DoFn
        # getting the side inputs.
        #
        # Note that for each tag there could be several read operations in the
        # specification. This can happen for instance if the source has been
        # sharded into several files.
        for side_tag, view_class, view_options in tags_and_types:
            sources = []
            # A plain filtered loop is used instead of itertools.ifilter, which
            # only exists on Python 2; this form behaves the same on both
            # Python 2 and Python 3.
            for si in self.spec.side_inputs:
                if si.tag != side_tag:
                    continue
                if not isinstance(si, operation_specs.WorkerSideInputSource):
                    raise NotImplementedError('Unknown side input type: %r' %
                                              si)
                sources.append(si.source)
            iterator_fn = sideinputs.get_iterator_fn_for_sources(sources)

            # Backwards compatibility for pre BEAM-733 SDKs.
            if isinstance(view_options, tuple):
                if view_class == pvalue.AsSingleton:
                    has_default, default = view_options
                    view_options = {'default': default} if has_default else {}
                else:
                    view_options = {}

            yield apache_sideinputs.SideInputMap(
                view_class, view_options,
                sideinputs.EmulatedIterable(iterator_fn))