예제 #1
0
 def test_not_has_unbounded_source(self):
     """Check that a pipeline reading only from a text file has no unbounded source.

     An in-memory cache manager is registered for the pipeline, a temporary
     file is created as input for `ReadFromText`, and
     `instr.has_unbounded_sources` is expected to return a falsy value.
     """
     # Function-scope import so the block does not depend on a module-level
     # `os` import being present.
     import os

     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     ie.current_env().set_cache_manager(InMemoryCache(), p)
     with tempfile.NamedTemporaryFile(delete=False) as f:
         # delete=False keeps the file on disk after the handle is closed so
         # it can be referenced by name below; remove it explicitly once the
         # test finishes so repeated runs do not leak temporary files.
         self.addCleanup(os.remove, f.name)
         f.write(b'test')
     _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
     self.assertFalse(instr.has_unbounded_sources(p))
예제 #2
0
def has_source_to_cache(user_pipeline):
  """Report whether `user_pipeline` contains a source that needs caching.

  Args:
    user_pipeline: the pipeline to inspect; it is handed unchanged to
      `pipeline_instrument.has_unbounded_sources`.

  Returns:
    The value returned by `has_unbounded_sources` for the pipeline.
  """
  # Imported at call time rather than at module level (presumably to avoid an
  # import cycle -- confirm before moving it).
  from apache_beam.runners.interactive import pipeline_instrument
  # TODO(BEAM-8335): only replaceable unbounded sources are treated as
  # cacheable for now. Extend this check once other cacheable sources exist.
  found_unbounded = pipeline_instrument.has_unbounded_sources(user_pipeline)
  return found_unbounded
예제 #3
0
def has_source_to_cache(user_pipeline):
    """Report whether `user_pipeline` contains a source that needs caching.

    When such a source is found and the cache manager registered for the
    pipeline in the current interactive environment is not already a
    `StreamingCache`, that manager is replaced by a `StreamingCache` built on
    the existing manager's `_cache_dir`.

    Callers can use the result to decide whether a background source
    recording job and a test stream service are needed.

    Args:
      user_pipeline: the pipeline to inspect; also the key under which the
        cache manager is looked up and registered.

    Returns:
      The value returned by `has_unbounded_sources` for the pipeline.
    """
    from apache_beam.runners.interactive import pipeline_instrument
    # TODO(BEAM-8335): only replaceable unbounded sources are treated as
    # cacheable for now. Extend this check once other cacheable sources exist.
    found_unbounded = pipeline_instrument.has_unbounded_sources(user_pipeline)
    if not found_unbounded:
        return found_unbounded

    # Look up the pipeline's cache manager, creating one if none exists yet.
    active_manager = ie.current_env().get_cache_manager(
        user_pipeline, create_if_absent=True)
    if isinstance(active_manager, streaming_cache.StreamingCache):
        # Already a streaming cache; nothing to replace.
        return found_unbounded

    # Reuse the directory of the manager currently registered for the pipeline.
    existing_dir = ie.current_env().get_cache_manager(user_pipeline)._cache_dir
    streaming_manager = streaming_cache.StreamingCache(
        existing_dir,
        is_cache_complete=is_cache_complete,
        sample_resolution_sec=1.0)
    ie.current_env().set_cache_manager(streaming_manager, user_pipeline)
    return found_unbounded
예제 #4
0
def has_source_to_cache(user_pipeline):
  """Report whether `user_pipeline` contains a source that needs caching.

  When such a source is found, the current interactive environment's cache
  manager is checked against `StreamingCache`. Converting a non-streaming
  manager into a streaming one is still a TODO, so that branch currently does
  nothing.

  Callers can use the result to decide whether a background caching job and a
  test stream service are needed.

  Args:
    user_pipeline: the pipeline to inspect.

  Returns:
    The value returned by `has_unbounded_sources` for the pipeline.
  """
  from apache_beam.runners.interactive import pipeline_instrument
  # TODO(BEAM-8335): only replaceable unbounded sources are treated as
  # cacheable for now. Extend this check once other cacheable sources exist.
  found_unbounded = pipeline_instrument.has_unbounded_sources(user_pipeline)
  if not found_unbounded:
    return found_unbounded

  active_manager = ie.current_env().cache_manager()
  already_streaming = isinstance(active_manager, streaming_cache.StreamingCache)
  if not already_streaming:
    # TODO(BEAM-8335): turn the cache manager into a streaming cache manager.
    # Doing so must not invalidate the current cache, including the source
    # data capture.
    pass
  return found_unbounded
예제 #5
0
 def test_not_has_unbounded_source(self):
     """Check that a pipeline reading only from a text file has no unbounded source.

     A temporary file is created as input for `ReadFromText`, and
     `instr.has_unbounded_sources` is expected to return a falsy value.
     """
     # Function-scope import so the block does not depend on a module-level
     # `os` import being present.
     import os

     p = beam.Pipeline(interactive_runner.InteractiveRunner())
     with tempfile.NamedTemporaryFile(delete=False) as f:
         # delete=False keeps the file on disk after the handle is closed so
         # it can be referenced by name below; remove it explicitly once the
         # test finishes so repeated runs do not leak temporary files.
         self.addCleanup(os.remove, f.name)
         f.write(b'test')
     _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
     self.assertFalse(instr.has_unbounded_sources(p))
예제 #6
0
 def test_has_unbounded_source(self):
     """Check that a pipeline reading from Pub/Sub reports an unbounded source."""
     pipeline = beam.Pipeline(interactive_runner.InteractiveRunner())
     # The subscription path is a placeholder; the pipeline is only
     # constructed here, never run.
     pubsub_read = beam.io.ReadFromPubSub(
         subscription='projects/fake-project/subscriptions/fake_sub')
     _ = pipeline | 'ReadUnboundedSource' >> pubsub_read
     found_unbounded = instr.has_unbounded_sources(pipeline)
     self.assertTrue(found_unbounded)
예제 #7
0
 def test_has_unbounded_source(self):
     """Check that a pipeline reading from Pub/Sub reports an unbounded source.

     An in-memory cache manager is registered for the pipeline before the
     source is attached.
     """
     pipeline = beam.Pipeline(interactive_runner.InteractiveRunner())
     ie.current_env().set_cache_manager(InMemoryCache(), pipeline)
     # The subscription path is a placeholder; the pipeline is only
     # constructed here, never run.
     pubsub_read = beam.io.ReadFromPubSub(
         subscription='projects/fake-project/subscriptions/fake_sub')
     _ = pipeline | 'ReadUnboundedSource' >> pubsub_read
     found_unbounded = instr.has_unbounded_sources(pipeline)
     self.assertTrue(found_unbounded)