def test_not_has_unbounded_source(self):
    # Function-scope import: only needed to remove the temp file afterwards.
    import os

    p = beam.Pipeline(interactive_runner.InteractiveRunner())
    ie.current_env().set_cache_manager(InMemoryCache(), p)
    # delete=False keeps the file on disk once the handle is closed so it can
    # be referenced by name below; that also means nothing removes it
    # automatically, so register an explicit cleanup to avoid leaking a temp
    # file on every test run (the cleanup also runs if the test fails).
    with tempfile.NamedTemporaryFile(delete=False) as f:
        self.addCleanup(os.remove, f.name)
        f.write(b'test')
    _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
    # The only source applied to the pipeline is the text-file read, so the
    # check is expected to report no unbounded sources.
    self.assertFalse(instr.has_unbounded_sources(p))
def has_source_to_cache(user_pipeline):
    """Tell whether a user-defined pipeline has any source that needs caching.

    Args:
      user_pipeline: the user-defined pipeline to inspect.

    Returns:
      Whatever ``pipeline_instrument.has_unbounded_sources`` reports for
      ``user_pipeline``; unbounded sources are currently the only kind
      considered cacheable here.
    """
    # Imported at call time rather than at module load
    # (presumably to avoid a circular import -- TODO confirm).
    from apache_beam.runners.interactive import pipeline_instrument as instr

    # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
    # Add logic for other cacheable sources here when they are available.
    needs_caching = instr.has_unbounded_sources(user_pipeline)
    return needs_caching
def has_source_to_cache(user_pipeline):
    """Tell whether a user-defined pipeline has any source that needs caching.

    When such a source exists and the cache manager that the current
    interactive environment holds for ``user_pipeline`` is not yet a
    ``StreamingCache``, a new ``StreamingCache`` is created on the same cache
    directory and registered for the pipeline in its place.

    The result can be used to decide whether a background source recording job
    is needed to write cache for sources and whether a test stream service is
    needed to serve the cache.

    NOTE(review): earlier documentation stated that invalidated cache is also
    cleaned up during this check when the source-to-cache changed; nothing in
    this function body does that directly -- confirm whether a callee does.

    Args:
      user_pipeline: the user-defined pipeline to inspect.

    Returns:
      The result of ``pipeline_instrument.has_unbounded_sources`` for
      ``user_pipeline``.
    """
    from apache_beam.runners.interactive import pipeline_instrument as instr

    # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
    # Add logic for other cacheable sources here when they are available.
    found_unbounded = instr.has_unbounded_sources(user_pipeline)
    if not found_unbounded:
        return found_unbounded

    # Look up (creating if absent) the manager currently registered for this
    # pipeline; nothing more to do when it is already a streaming cache.
    current_manager = ie.current_env().get_cache_manager(
        user_pipeline, create_if_absent=True)
    if isinstance(current_manager, streaming_cache.StreamingCache):
        return found_unbounded

    # Build the streaming cache on the directory of the registered manager
    # (read from its private ``_cache_dir`` attribute), then register it.
    cache_dir = ie.current_env().get_cache_manager(user_pipeline)._cache_dir
    streaming_manager = streaming_cache.StreamingCache(
        cache_dir,
        is_cache_complete=is_cache_complete,
        sample_resolution_sec=1.0)
    ie.current_env().set_cache_manager(streaming_manager, user_pipeline)
    return found_unbounded
def has_source_to_cache(user_pipeline):
    """Tell whether a user-defined pipeline has any source that needs caching.

    When such a source exists, the cache manager held by the current
    interactive environment is checked against ``StreamingCache``. Converting
    a non-streaming manager into a streaming cache is intended to happen here
    but is still a TODO; at present the check has no further effect.

    The result can be used to decide whether a background caching job is
    needed to write cache for sources and whether a test stream service is
    needed to serve the cache.

    Args:
      user_pipeline: the user-defined pipeline to inspect.

    Returns:
      The result of ``pipeline_instrument.has_unbounded_sources`` for
      ``user_pipeline``.
    """
    from apache_beam.runners.interactive import pipeline_instrument as instr

    # TODO(BEAM-8335): we temporarily only cache replaceable unbounded sources.
    # Add logic for other cacheable sources here when they are available.
    found_unbounded = instr.has_unbounded_sources(user_pipeline)
    if found_unbounded:
        current_manager = ie.current_env().cache_manager()
        already_streaming = isinstance(
            current_manager, streaming_cache.StreamingCache)
        if not already_streaming:
            # TODO(BEAM-8335): convert the cache manager into a streaming cache
            # manager. Note this does not invalidate the current cache
            # including the source data capture.
            pass
    return found_unbounded
def test_not_has_unbounded_source(self):
    # Function-scope import: only needed to remove the temp file afterwards.
    import os

    p = beam.Pipeline(interactive_runner.InteractiveRunner())
    # delete=False keeps the file on disk once the handle is closed so it can
    # be referenced by name below; that also means nothing removes it
    # automatically, so register an explicit cleanup to avoid leaking a temp
    # file on every test run (the cleanup also runs if the test fails).
    with tempfile.NamedTemporaryFile(delete=False) as f:
        self.addCleanup(os.remove, f.name)
        f.write(b'test')
    _ = p | 'ReadBoundedSource' >> beam.io.ReadFromText(f.name)
    # The only source applied to the pipeline is the text-file read, so the
    # check is expected to report no unbounded sources.
    self.assertFalse(instr.has_unbounded_sources(p))
def test_has_unbounded_source(self):
    # A pipeline that reads from Pub/Sub is expected to be reported as
    # containing an unbounded source.
    pipeline = beam.Pipeline(interactive_runner.InteractiveRunner())
    pubsub_read = beam.io.ReadFromPubSub(
        subscription='projects/fake-project/subscriptions/fake_sub')
    _ = pipeline | 'ReadUnboundedSource' >> pubsub_read
    self.assertTrue(instr.has_unbounded_sources(pipeline))
def test_has_unbounded_source(self):
    # A pipeline that reads from Pub/Sub is expected to be reported as
    # containing an unbounded source.
    pipeline = beam.Pipeline(interactive_runner.InteractiveRunner())
    # Register an in-memory cache manager for this pipeline before any
    # transform is applied.
    ie.current_env().set_cache_manager(InMemoryCache(), pipeline)
    pubsub_read = beam.io.ReadFromPubSub(
        subscription='projects/fake-project/subscriptions/fake_sub')
    _ = pipeline | 'ReadUnboundedSource' >> pubsub_read
    self.assertTrue(instr.has_unbounded_sources(pipeline))