Esempio n. 1
0
    def get_cache_manager(self, pipeline, create_if_absent=False):
        """Returns the cache manager tracked for `pipeline`, optionally creating it.

        Managers are kept in `self._cache_managers`, keyed by
        `str(id(pipeline))`. If no (truthy) manager is stored and
        `create_if_absent` is true, a new `cache.FileBasedCacheManager` is
        created, stored under that key and returned. Its directory is chosen as
        follows:

        1. `self.options.cache_root` starts with 'gs://': the result of
           `self._get_gcs_cache_dir(pipeline, cache_root)`.
        2. Any other non-empty `cache_root`: a fresh temporary directory
           created inside it; a warning is logged when the detected runner is
           not a `DirectRunner`.
        3. No `cache_root`, the detected runner is a `DataflowRunner` and the
           pipeline options carry a 'staging_location': the result of
           `self._get_gcs_cache_dir(pipeline, staging_location)`.
        4. Otherwise: a fresh temporary directory, created under the
           `TEST_TMPDIR` environment variable when it is set.

        Args:
            pipeline: The pipeline whose cache manager is requested.
            create_if_absent: Whether to create and register a manager when
                none is stored for the pipeline.

        Returns:
            The stored manager, the newly created manager, or whatever the
            lookup produced (None when nothing is stored) if creation was not
            requested.
        """
        if self._is_in_ipython:
            # Ignore DeprecationWarnings whose message matches the text below.
            # NOTE(review): presumably triggered by the `pipeline.options`
            # access further down - confirm.
            warnings.filterwarnings(
                'ignore',
                'options is deprecated since First stable release. References to '
                '<pipeline>.options will not be supported',
                category=DeprecationWarning)

        pipeline_key = str(id(pipeline))
        manager = self._cache_managers.get(pipeline_key, None)
        # Detected up front, before the early return, to keep the call order.
        runner = detect_pipeline_runner(pipeline)
        if manager or not create_if_absent:
            return manager

        cache_root = self.options.cache_root
        if cache_root and cache_root.startswith('gs://'):
            cache_dir = self._get_gcs_cache_dir(pipeline, cache_root)
        elif cache_root:
            cache_dir = tempfile.mkdtemp(dir=cache_root)
            runs_on_direct = isinstance(runner, direct_runner.DirectRunner)
            if not runs_on_direct:
                _LOGGER.warning(
                    'A local cache directory has been specified while '
                    'not using DirectRunner. It is recommended to cache into a '
                    'GCS bucket instead.')
        else:
            all_options = pipeline.options.get_all_options()
            staging_location = all_options['staging_location']
            if isinstance(runner, DataflowRunner) and staging_location:
                cache_dir = self._get_gcs_cache_dir(pipeline, staging_location)
                _LOGGER.info(
                    'No cache_root detected. '
                    'Defaulting to staging_location %s for cache location.',
                    staging_location)
            else:
                tmp_parent = os.environ.get('TEST_TMPDIR', None)
                cache_dir = tempfile.mkdtemp(
                    suffix=pipeline_key, prefix='it-', dir=tmp_parent)

        manager = cache.FileBasedCacheManager(cache_dir)
        self._cache_managers[pipeline_key] = manager
        return manager
Esempio n. 2
0
 def test_detect_pipeline_no_runner(self):
     """detect_pipeline_runner returns None when given None as the pipeline."""
     pipeline_runner = utils.detect_pipeline_runner(None)
     # assertIsNone checks identity and gives a clearer failure message than
     # comparing against None with assertEqual.
     self.assertIsNone(pipeline_runner)
Esempio n. 3
0
 def test_detect_pipeline_no_underlying_runner(self):
     """An InteractiveRunner built with no underlying runner is detected as a
     DirectRunner."""
     from apache_beam.runners.direct.direct_runner import DirectRunner

     p = beam.Pipeline(InteractiveRunner())
     pipeline_runner = utils.detect_pipeline_runner(p)
     # assertIsInstance reports the actual object and expected type on
     # failure, unlike assertTrue(isinstance(...)).
     self.assertIsInstance(pipeline_runner, DirectRunner)
Esempio n. 4
0
 def test_detect_pipeline_underlying_runner(self):
     """The runner detected for an InteractiveRunner wrapping a FlinkRunner is
     a FlinkRunner."""
     p = beam.Pipeline(InteractiveRunner(underlying_runner=FlinkRunner()))
     pipeline_runner = utils.detect_pipeline_runner(p)
     # assertIsInstance reports the actual object and expected type on
     # failure, unlike assertTrue(isinstance(...)).
     self.assertIsInstance(pipeline_runner, FlinkRunner)