Example #1
File: utils_test.py  Project: mszb/beam
 def test_ipython_log_handler_not_double_registered(self):
     utils.register_ipython_log_handler()
     ipython_log_handlers = list(
         filter(lambda x: isinstance(x, utils.IPythonLogHandler), [
             handler for handler in self._interactive_root_logger.handlers
         ]))
     self.assertEqual(1, len(ipython_log_handlers))
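The test above only asserts idempotence. Below is a minimal sketch of the register-once pattern it exercises, using a hypothetical handler class in place of utils.IPythonLogHandler:

import logging

class ExampleLogHandler(logging.Handler):
    """Hypothetical stand-in for utils.IPythonLogHandler."""
    def emit(self, record):
        print(self.format(record))

def register_example_log_handler():
    # Attach the handler only if one of its kind is not already present, so
    # repeated calls (as the test above asserts) never double-register it.
    logger = logging.getLogger('apache_beam.runners.interactive')
    if not any(isinstance(h, ExampleLogHandler) for h in logger.handlers):
        logger.addHandler(ExampleLogHandler())
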
Example #2
    def __init__(self, cache_manager=None):
        self._cache_manager = cache_manager
        # Register a cleanup routine for when the kernel is restarted or terminated.
        if cache_manager:
            atexit.register(self.cleanup)
        # Holds class instances, module objects, and strings of module names.
        self._watching_set = set()
        # Holds lists of watched variables (each captured from a Dict[str, object]).
        self._watching_dict_list = []
        # Holds results of main jobs as Dict[Pipeline, PipelineResult].
        # Each key is a pipeline instance defined by the end user. The
        # InteractiveRunner is responsible for populating this dictionary
        # implicitly.
        self._main_pipeline_results = {}
        # Holds background caching jobs as Dict[Pipeline, BackgroundCachingJob].
        # Each key is a pipeline instance defined by the end user. The
        # InteractiveRunner or its enclosing scope is responsible for populating
        # this dictionary implicitly when a background caching job is started.
        self._background_caching_jobs = {}
        # Holds TestStreamServiceControllers that control gRPC servers serving
        # events as a test stream of TestStreamPayload.Events.
        # Dict[Pipeline, TestStreamServiceController]. Each key is a pipeline
        # instance defined by the end user. The InteractiveRunner or its enclosing
        # scope is responsible for populating this dictionary implicitly when a new
        # controller is created to start a new gRPC server. The server stays alive
        # until a new background caching job is started thus invalidating everything
        # the gRPC server serves.
        self._test_stream_service_controllers = {}
        self._cached_source_signature = {}
        self._tracked_user_pipelines = set()
        # Tracks the computation completeness of PCollections. PCollections tracked
        # here don't need to be re-computed when data introspection is needed.
        self._computed_pcolls = set()
        # Always watch __main__ module.
        self.watch('__main__')
        # Log a warning if the current Python version is below 3.6.
        if sys.version_info < (3, 6):
            self._is_py_version_ready = False
            _LOGGER.warning('Interactive Beam requires Python 3.6+.')
        else:
            self._is_py_version_ready = True
        # Check if [interactive] dependencies are installed.
        try:
            import IPython  # pylint: disable=unused-import
            import timeloop  # pylint: disable=unused-import
            from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
            self._is_interactive_ready = True
        except ImportError:
            self._is_interactive_ready = False
            _LOGGER.warning(
                'Dependencies required for Interactive Beam PCollection '
                'visualization are not available, please use: `pip '
                'install apache-beam[interactive]` to install necessary '
                'dependencies to enable all data visualization features.')

        self._is_in_ipython = is_in_ipython()
        self._is_in_notebook = is_in_notebook()
        if not self._is_in_ipython:
            _LOGGER.warning(
                'You cannot use Interactive Beam features when you are '
                'not in an interactive environment such as a Jupyter '
                'notebook or ipython terminal.')
        if self._is_in_ipython and not self._is_in_notebook:
            _LOGGER.warning(
                'You have limited Interactive Beam features since your '
                'ipython kernel is not connected to any notebook frontend.')
        if self._is_in_notebook:
            self.load_jquery_with_datatable()
            self.import_html_to_head([
                'https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist'
                '/facets-jupyter.html'
            ])
            register_ipython_log_handler()
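A minimal, self-contained sketch of two patterns the constructor above relies on: registering a cleanup routine with atexit and probing optional dependencies with a guarded import. The names here (ExampleEnv, cleanup) are illustrative, not Beam's API:

import atexit
import logging

_LOG = logging.getLogger(__name__)

class ExampleEnv:
    def __init__(self, cache_manager=None):
        self._cache_manager = cache_manager
        if cache_manager:
            # Run cleanup when the interpreter (or kernel) exits.
            atexit.register(self.cleanup)
        try:
            import IPython  # pylint: disable=unused-import
            self._is_interactive_ready = True
        except ImportError:
            self._is_interactive_ready = False
            _LOG.warning('Install apache-beam[interactive] for full features.')

    def cleanup(self):
        if self._cache_manager:
            # Assumes the cache manager exposes a cleanup() method.
            self._cache_manager.cleanup()
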
Example #3
File: utils_test.py  Project: mszb/beam
 def setUp(self):
     utils.register_ipython_log_handler()
     self._interactive_root_logger = logging.getLogger(
         'apache_beam.runners.interactive')
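A hypothetical test built on the same setUp, showing how the 'apache_beam.runners.interactive' logger obtained there can be asserted against with unittest's assertLogs:

import logging
import unittest

class ExampleLoggingTest(unittest.TestCase):
    def setUp(self):
        self._interactive_root_logger = logging.getLogger(
            'apache_beam.runners.interactive')

    def test_warning_is_captured(self):
        # assertLogs collects records emitted through this logger and its children.
        with self.assertLogs(self._interactive_root_logger, level='WARNING') as cm:
            self._interactive_root_logger.warning('example warning')
        self.assertEqual(1, len(cm.records))
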
Example #4
  def __init__(self):
    # Registers a cleanup routine for when the system exits.
    atexit.register(self.cleanup)
    # Holds cache managers that manage source recording and intermediate
    # PCollection caches for each pipeline. Each key is the stringified id of a
    # user-defined pipeline instance.
    self._cache_managers = {}
    # Holds RecordingManagers keyed by pipeline instance id.
    self._recording_managers = {}
    # Holds class instances, module objects, and strings of module names.
    self._watching_set = set()
    # Holds lists of watched variables (each captured from a Dict[str, object]).
    self._watching_dict_list = []
    # Holds results of main jobs as Dict[str, PipelineResult].
    # Each key is the id of a pipeline instance defined by the end user. The
    # InteractiveRunner is responsible for populating this dictionary
    # implicitly.
    self._main_pipeline_results = {}
    # Holds background caching jobs as Dict[str, BackgroundCachingJob].
    # Each key is the id of a pipeline instance defined by the end user. The
    # InteractiveRunner or its enclosing scope is responsible for populating
    # this dictionary implicitly when a background caching job is started.
    self._background_caching_jobs = {}
    # Holds TestStreamServiceControllers that control gRPC servers serving
    # events as a test stream of TestStreamPayload.Events.
    # Dict[str, TestStreamServiceController]. Each key is the id of a pipeline
    # instance defined by the end user. The InteractiveRunner or its enclosing
    # scope is responsible for populating this dictionary implicitly when a new
    # controller is created to start a new gRPC server. The server stays alive
    # until a new background caching job is started thus invalidating everything
    # the gRPC server serves.
    self._test_stream_service_controllers = {}
    self._cached_source_signature = {}
    self._tracked_user_pipelines = UserPipelineTracker()
    from apache_beam.runners.interactive.interactive_beam import clusters
    self.clusters = clusters

    # Tracks the computation completeness of PCollections. PCollections tracked
    # here don't need to be re-computed when data introspection is needed.
    self._computed_pcolls = set()
    # Always watch __main__ module.
    self.watch('__main__')
    # Check if [interactive] dependencies are installed.
    try:
      import IPython  # pylint: disable=unused-import
      import timeloop  # pylint: disable=unused-import
      from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
      from google.cloud import dataproc_v1  # pylint: disable=unused-import
      self._is_interactive_ready = True
    except ImportError:
      self._is_interactive_ready = False
      _LOGGER.warning(
          'Dependencies required for Interactive Beam PCollection '
          'visualization are not available, please use: `pip '
          'install apache-beam[interactive]` to install necessary '
          'dependencies to enable all data visualization features.')

    self._is_in_ipython = is_in_ipython()
    self._is_in_notebook = is_in_notebook()
    if not self._is_in_ipython:
      _LOGGER.warning(
          'You cannot use Interactive Beam features when you are '
          'not in an interactive environment such as a Jupyter '
          'notebook or ipython terminal.')
    if self._is_in_ipython and not self._is_in_notebook:
      _LOGGER.warning(
          'You have limited Interactive Beam features since your '
          'ipython kernel is not connected to any notebook frontend.')
    if self._is_in_notebook:
      self.load_jquery_with_datatable()
      register_ipython_log_handler()

    # A singleton inspector instance to message information of current
    # environment to other applications.
    self._inspector = InteractiveEnvironmentInspector()
    # A similar singleton inspector except it includes synthetic variables
    # generated by Interactive Beam.
    self._inspector_with_synthetic = InteractiveEnvironmentInspector(
        ignore_synthetic=False)

    self.sql_chain = {}
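The constructor branches on is_in_ipython() and is_in_notebook(). A minimal sketch of how such detection can be done with IPython's public get_ipython() helper (the actual Beam helpers may differ):

def in_ipython():
    # True when running under any IPython shell (terminal or kernel).
    try:
        from IPython import get_ipython
    except ImportError:
        return False
    return get_ipython() is not None

def in_notebook():
    # Heuristic: a ZMQ-based kernel usually indicates a notebook-style frontend.
    try:
        from IPython import get_ipython
    except ImportError:
        return False
    shell = get_ipython()
    return shell is not None and type(shell).__name__ == 'ZMQInteractiveShell'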