def test_ipython_log_handler_not_double_registered(self):
    """Checks that exactly one IPythonLogHandler is attached to the logger.

    Calls ``utils.register_ipython_log_handler()`` and then asserts that the
    logger stored in ``self._interactive_root_logger`` carries a single
    handler that is an instance of ``utils.IPythonLogHandler``.
    """
    utils.register_ipython_log_handler()
    attached = [
        candidate for candidate in self._interactive_root_logger.handlers
        if isinstance(candidate, utils.IPythonLogHandler)
    ]
    self.assertEqual(1, len(attached))
def __init__(self, cache_manager=None):
    """Sets up the bookkeeping state of the interactive environment.

    Creates the empty tracking containers, starts watching ``__main__``,
    records whether the Python version, the optional [interactive]
    dependencies and the IPython/notebook environment are available (logging
    a warning for each one that is not), and registers the IPython log
    handler.

    Args:
      cache_manager: optional cache manager stored on the instance. When it
        is truthy, ``self.cleanup`` is registered to run at interpreter exit.
    """
    self._cache_manager = cache_manager
    # Register a cleanup routine when kernel is restarted or terminated.
    if cache_manager:
        atexit.register(self.cleanup)

    # Holds class instances, module object, string of module names.
    self._watching_set = set()
    # Holds variables list of (Dict[str, object]).
    self._watching_dict_list = []
    # Holds results of main jobs as Dict[Pipeline, PipelineResult].
    # Each key is a pipeline instance defined by the end user. The
    # InteractiveRunner is responsible for populating this dictionary
    # implicitly.
    self._main_pipeline_results = {}
    # Holds background caching jobs as Dict[Pipeline, BackgroundCachingJob].
    # Each key is a pipeline instance defined by the end user. The
    # InteractiveRunner or its enclosing scope is responsible for populating
    # this dictionary implicitly when a background caching job is started.
    self._background_caching_jobs = {}
    # Holds TestStreamServiceControllers that control gRPC servers serving
    # events as test stream of TestStreamPayload.Event.
    # Dict[Pipeline, TestStreamServiceController]. Each key is a pipeline
    # instance defined by the end user. The InteractiveRunner or its
    # enclosing scope is responsible for populating this dictionary
    # implicitly when a new controller is created to start a new gRPC
    # server. The server stays alive until a new background caching job is
    # started thus invalidating everything the gRPC server serves.
    self._test_stream_service_controllers = {}
    self._cached_source_signature = {}
    self._tracked_user_pipelines = set()
    # Tracks the computation completeness of PCollections. PCollections
    # tracked here don't need to be re-computed when data introspection is
    # needed.
    self._computed_pcolls = set()
    # Always watch __main__ module.
    self.watch('__main__')

    # Do a warning level logging if current python version is below 3.6.
    # The message names the same minimum version the check enforces.
    self._is_py_version_ready = sys.version_info >= (3, 6)
    if not self._is_py_version_ready:
        _LOGGER.warning('Interactive Beam requires Python 3.6+.')

    # Check if [interactive] dependencies are installed. Only the imports
    # live in the try body so nothing else can be mistaken for a missing
    # dependency.
    try:
        import IPython  # pylint: disable=unused-import
        import timeloop  # pylint: disable=unused-import
        from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
    except ImportError:
        self._is_interactive_ready = False
        _LOGGER.warning(
            'Dependencies required for Interactive Beam PCollection '
            'visualization are not available, please use: `pip '
            'install apache-beam[interactive]` to install necessary '
            'dependencies to enable all data visualization features.')
    else:
        self._is_interactive_ready = True

    self._is_in_ipython = is_in_ipython()
    self._is_in_notebook = is_in_notebook()
    if not self._is_in_ipython:
        _LOGGER.warning(
            'You cannot use Interactive Beam features when you are '
            'not in an interactive environment such as a Jupyter '
            'notebook or ipython terminal.')
    elif not self._is_in_notebook:
        # In ipython, but without a notebook frontend attached.
        _LOGGER.warning(
            'You have limited Interactive Beam features since your '
            'ipython kernel is not connected to any notebook frontend.')
    if self._is_in_notebook:
        self.load_jquery_with_datatable()
        self.import_html_to_head([
            'https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist'
            '/facets-jupyter.html'
        ])
    register_ipython_log_handler()
def setUp(self):
    """Registers the IPython log handler and stores the interactive logger.

    The logger named 'apache_beam.runners.interactive' is kept on
    ``self._interactive_root_logger`` for use by the test methods.
    """
    utils.register_ipython_log_handler()
    logger_name = 'apache_beam.runners.interactive'
    self._interactive_root_logger = logging.getLogger(logger_name)
def __init__(self):
    """Initializes the tracking state of the interactive environment.

    Registers ``self.cleanup`` to run at exit, creates the empty tracking
    containers, starts watching ``__main__``, records whether the optional
    [interactive] dependencies and an IPython/notebook environment are
    available (logging a warning when they are not), registers the IPython
    log handler and creates the two environment inspectors.
    """
    # Make sure the cleanup routine runs when the system exits.
    atexit.register(self.cleanup)

    # Cache managers taking care of source recording and the intermediate
    # PCollection cache of each pipeline, keyed by the stringified id of the
    # user defined pipeline instance.
    self._cache_managers = {}
    # RecordingManagers, keyed by pipeline instance id.
    self._recording_managers = {}
    # Watched class instances, module objects and module name strings.
    self._watching_set = set()
    # Watched variables, as a list of Dict[str, object].
    self._watching_dict_list = []
    # Results of main jobs as Dict[str, PipelineResult], one entry per
    # pipeline instance defined by the end user. Populated implicitly by the
    # InteractiveRunner.
    self._main_pipeline_results = {}
    # Background caching jobs as Dict[str, BackgroundCachingJob], one entry
    # per pipeline instance defined by the end user. Populated implicitly by
    # the InteractiveRunner or its enclosing scope whenever a background
    # caching job is started.
    self._background_caching_jobs = {}
    # Dict[str, TestStreamServiceController], one entry per pipeline
    # instance defined by the end user. Each controller controls a gRPC
    # server that serves TestStreamPayload.Event events as a test stream.
    # Populated implicitly by the InteractiveRunner or its enclosing scope
    # when a new controller is created to start a new gRPC server; that
    # server stays alive until a new background caching job starts and
    # invalidates everything it serves.
    self._test_stream_service_controllers = {}
    self._cached_source_signature = {}
    self._tracked_user_pipelines = UserPipelineTracker()
    from apache_beam.runners.interactive.interactive_beam import clusters
    self.clusters = clusters

    # PCollections whose computation has completed; they need no
    # re-computation when data introspection is requested.
    self._computed_pcolls = set()
    # The __main__ module is always watched.
    self.watch('__main__')

    # Probe for the [interactive] dependencies.
    try:
        import IPython  # pylint: disable=unused-import
        import timeloop  # pylint: disable=unused-import
        from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator  # pylint: disable=unused-import
        from google.cloud import dataproc_v1  # pylint: disable=unused-import
    except ImportError:
        self._is_interactive_ready = False
        _LOGGER.warning(
            'Dependencies required for Interactive Beam PCollection '
            'visualization are not available, please use: `pip '
            'install apache-beam[interactive]` to install necessary '
            'dependencies to enable all data visualization features.')
    else:
        self._is_interactive_ready = True

    self._is_in_ipython = is_in_ipython()
    self._is_in_notebook = is_in_notebook()
    if not self._is_in_ipython:
        _LOGGER.warning(
            'You cannot use Interactive Beam features when you are '
            'not in an interactive environment such as a Jupyter '
            'notebook or ipython terminal.')
    elif not self._is_in_notebook:
        # In ipython, but no notebook frontend is attached.
        _LOGGER.warning(
            'You have limited Interactive Beam features since your '
            'ipython kernel is not connected to any notebook frontend.')
    if self._is_in_notebook:
        self.load_jquery_with_datatable()
    register_ipython_log_handler()

    # Singleton inspector used to message information about the current
    # environment to other applications.
    self._inspector = InteractiveEnvironmentInspector()
    # A second singleton inspector, created with ignore_synthetic=False so
    # that synthetic variables generated by Interactive Beam are included.
    self._inspector_with_synthetic = InteractiveEnvironmentInspector(
        ignore_synthetic=False)

    self.sql_chain = {}