class Foo(Configurable):
    i = Integer(0, help="The integer i.").tag(config=True)
    j = Integer(1, help="The integer j.").tag(config=True)
    name = Unicode(u'Brian', help="First name.").tag(config=True)
    la = List([]).tag(config=True)
    fdict = Dict().tag(config=True, multiplicity='+')
class DefaultConfigurable(Configurable):
    a = Integer(config=True)

    def _config_default(self):
        if SomeSingleton.initialized():
            return SomeSingleton.instance().config
        return Config()
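# A minimal, runnable sketch of how the dynamic `_config_default` above behaves.
# `SomeSingleton` here is only a stand-in for whatever SingletonConfigurable the
# application actually uses, and the nested Config values are illustrative.
from traitlets import Integer
from traitlets.config import Config, Configurable, SingletonConfigurable


class SomeSingleton(SingletonConfigurable):
    """Hypothetical application-wide singleton holding the shared config."""


class DefaultConfigurable(Configurable):
    a = Integer(config=True)

    def _config_default(self):
        # Reuse the singleton's config when one exists, otherwise start empty.
        if SomeSingleton.initialized():
            return SomeSingleton.instance().config
        return Config()


# Creating the singleton first means later Configurables pick up its config.
SomeSingleton.instance(config=Config({"DefaultConfigurable": {"a": 5}}))
print(DefaultConfigurable().a)  # expected: 5, taken from the singleton's config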
class Bar(Configurable):
    b = Integer(0, help="The integer b.").tag(config=True)
    enabled = Bool(True, help="Enable bar.").tag(config=True)
    tb = Tuple(()).tag(config=True, multiplicity='*')
    aset = Set().tag(config=True, multiplicity='+')
    bdict = Dict().tag(config=True)
class CookaApp(Application):
    server_port = Integer(8000).tag(config=True)
    language = Unicode("use_client").tag(config=True)
    data_directory = Unicode("~/cooka").tag(config=True)
    notebook_portal = Unicode(_get_default_notebook_portal()).tag(config=True)
    optimize_metric = Dict(
        per_key_traits={
            "multi_classification_optimize": Unicode("accuracy").tag(config=True),
            "binary_classification": Unicode("auc").tag(config=True),
            "regression": Unicode("rmse").tag(config=True)
        }
    ).tag(config=True)
    max_trials = Dict(
        per_key_traits={
            "performance": Integer(50),
            "quick": Integer(10),
            "minimal": Integer(1)
        }
    ).tag(config=True)
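# A small self-contained sketch (separate from CookaApp; the `Limits` class and
# its key names are hypothetical) of how Dict(per_key_traits=...) validates
# each key individually with a recent traitlets (5.x).
from traitlets import Dict, Integer, TraitError
from traitlets.config import Configurable


class Limits(Configurable):
    # Each named key gets its own trait, so values are validated per key.
    max_trials = Dict(
        per_key_traits={"performance": Integer(), "quick": Integer()},
    ).tag(config=True)


lim = Limits(max_trials={"performance": 50, "quick": 10})
print(lim.max_trials)  # {'performance': 50, 'quick': 10}

try:
    lim.max_trials = {"performance": "not-an-int"}  # wrong type for this key
except TraitError as e:
    print("rejected:", e)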
class Foo(Configurable):
    a = Integer(0, help="The integer a.").tag(config=True)
    b = Unicode('nope').tag(config=True)
    flist = List([]).tag(config=True)
    fdict = Dict().tag(config=True)
class A(LoggingConfigurable):
    foo = Integer(config=True)
    bar = Integer(config=True)
    baz = Integer(config=True)
class MyConfigurable(Configurable):
    a = Integer(1, help="The integer a.").tag(config=True)
    b = Float(1.0, help="The float b.").tag(config=True)
    c = Unicode('no config')
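# A brief usage sketch, assuming the MyConfigurable class above is already
# defined or importable: only traits tagged config=True pick up values from a
# Config object; untagged traits keep their defaults.
from traitlets.config import Config

c = Config()
c.MyConfigurable.a = 10
c.MyConfigurable.b = 2.5
c.MyConfigurable.c = "ignored"  # not tagged config=True: skipped (with a warning)

obj = MyConfigurable(config=c)
print(obj.a, obj.b, obj.c)  # expected: 10 2.5 'no config'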
class IPKernelApp(BaseIPythonApplication, InteractiveShellApp, ConnectionFileMixin): name = "ipython-kernel" aliases = Dict(kernel_aliases) flags = Dict(kernel_flags) classes = [IPythonKernel, ZMQInteractiveShell, ProfileDir, Session] # the kernel class, as an importstring kernel_class = Type( "ipykernel.ipkernel.IPythonKernel", klass="ipykernel.kernelbase.Kernel", help="""The Kernel subclass to be used. This should allow easy re-use of the IPKernelApp entry point to configure and launch kernels other than IPython's own. """, ).tag(config=True) kernel = Any() poller = Any() # don't restrict this even though current pollers are all Threads heartbeat = Instance(Heartbeat, allow_none=True) context = Any() shell_socket = Any() control_socket = Any() debugpy_socket = Any() debug_shell_socket = Any() stdin_socket = Any() iopub_socket = Any() iopub_thread = Any() control_thread = Any() _ports = Dict() subcommands = { "install": ( "ipykernel.kernelspec.InstallIPythonKernelSpecApp", "Install the IPython kernel", ), } # connection info: connection_dir = Unicode() @default("connection_dir") def _default_connection_dir(self): return jupyter_runtime_dir() @property def abs_connection_file(self): if os.path.basename(self.connection_file) == self.connection_file: return os.path.join(self.connection_dir, self.connection_file) else: return self.connection_file # streams, etc. no_stdout = Bool(False, help="redirect stdout to the null device").tag(config=True) no_stderr = Bool(False, help="redirect stderr to the null device").tag(config=True) trio_loop = Bool(False, help="Set main event loop.").tag(config=True) quiet = Bool(True, help="Only send stdout/stderr to output stream").tag(config=True) outstream_class = DottedObjectName( "ipykernel.iostream.OutStream", help="The importstring for the OutStream factory" ).tag(config=True) displayhook_class = DottedObjectName( "ipykernel.displayhook.ZMQDisplayHook", help="The importstring for the DisplayHook factory" ).tag(config=True) capture_fd_output = Bool( True, help="""Attempt to capture and forward low-level output, e.g. produced by Extension libraries. """, ).tag(config=True) # polling parent_handle = Integer( int(os.environ.get("JPY_PARENT_PID") or 0), help="""kill this process if its parent dies. On Windows, the argument specifies the HANDLE of the parent process, otherwise it is simply boolean. """, ).tag(config=True) interrupt = Integer( int(os.environ.get("JPY_INTERRUPT_EVENT") or 0), help="""ONLY USED ON WINDOWS Interrupt this process when the parent is signaled. """, ).tag(config=True) def init_crash_handler(self): sys.excepthook = self.excepthook def excepthook(self, etype, evalue, tb): # write uncaught traceback to 'real' stderr, not zmq-forwarder traceback.print_exception(etype, evalue, tb, file=sys.__stderr__) def init_poller(self): if sys.platform == "win32": if self.interrupt or self.parent_handle: self.poller = ParentPollerWindows(self.interrupt, self.parent_handle) elif self.parent_handle and self.parent_handle != 1: # PID 1 (init) is special and will never go away, # only be reassigned. # Parent polling doesn't work if ppid == 1 to start with. 
self.poller = ParentPollerUnix() def _try_bind_socket(self, s, port): iface = "%s://%s" % (self.transport, self.ip) if self.transport == "tcp": if port <= 0: port = s.bind_to_random_port(iface) else: s.bind("tcp://%s:%i" % (self.ip, port)) elif self.transport == "ipc": if port <= 0: port = 1 path = "%s-%i" % (self.ip, port) while os.path.exists(path): port = port + 1 path = "%s-%i" % (self.ip, port) else: path = "%s-%i" % (self.ip, port) s.bind("ipc://%s" % path) return port def _bind_socket(self, s, port): try: win_in_use = errno.WSAEADDRINUSE # type:ignore[attr-defined] except AttributeError: win_in_use = None # Try up to 100 times to bind a port when in conflict to avoid # infinite attempts in bad setups max_attempts = 1 if port else 100 for attempt in range(max_attempts): try: return self._try_bind_socket(s, port) except zmq.ZMQError as ze: # Raise if we have any error not related to socket binding if ze.errno != errno.EADDRINUSE and ze.errno != win_in_use: raise if attempt == max_attempts - 1: raise def write_connection_file(self): """write connection info to JSON file""" cf = self.abs_connection_file self.log.debug("Writing connection file: %s", cf) write_connection_file( cf, ip=self.ip, key=self.session.key, transport=self.transport, shell_port=self.shell_port, stdin_port=self.stdin_port, hb_port=self.hb_port, iopub_port=self.iopub_port, control_port=self.control_port, ) def cleanup_connection_file(self): cf = self.abs_connection_file self.log.debug("Cleaning up connection file: %s", cf) try: os.remove(cf) except OSError: pass self.cleanup_ipc_files() def init_connection_file(self): if not self.connection_file: self.connection_file = "kernel-%s.json" % os.getpid() try: self.connection_file = filefind(self.connection_file, [".", self.connection_dir]) except OSError: self.log.debug("Connection file not found: %s", self.connection_file) # This means I own it, and I'll create it in this directory: os.makedirs(os.path.dirname(self.abs_connection_file), mode=0o700, exist_ok=True) # Also, I will clean it up: atexit.register(self.cleanup_connection_file) return try: self.load_connection_file() except Exception: self.log.error( "Failed to load connection file: %r", self.connection_file, exc_info=True ) self.exit(1) def init_sockets(self): # Create a context, a session, and the kernel sockets. self.log.info("Starting the kernel at pid: %i", os.getpid()) assert self.context is None, "init_sockets cannot be called twice!" 
self.context = context = zmq.Context() atexit.register(self.close) self.shell_socket = context.socket(zmq.ROUTER) self.shell_socket.linger = 1000 self.shell_port = self._bind_socket(self.shell_socket, self.shell_port) self.log.debug("shell ROUTER Channel on port: %i" % self.shell_port) self.stdin_socket = context.socket(zmq.ROUTER) self.stdin_socket.linger = 1000 self.stdin_port = self._bind_socket(self.stdin_socket, self.stdin_port) self.log.debug("stdin ROUTER Channel on port: %i" % self.stdin_port) if hasattr(zmq, "ROUTER_HANDOVER"): # set router-handover to workaround zeromq reconnect problems # in certain rare circumstances # see ipython/ipykernel#270 and zeromq/libzmq#2892 self.shell_socket.router_handover = self.stdin_socket.router_handover = 1 self.init_control(context) self.init_iopub(context) def init_control(self, context): self.control_socket = context.socket(zmq.ROUTER) self.control_socket.linger = 1000 self.control_port = self._bind_socket(self.control_socket, self.control_port) self.log.debug("control ROUTER Channel on port: %i" % self.control_port) self.debugpy_socket = context.socket(zmq.STREAM) self.debugpy_socket.linger = 1000 self.debug_shell_socket = context.socket(zmq.DEALER) self.debug_shell_socket.linger = 1000 if self.shell_socket.getsockopt(zmq.LAST_ENDPOINT): self.debug_shell_socket.connect(self.shell_socket.getsockopt(zmq.LAST_ENDPOINT)) if hasattr(zmq, "ROUTER_HANDOVER"): # set router-handover to workaround zeromq reconnect problems # in certain rare circumstances # see ipython/ipykernel#270 and zeromq/libzmq#2892 self.control_socket.router_handover = 1 self.control_thread = ControlThread(daemon=True) def init_iopub(self, context): self.iopub_socket = context.socket(zmq.PUB) self.iopub_socket.linger = 1000 self.iopub_port = self._bind_socket(self.iopub_socket, self.iopub_port) self.log.debug("iopub PUB Channel on port: %i" % self.iopub_port) self.configure_tornado_logger() self.iopub_thread = IOPubThread(self.iopub_socket, pipe=True) self.iopub_thread.start() # backward-compat: wrap iopub socket API in background thread self.iopub_socket = self.iopub_thread.background_socket def init_heartbeat(self): """start the heart beating""" # heartbeat doesn't share context, because it mustn't be blocked # by the GIL, which is accessed by libzmq when freeing zero-copy messages hb_ctx = zmq.Context() self.heartbeat = Heartbeat(hb_ctx, (self.transport, self.ip, self.hb_port)) self.hb_port = self.heartbeat.port self.log.debug("Heartbeat REP Channel on port: %i" % self.hb_port) self.heartbeat.start() def close(self): """Close zmq sockets in an orderly fashion""" # un-capture IO before we start closing channels self.reset_io() self.log.info("Cleaning up sockets") if self.heartbeat: self.log.debug("Closing heartbeat channel") self.heartbeat.context.term() if self.iopub_thread: self.log.debug("Closing iopub channel") self.iopub_thread.stop() self.iopub_thread.close() if self.control_thread and self.control_thread.is_alive(): self.log.debug("Closing control thread") self.control_thread.stop() self.control_thread.join() if self.debugpy_socket and not self.debugpy_socket.closed: self.debugpy_socket.close() if self.debug_shell_socket and not self.debug_shell_socket.closed: self.debug_shell_socket.close() for channel in ("shell", "control", "stdin"): self.log.debug("Closing %s channel", channel) socket = getattr(self, channel + "_socket", None) if socket and not socket.closed: socket.close() self.log.debug("Terminating zmq context") self.context.term() self.log.debug("Terminated zmq 
context") def log_connection_info(self): """display connection info, and store ports""" basename = os.path.basename(self.connection_file) if ( basename == self.connection_file or os.path.dirname(self.connection_file) == self.connection_dir ): # use shortname tail = basename else: tail = self.connection_file lines = [ "To connect another client to this kernel, use:", " --existing %s" % tail, ] # log connection info # info-level, so often not shown. # frontends should use the %connect_info magic # to see the connection info for line in lines: self.log.info(line) # also raw print to the terminal if no parent_handle (`ipython kernel`) # unless log-level is CRITICAL (--quiet) if not self.parent_handle and self.log_level < logging.CRITICAL: print(_ctrl_c_message, file=sys.__stdout__) for line in lines: print(line, file=sys.__stdout__) self._ports = dict( shell=self.shell_port, iopub=self.iopub_port, stdin=self.stdin_port, hb=self.hb_port, control=self.control_port, ) def init_blackhole(self): """redirects stdout/stderr to devnull if necessary""" if self.no_stdout or self.no_stderr: blackhole = open(os.devnull, "w") if self.no_stdout: sys.stdout = sys.__stdout__ = blackhole if self.no_stderr: sys.stderr = sys.__stderr__ = blackhole def init_io(self): """Redirect input streams and set a display hook.""" if self.outstream_class: outstream_factory = import_item(str(self.outstream_class)) if sys.stdout is not None: sys.stdout.flush() e_stdout = None if self.quiet else sys.__stdout__ e_stderr = None if self.quiet else sys.__stderr__ if not self.capture_fd_output: outstream_factory = partial(outstream_factory, watchfd=False) sys.stdout = outstream_factory(self.session, self.iopub_thread, "stdout", echo=e_stdout) if sys.stderr is not None: sys.stderr.flush() sys.stderr = outstream_factory(self.session, self.iopub_thread, "stderr", echo=e_stderr) if hasattr(sys.stderr, "_original_stdstream_copy"): for handler in self.log.handlers: if isinstance(handler, StreamHandler) and (handler.stream.buffer.fileno() == 2): self.log.debug("Seeing logger to stderr, rerouting to raw filedescriptor.") handler.stream = TextIOWrapper( FileIO( sys.stderr._original_stdstream_copy, # type:ignore[attr-defined] "w", ) ) if self.displayhook_class: displayhook_factory = import_item(str(self.displayhook_class)) self.displayhook = displayhook_factory(self.session, self.iopub_socket) sys.displayhook = self.displayhook self.patch_io() def reset_io(self): """restore original io restores state after init_io """ sys.stdout = sys.__stdout__ sys.stderr = sys.__stderr__ sys.displayhook = sys.__displayhook__ def patch_io(self): """Patch important libraries that can't handle sys.stdout forwarding""" try: import faulthandler except ImportError: pass else: # Warning: this is a monkeypatch of `faulthandler.enable`, watch for possible # updates to the upstream API and update accordingly (up-to-date as of Python 3.5): # https://docs.python.org/3/library/faulthandler.html#faulthandler.enable # change default file to __stderr__ from forwarded stderr faulthandler_enable = faulthandler.enable def enable(file=sys.__stderr__, all_threads=True, **kwargs): return faulthandler_enable(file=file, all_threads=all_threads, **kwargs) faulthandler.enable = enable if hasattr(faulthandler, "register"): faulthandler_register = faulthandler.register def register(signum, file=sys.__stderr__, all_threads=True, chain=False, **kwargs): return faulthandler_register( signum, file=file, all_threads=all_threads, chain=chain, **kwargs ) faulthandler.register = register def 
init_signal(self): signal.signal(signal.SIGINT, signal.SIG_IGN) def init_kernel(self): """Create the Kernel object itself""" shell_stream = ZMQStream(self.shell_socket) control_stream = ZMQStream(self.control_socket, self.control_thread.io_loop) debugpy_stream = ZMQStream(self.debugpy_socket, self.control_thread.io_loop) self.control_thread.start() kernel_factory = self.kernel_class.instance kernel = kernel_factory( parent=self, session=self.session, control_stream=control_stream, debugpy_stream=debugpy_stream, debug_shell_socket=self.debug_shell_socket, shell_stream=shell_stream, control_thread=self.control_thread, iopub_thread=self.iopub_thread, iopub_socket=self.iopub_socket, stdin_socket=self.stdin_socket, log=self.log, profile_dir=self.profile_dir, user_ns=self.user_ns, ) kernel.record_ports({name + "_port": port for name, port in self._ports.items()}) self.kernel = kernel # Allow the displayhook to get the execution count self.displayhook.get_execution_count = lambda: kernel.execution_count def init_gui_pylab(self): """Enable GUI event loop integration, taking pylab into account.""" # Register inline backend as default # this is higher priority than matplotlibrc, # but lower priority than anything else (mpl.use() for instance). # This only affects matplotlib >= 1.5 if not os.environ.get("MPLBACKEND"): os.environ["MPLBACKEND"] = "module://matplotlib_inline.backend_inline" # Provide a wrapper for :meth:`InteractiveShellApp.init_gui_pylab` # to ensure that any exception is printed straight to stderr. # Normally _showtraceback associates the reply with an execution, # which means frontends will never draw it, as this exception # is not associated with any execute request. shell = self.shell assert shell is not None _showtraceback = shell._showtraceback try: # replace error-sending traceback with stderr def print_tb(etype, evalue, stb): print("GUI event loop or pylab initialization failed", file=sys.stderr) assert shell is not None print(shell.InteractiveTB.stb2text(stb), file=sys.stderr) shell._showtraceback = print_tb InteractiveShellApp.init_gui_pylab(self) finally: shell._showtraceback = _showtraceback def init_shell(self): self.shell = getattr(self.kernel, "shell", None) if self.shell: self.shell.configurables.append(self) def configure_tornado_logger(self): """Configure the tornado logging.Logger. Must set up the tornado logger or else tornado will call basicConfig for the root logger which makes the root logger go to the real sys.stderr instead of the capture streams. This function mimics the setup of logging.basicConfig. """ logger = logging.getLogger("tornado") handler = logging.StreamHandler() formatter = logging.Formatter(logging.BASIC_FORMAT) handler.setFormatter(formatter) logger.addHandler(handler) def _init_asyncio_patch(self): """set default asyncio policy to be compatible with tornado Tornado 6 (at least) is not compatible with the default asyncio implementation on Windows Pick the older SelectorEventLoopPolicy on Windows if the known-incompatible default policy is in use. Support for Proactor via a background thread is available in tornado 6.1, but it is still preferable to run the Selector in the main thread instead of the background. do this as early as possible to make it a low priority and overrideable ref: https://github.com/tornadoweb/tornado/issues/2608 FIXME: if/when tornado supports the defaults in asyncio without threads, remove and bump tornado requirement for py38. 
Most likely, this will mean a new Python version where asyncio.ProactorEventLoop supports add_reader and friends. """ if sys.platform.startswith("win") and sys.version_info >= (3, 8): import asyncio try: from asyncio import ( WindowsProactorEventLoopPolicy, WindowsSelectorEventLoopPolicy, ) except ImportError: pass # not affected else: if type(asyncio.get_event_loop_policy()) is WindowsProactorEventLoopPolicy: # WindowsProactorEventLoopPolicy is not compatible with tornado 6 # fallback to the pre-3.8 default of Selector asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy()) def init_pdb(self): """Replace pdb with IPython's version that is interruptible. With the non-interruptible version, stopping pdb() locks up the kernel in a non-recoverable state. """ import pdb from IPython.core import debugger if hasattr(debugger, "InterruptiblePdb"): # Only available in newer IPython releases: debugger.Pdb = debugger.InterruptiblePdb pdb.Pdb = debugger.Pdb # type:ignore[misc] pdb.set_trace = debugger.set_trace @catch_config_error def initialize(self, argv=None): self._init_asyncio_patch() super().initialize(argv) if self.subapp is not None: return self.init_pdb() self.init_blackhole() self.init_connection_file() self.init_poller() self.init_sockets() self.init_heartbeat() # writing/displaying connection info must be *after* init_sockets/heartbeat self.write_connection_file() # Log connection info after writing connection file, so that the connection # file is definitely available at the time someone reads the log. self.log_connection_info() self.init_io() try: self.init_signal() except Exception: # Catch exception when initializing signal fails, eg when running the # kernel on a separate thread if self.log_level < logging.CRITICAL: self.log.error("Unable to initialize signal:", exc_info=True) self.init_kernel() # shell init steps self.init_path() self.init_shell() if self.shell: self.init_gui_pylab() self.init_extensions() self.init_code() # flush stdout/stderr, so that anything written to these streams during # initialization do not get associated with the first execution request sys.stdout.flush() sys.stderr.flush() def start(self): if self.subapp is not None: return self.subapp.start() if self.poller is not None: self.poller.start() self.kernel.start() self.io_loop = ioloop.IOLoop.current() if self.trio_loop: from ipykernel.trio_runner import TrioRunner tr = TrioRunner() tr.initialize(self.kernel, self.io_loop) try: tr.run() except KeyboardInterrupt: pass else: try: self.io_loop.start() except KeyboardInterrupt: pass
class Bar(Configurable):
    b = Integer(0, help="The integer b.").tag(config=True)
    enabled = Bool(True, help="Enable bar.").tag(config=True)
class ComponentCatalogConnector(LoggingConfigurable): """ Abstract class to model component_entry readers that can read components from different locations """ max_threads_default = 3 max_readers_env = "ELYRA_CATALOG_CONNECTOR_MAX_READERS" max_readers = Integer( max_threads_default, help="""Sets the maximum number of reader threads to be used to read catalog entries in parallel""", ).tag(config=True) @default("max_readers") def max_readers_default(self): max_reader_threads = ComponentCatalogConnector.max_threads_default try: max_reader_threads = int( os.getenv(self.max_readers_env, max_reader_threads)) except ValueError: self.log.info( f"Unable to parse environmental variable {self.max_readers_env}, " f"using the default value of {self.max_threads_default}") return max_reader_threads def __init__(self, file_types: List[str], **kwargs): super().__init__(**kwargs) self._file_types = file_types @abstractmethod def get_catalog_entries( self, catalog_metadata: Dict[str, Any]) -> List[Dict[str, Any]]: """ Returns a list of catalog_entry_data dictionary instances, one per entry in the given catalog. Each catalog_entry_data dictionary contains the information needed to access a single component definition. The form that each catalog_entry_data takes is determined by the unique requirements of the reader class. For example, the FilesystemCatalogConnector includes both a base directory ('base_dir') key-value pair and a relative path ('path') key-value pair in its 'catalog_entry_data' dict. Both fields are needed in order to access the corresponding definition in get_entry_data(). Every catalog_entry_data should contain each of the keys returned in get_hash_keys() to ensure uniqueness and portability among entries. For the same reason, no two catalog entries should have equivalent catalog_entry_data dictionaries. :param catalog_metadata: the dictionary form of the metadata associated with a single catalog; the general structure is given in the example below example: { "description": "...", # only present if a description is added "runtime_type": "...", # must be present "categories": ["category1", "category2", ...], # may be an empty array "your_property1": value1, "your_property2": value2, ... } :returns: a list of catalog entry dictionaries, each of which contains the information needed to access a component definition in get_entry_data() """ raise NotImplementedError( "abstract method 'get_catalog_entries()' must be implemented") @deprecated( deprecated_in="3.7.0", removed_in="4.0", current_version=__version__, details="Implement the get_entry_data function instead", ) def read_catalog_entry(self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]) -> Optional[str]: """ DEPRECATED. Will be removed in 4.0. get_entry_data() must be implemented instead. Reads a component definition for a single catalog entry using the catalog_entry_data returned from get_catalog_entries() and, if needed, the catalog metadata. 
:param catalog_entry_data: a dictionary that contains the information needed to read the content of the component definition; below is an example data structure returned from get_catalog_entries() example: { "directory_path": "/Users/path/to/directory", "relative_path": "subdir/file.py" } :param catalog_metadata: the metadata associated with the catalog in which this catalog entry is stored; this is the same dictionary that is passed into get_catalog_entries(); in addition to catalog_entry_data, catalog_metadata may also be needed to read the component definition for certain types of catalogs :returns: the content of the given catalog entry's definition in string form, if found, or None; if None is returned, this catalog entry is skipped and a warning message logged """ raise NotImplementedError( "abstract method 'read_catalog_entry()' must be implemented") def get_entry_data( self, catalog_entry_data: Dict[str, Any], catalog_metadata: Dict[str, Any]) -> Optional[EntryData]: """ Reads a component definition (and other information-of-interest) for a single catalog entry and creates an EntryData object to represent it. Uses the catalog_entry_data returned from get_catalog_entries() and, if needed, the catalog metadata to retrieve the definition. :param catalog_entry_data: a dictionary that contains the information needed to read the content of the component definition; below is an example data structure returned from get_catalog_entries() example: { "directory_path": "/Users/path/to/directory", "relative_path": "subdir/file.py" } :param catalog_metadata: the metadata associated with the catalog in which this catalog entry is stored; this is the same dictionary that is passed into get_catalog_entries(); in addition to catalog_entry_data, catalog_metadata may also be needed to read the component definition for certain types of catalogs :returns: an EntryData object representing the definition (and other identifying info) for a single catalog entry; if None is returned, this catalog entry is skipped and a warning message logged """ raise NotImplementedError( "method 'get_entry_data()' must be overridden") @classmethod def get_hash_keys(cls) -> List[Any]: """ Provides a list of keys, available in the 'catalog_entry_data' dictionary, whose values will be used to construct a unique hash id for each entry with the given catalog type. This function has been changed to a class method as of version 3.7. Connectors that still implement this function as an abstract method will be supported in a fallback scenario. Besides being a means to uniquely identify a single component (catalog entry), the hash id also enables pipeline portability across installations when the keys returned here are chosen strategically. For example, the FilesystemCatalogConnector includes both a base directory key-value pair and a relative path key-value pair in its 'catalog_entry_data' dict. Both fields are required to access the component definition in get_entry_data(), but only the relative path field is used to create the unique hash. This allows a component that has the same relative path defined in two separate a catalogs in two separate installations to resolve to the same unique id in each, and therefore to be portable across pipelines in these installations. To ensure the hash is unique, no two catalog entries can have the same key-value pairs over the set of keys returned by this function. If two entries resolve to the same hash, the one whose definition is read last will overwrite the other(s). 
Example: Given a set of keys ['key1', 'key2', 'key3'], the below two catalog_entry_data dictionaries will produce unique hashes. The same can not be said, however, if the set of keys returned is ['key2', 'key3']. component_entry_data for entry1: component_entry_data for entry2: { { 'key1': 'value1', 'key1': 'value4', 'key2': 'value2', 'key2': 'value2', 'key3': 'value3' 'key3': 'value3' } { Additionally, every catalog_entry_data dict should include each key in the set returned here. If this is not the case, a catalog entry's portability and uniqueness may be negatively affected. :returns: a list of keys """ raise NotImplementedError( "abstract method 'get_hash_keys()' must be implemented") def read_component_definitions( self, catalog_instance: Metadata) -> List[CatalogEntry]: """ This function compiles the definitions of all catalog entries in a given catalog. Catalog entry data is first retrieved for each entry in the given catalog. This data is added to a queue, and a number of reader threads ('max_reader' or fewer) are started. Each reader thread pulls the data for a singe catalog entry from the queue and uses it to read the definition associated with that entry. As a mutable object, the 'catalog_entry_map' provides a means to retrieve a return value for each thread. If a thread is able to successfully read the content of the given catalog entry, a unique hash is created for the entry and a mapping is added to the catalog_entry_map. The catalog_instance Metadata parameter will have the following attributes of interest in addition to a few additional attributes used internally: :param catalog_instance: the Metadata instance for this catalog; below is an example instance example: display_name: str = "Catalog Name" schema_name: str = "connector-type" metadata: Dict[str, Any] = { "description": "...", # only present if a description is added "runtime": "...", # must be present "categories": ["category1", "category2", ...], # may be an empty array "your_property1": value1, "your_property2": value2, ... 
} :returns: a mapping of a unique component ids to their definition and identifying data """ catalog_entry_q = Queue() catalog_entries: List[CatalogEntry] = [] try: # Retrieve list of keys that will be used to construct # the catalog entry hash for each entry in the catalog try: # Attempt to use get_hash_keys as class method (Elyra version 3.7+) keys_to_hash = ComponentCatalogConnector.get_hash_keys() except Exception: # Fall back to using abstract method (version 3.6 and earlier) keys_to_hash = self.get_hash_keys() # Add display_name attribute to the metadata dictionary catalog_metadata = deepcopy(catalog_instance.metadata) catalog_metadata["display_name"] = catalog_instance.display_name # Add catalog entry data dictionaries to the thread queue for entry in self.get_catalog_entries(catalog_metadata): catalog_entry_q.put_nowait(entry) except NotImplementedError as e: err_msg = f"{self.__class__.__name__} does not meet the requirements of a catalog connector class: {e}" self.log.error(err_msg) except Exception as e: err_msg = f"Could not get catalog entry information for catalog '{catalog_instance.display_name}': {e}" # Dump stack trace with error message self.log.exception(err_msg) def read_with_thread(): """ Gets a catalog entry data dictionary from the queue and attempts to read corresponding definition """ while not catalog_entry_q.empty(): try: # Pull a catalog entry dictionary from the queue catalog_entry_data = catalog_entry_q.get(timeout=0.1) except Empty: continue try: # Read the entry definition given its returned data and the catalog entry data self.log.debug( f"Attempting read of definition for catalog entry with identifying information: " f"{str(catalog_entry_data)}...") try: # Attempt to get an EntryData object from get_entry_data first entry_data: EntryData = self.get_entry_data( catalog_entry_data=catalog_entry_data, catalog_metadata=catalog_metadata) except NotImplementedError: # Connector class does not implement get_catalog_definition and we must # manually coerce this entry's returned values into a EntryData object definition = self.read_catalog_entry( catalog_entry_data=catalog_entry_data, catalog_metadata=catalog_metadata) entry_data: EntryData = EntryData( definition=definition) # Ignore this entry if no definition content is returned if not entry_data or not entry_data.definition: self.log.warning( f"No definition content found for catalog entry with identifying information: " f"{catalog_entry_data}. Skipping...") catalog_entry_q.task_done() continue # Create a CatalogEntry object with the returned EntryData and other # necessary information from the catalog instance and connector class catalog_entry = CatalogEntry( entry_data=entry_data, entry_reference=catalog_entry_data, catalog_instance=catalog_instance, hash_keys=keys_to_hash, ) catalog_entries.append(catalog_entry) except NotImplementedError as e: msg = f"{self.__class__.__name__} does not meet the requirements of a catalog connector class: {e}." 
self.log.error(msg) except Exception as e: # Dump stack trace with error message and continue self.log.exception( f"Could not read definition for catalog entry with identifying information: " f"{str(catalog_entry_data)}: {e}") # Mark this thread's read as complete catalog_entry_q.task_done() # Start 'max_reader' reader threads if catalog includes more than 'max_reader' # number of catalog entries, else start one thread per entry num_threads = min(catalog_entry_q.qsize(), self.max_readers) for i in range(num_threads): Thread(target=read_with_thread).start() # Wait for all queued entries to be processed catalog_entry_q.join() return catalog_entries
class Viewer(ConfigurableMoveableTabContainer):
    """
    Contains multiple TabbedViewingAreas
    """
    tab_titles = Signal([tuple])
    num_viewing_areas = Integer(2, config=True)

    def __init__(self, *args, menuBar, **kwargs):
        super().__init__(*args, **kwargs)
        self._run_to_tabs = collections.defaultdict(list)
        self._title_to_tab = {}
        self._tabs_from_streaming = []
        self._overplot = OverPlotState.individual_tab
        self._overplot_target = None
        self._live_enabled = False
        self._live_run_router = RunRouter([self.route_live_stream])
        self._containers = [
            TabbedViewingArea(viewer=self, menuBar=menuBar)
            for _ in range(self.num_viewing_areas)
        ]
        layout = QVBoxLayout()
        splitter = QSplitter(Qt.Vertical)
        layout.addWidget(splitter)
        for container in self._containers:
            splitter.addWidget(container)
        self.setLayout(layout)

        overplot_group = QActionGroup(self)
        self.off = QAction('&Off', self)
        self.off.setStatusTip('Drop streaming data.')
        self.individual_tab = QAction('&New Tab', self)
        self.individual_tab.setStatusTip('Open a new viewer tab for each Run.')
        self.latest_live = QAction('&Latest Live Tab', self)
        self.latest_live.setStatusTip(
            'Attempt to overplot on the most recent live Run.')
        self.fixed = QAction('&Fixed Tab...', self)
        self.fixed.setStatusTip('Attempt to overplot on a specific tab.')
        self.fixed.setEnabled(False)
        overplot_group.addAction(self.off)
        overplot_group.addAction(self.individual_tab)
        overplot_group.addAction(self.latest_live)
        overplot_group.addAction(self.fixed)
        for action in overplot_group.actions():
            action.setCheckable(True)
        overplot_group.setExclusive(True)
        self.off.setChecked(True)
        overplot_menu = menuBar().addMenu('&Streaming')
        overplot_menu.addActions(overplot_group.actions())

        self.off.triggered.connect(self.disable_live)
        self.individual_tab.triggered.connect(
            partial(self.set_overplot_state, OverPlotState.individual_tab))
        self.latest_live.triggered.connect(
            partial(self.set_overplot_state, OverPlotState.latest_live))

        def set_overplot_target():
            item, ok = QInputDialog.getItem(
                self, "Select Tab", "Tab",
                tuple(self._title_to_tab), 0, False)
            if not ok:
                # Abort and fall back to Off. Would be better to fall back to
                # previous state (which could be latest_live) but it's not
                # clear how to know what that state was.
                self.off.setChecked(True)
                return
            self.set_overplot_state(OverPlotState.fixed)
            self._overplot_target = item

        self.fixed.triggered.connect(set_overplot_target)

    def enable_live(self):
        self._live_enabled = True

    def disable_live(self):
        self._live_enabled = False

    def consumer(self, item):
        """Slot that receives (name, doc) and unpacks it into RunRouter."""
        self._live_run_router(*item)

    def route_live_stream(self, name, start_doc):
        """Create or choose a Viewer to receive this Run."""
        if not self._live_enabled:
            log.debug("Streaming Run ignored because Streaming is disabled.")
            return [], []
        self.fixed.setEnabled(True)
        target_area = self._containers[0]
        uid = start_doc['uid']
        if self._overplot == OverPlotState.individual_tab:
            viewer = RunViewer()
            tab_title = uid[:8]
            index = target_area.addTab(viewer, tab_title)
            self._title_to_tab[tab_title] = viewer
            self._tabs_from_streaming.append(viewer)
            target_area.setCurrentIndex(index)
            self.tab_titles.emit(tuple(self._title_to_tab))
        elif self._overplot == OverPlotState.fixed:
            viewer = self._title_to_tab[self._overplot_target]
        elif self._overplot == OverPlotState.latest_live:
            if self._tabs_from_streaming:
                viewer = self._tabs_from_streaming[-1]
            else:
                viewer = RunViewer()
                tab_title = uid[:8]
                index = target_area.addTab(viewer, tab_title)
                self._title_to_tab[tab_title] = viewer
                self._tabs_from_streaming.append(viewer)
                target_area.setCurrentIndex(index)
                self.tab_titles.emit(tuple(self._title_to_tab))
        self._run_to_tabs[uid].append(viewer)
        viewer.run_router('start', start_doc)
        return [viewer.run_router], []

    def show_entries(self, target, entries):
        self.fixed.setEnabled(True)
        target_area = self._containers[0]
        if not target:
            # Add new Viewer tab.
            viewer = RunViewer()
            if len(entries) == 1:
                entry, = entries
                uid = entry.describe()['metadata']['start']['uid']
                tab_title = uid[:8]
            else:
                tab_title = self.get_title()
            index = target_area.addTab(viewer, tab_title)
            self._title_to_tab[tab_title] = viewer
            target_area.setCurrentIndex(index)
            self.tab_titles.emit(tuple(self._title_to_tab))
        else:
            viewer = self._title_to_tab[target]
        for entry in entries:
            viewer.load_entry(entry)
            uid = entry.describe()['metadata']['start']['uid']
            self._run_to_tabs[uid].append(viewer)
        # TODO Make last entry in the list the current widget.

    def get_title(self):
        for i in itertools.count(1):
            title = f'Group {i}'
            if title in self._title_to_tab:
                continue
            return title

    def set_overplot_state(self, state):
        self.enable_live()
        log.debug('Overplot state is %s', state)
        self._overplot = state

    def close_run_viewer(self, widget):
        try:
            self._tabs_from_streaming.remove(widget)
        except ValueError:
            pass
        for uid in widget.uids:
            self._run_to_tabs[uid].remove(widget)
            for title, tab in list(self._title_to_tab.items()):
                if tab == widget:
                    del self._title_to_tab[title]
                    self.tab_titles.emit(tuple(self._title_to_tab))
                    if title == self._overplot_target:
                        self.set_overplot_state(OverPlotState.off)
        if not self._title_to_tab:
            self.fixed.setEnabled(False)
class Foo(Configurable):
    i = Integer(0, help="The integer i.").tag(config=True)
    j = Integer(1, help="The integer j.").tag(config=True)
    name = Unicode(u'Brian', help="First name.").tag(config=True)
class FirstUseAuthenticator(Authenticator):
    """
    JupyterHub authenticator that lets users set password on first use.
    """

    dbm_path = Unicode(
        'passwords.dbm',
        config=True,
        help="""
        Path to store the db file with username / pwd hash in
        """,
    )

    create_users = Bool(
        True,
        config=True,
        help="""
        Create users if they do not exist already.

        When set to false, users would have to be explicitly created before
        they can log in. Users can be created via the admin panel or by
        setting whitelist / admin list.
        """,
    )

    min_password_length = Integer(
        7,
        config=True,
        help="""
        The minimum length of the password when user is created.

        When set to 0, users will be allowed to set 0 length passwords.
        """,
    )

    check_passwords_on_startup = Bool(
        True,
        config=True,
        help="""
        Check for non-normalized-username passwords on startup.

        Prior to 1.0, multiple passwords could be set for the same username,
        without normalization.

        When True, duplicate usernames will be detected and removed,
        and ensure all usernames are normalized.

        If any duplicates are found, a backup of the original is created,
        which can be inspected manually.

        Typically, this will only need to run once.
        """,
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        if self.check_passwords_on_startup:
            self._check_passwords()

    def _check_passwords(self):
        """Validation checks on the password database at startup

        Mainly checks for the presence of passwords for non-normalized usernames

        If a username is present only in one non-normalized form,
        it will be renamed to the normalized form.

        If multiple forms of the same normalized username are present,
        ensure that at least the normalized form is also present.
        It will continue to produce warnings until manual intervention removes
        the non-normalized entries.

        Non-normalized entries will never be used during login.
        """
        # it's nontrival to check for db existence, because there are so many extensions
        # and you don't give dbm a path, you give it a *base* name,
        # which may point to one or more paths.
        # There's no way to retrieve the actual path(s) for a db
        dbm_extensions = ("", ".db", ".pag", ".dir", ".dat", ".bak")
        dbm_files = list(
            filter(os.path.isfile, (self.dbm_path + ext for ext in dbm_extensions))
        )
        if not dbm_files:
            # no database, nothing to do
            return

        backup_path = self.dbm_path + "-backup"
        backup_files = list(
            filter(os.path.isfile, (backup_path + ext for ext in dbm_extensions))
        )

        collision_warning = (
            f"Duplicate password entries have been found, and stored in {backup_path!r}."
            f" Duplicate entries have been removed from {self.dbm_path!r}."
            f" If you are happy with the solution, you can delete the backup file(s): {' '.join(backup_files)}."
            " Or you can inspect the backup database with:\n"
            "    import dbm\n"
            f"    with dbm.open({backup_path!r}, 'r') as db:\n"
            "        for username in db.keys():\n"
            "            print(username, db[username])\n"
        )

        if backup_files:
            self.log.warning(collision_warning)
            return

        # create a temporary backup of the passwords db
        # to be retained only if collisions are detected
        # or deleted if no collisions are detected
        backup_files = []
        for path in dbm_files:
            base, ext = os.path.splitext(path)
            if ext not in dbm_extensions:
                # catch weird names with '.' and no .db extension
                base = path
                ext = ""
            backup = f"{base}-backup{ext}"
            shutil.copyfile(path, backup)
            backup_files.append(backup)

        collision_found = False

        with dbm.open(self.dbm_path, "w") as db:
            # load the username:hashed_password dict
            passwords = {}
            for key in db.keys():
                passwords[key.decode("utf8")] = db[key]

            # normalization map
            # compute the full map before checking in case two non-normalized forms are used
            # keys are normalized usernames,
            # values are lists of all names present in the db
            # which normalize to the same user
            normalized_usernames = {}
            for username in passwords:
                normalized_username = self.normalize_username(username)
                normalized_usernames.setdefault(normalized_username, []).append(username)

            # check if any non-normalized usernames are in the db
            for normalized_username, usernames in normalized_usernames.items():
                # case 1. only one form, make sure it's stored in the normalized username
                if len(usernames) == 1:
                    username = usernames[0]
                    # case 1.a only normalized form, nothing to do
                    if username == normalized_username:
                        continue
                    # 1.b only one form, not normalized. Unambiguous to fix.
                    # move password from non-normalized to normalized.
                    self.log.warning(
                        f"Normalizing username in password db {username}->{normalized_username}"
                    )
                    db[normalized_username.encode("utf8")] = passwords[username]
                    del db[username]
                else:
                    # collision! Multiple passwords for the same Hub user with different normalization
                    # do not clear these automatically because the 'right' answer is ambiguous,
                    # but make sure the normalized_username is set,
                    # so that after upgrade, there is always a password set
                    # the non-normalized username passwords will never be used
                    # after jupyterhub-firstuseauthenticator 1.0
                    self.log.warning(
                        f"{len(usernames)} variations of the username {normalized_username} present in password database: {usernames}."
                        f" Only the password stored for the normalized {normalized_username} will be used."
                    )
                    collision_found = True
                    if normalized_username not in passwords:
                        # we choose usernames[0] as most likely to be the first entry
                        # this isn't guaranteed, but it's the best information we have
                        username = usernames[0]
                        self.log.warning(
                            f"Normalizing username in password db {username}->{normalized_username}"
                        )
                        db[normalized_username.encode("utf8")] = passwords[username]
                    for username in usernames:
                        if username != normalized_username:
                            self.log.warning(
                                f"Removing un-normalized username from password db {username}"
                            )
                            del db[username]

        if collision_found:
            self.log.warning(collision_warning)
        else:
            # remove backup files, if we didn't find anything to backup
            self.log.debug(f"No collisions found, removing backup files {backup_files}")
            for path in backup_files:
                try:
                    os.remove(path)
                except FileNotFoundError:
                    pass

    def _user_exists(self, username):
        """
        Return true if given user already exists.

        Note: Depends on internal details of JupyterHub that might change
        across versions. Tested with v0.9
        """
        return self.db.query(User).filter_by(name=username).first() is not None

    def _validate_password(self, password):
        return len(password) >= self.min_password_length

    def validate_username(self, name):
        invalid_chars = [',', ' ']
        if any((char in name) for char in invalid_chars):
            return False
        return super().validate_username(name)

    async def authenticate(self, handler, data):
        username = self.normalize_username(data["username"])
        password = data["password"]

        if not self.create_users:
            if not self._user_exists(username):
                return None

        with dbm.open(self.dbm_path, 'c', 0o600) as db:
            stored_pw = db.get(username.encode("utf8"), None)

            if stored_pw is not None:
                # for existing passwords: ensure password hash match
                if bcrypt.hashpw(password.encode("utf8"), stored_pw) != stored_pw:
                    return None
            else:
                # for new users: ensure password validity and store password hash
                if not self._validate_password(password):
                    handler.custom_login_error = (
                        'Password too short! Please choose a password at least %d characters long.'
                        % self.min_password_length
                    )
                    self.log.error(handler.custom_login_error)
                    return None
                db[username] = bcrypt.hashpw(password.encode("utf8"), bcrypt.gensalt())

        return username

    def delete_user(self, user):
        """
        When user is deleted, remove their entry from password db.

        This lets passwords be reset by deleting users.
        """
        try:
            with dbm.open(self.dbm_path, 'c', 0o600) as db:
                del db[user.name]
        except KeyError:
            pass

    def reset_password(self, username, new_password):
        """
        This allows changing the password of a logged user.
        """
        if not self._validate_password(new_password):
            login_err = (
                'Password too short! Please choose a password at least %d characters long.'
                % self.min_password_length
            )
            self.log.error(login_err)
            # Resetting the password will fail if the new password is too short.
            return login_err
        with dbm.open(self.dbm_path, "c", 0o600) as db:
            db[username] = bcrypt.hashpw(new_password.encode("utf8"), bcrypt.gensalt())
        login_msg = "Your password has been changed successfully!"
        self.log.info(login_msg)
        return login_msg

    def get_handlers(self, app):
        return [
            (r"/login", CustomLoginHandler),
            (r"/auth/change-password", ResetPasswordHandler),
        ]
class Foo(Configurable):
    a = Integer(0, help="The integer a.").tag(config=True)
    b = Unicode('nope').tag(config=True)
class FirstUseAuthenticator(Authenticator):
    """
    JupyterHub authenticator that lets users set password on first use.
    """
    dbm_path = Unicode(
        'passwords.dbm',
        config=True,
        help="""
        Path to store the db file with username / pwd hash in
        """,
    )

    create_users = Bool(
        True,
        config=True,
        help="""
        Create users if they do not exist already.

        When set to false, users would have to be explicitly created before
        they can log in. Users can be created via the admin panel or by
        setting whitelist / admin list.
        """,
    )

    min_password_length = Integer(
        7,
        config=True,
        help="""
        The minimum length of the password when user is created.

        When set to 0, users will be allowed to set 0 length passwords.
        """,
    )

    def _user_exists(self, username):
        """
        Return true if given user already exists.

        Note: Depends on internal details of JupyterHub that might change
        across versions. Tested with v0.9
        """
        return self.db.query(User).filter_by(name=username).first() is not None

    def _validate_password(self, password):
        return len(password) >= self.min_password_length

    def validate_username(self, name):
        invalid_chars = [',', ' ']
        if any((char in name) for char in invalid_chars):
            return False
        return super().validate_username(name)

    @gen.coroutine
    def authenticate(self, handler, data):
        username = data['username']

        if not self.create_users:
            if not self._user_exists(username):
                return None

        password = data['password']
        # Don't enforce password length requirement on existing users, since
        # that can lock users out of their hubs.
        with dbm.open(self.dbm_path, 'c', 0o600) as db:
            stored_pw = db.get(username.encode(), None)
            if stored_pw is not None:
                if bcrypt.hashpw(password.encode(), stored_pw) != stored_pw:
                    return None
            else:
                if not self._validate_password(password):
                    handler.custom_login_error = (
                        'Password too short! Please choose a password at least %d characters long.'
                        % self.min_password_length
                    )
                    self.log.error(handler.custom_login_error)
                    return None
                db[username] = bcrypt.hashpw(password.encode(), bcrypt.gensalt())
        return username

    def delete_user(self, user):
        """
        When user is deleted, remove their entry from password db.

        This lets passwords be reset by deleting users.
        """
        try:
            with dbm.open(self.dbm_path, 'c', 0o600) as db:
                del db[user.name]
        except KeyError:
            pass

    def reset_password(self, username, new_password):
        """
        This allows changing the password of a logged user.
        """
        if not self._validate_password(new_password):
            login_err = (
                'Password too short! Please choose a password at least %d characters long.'
                % self.min_password_length
            )
            self.log.error(login_err)
            # Resetting the password will fail if the new password is too short.
            return login_err
        with dbm.open(self.dbm_path, 'c', 0o600) as db:
            db[username] = bcrypt.hashpw(new_password.encode(), bcrypt.gensalt())
        login_msg = "Your password has been changed successfully!"
        self.log.info(login_msg)
        return login_msg

    def get_handlers(self, app):
        return [
            (r'/login', CustomLoginHandler),
            (r'/auth/change-password', ResetPasswordHandler),
        ]
class Foo(Configurable):
    a = Integer(0, config=True, help="The integer a.")
    b = Unicode('nope', config=True)
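# Note (context, not part of the original snippet): passing config=True directly
# to the trait constructor, as above, is the older metadata-keyword style. With
# traitlets >= 4.1 the .tag() form used elsewhere in this document is the
# equivalent, preferred spelling; behaviour is the same.
from traitlets import Integer, Unicode
from traitlets.config import Configurable


class Foo(Configurable):
    a = Integer(0, help="The integer a.").tag(config=True)
    b = Unicode('nope').tag(config=True)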
class FileMetadataCache(SingletonConfigurable):
    """FileMetadataCache is used exclusively by FileMetadataStore to cache file-based metadata instances.

    FileMetadataCache utilizes a watchdog handler to monitor directories corresponding to any
    files it contains.  The handler is primarily used to determine which cached entries to
    remove (on delete operations).

    The cache is implemented as a simple LRU cache using an OrderedDict.
    """

    max_size = Integer(
        min=1,
        max=1024,
        default_value=128,
        config=True,
        help="The maximum number of entries allowed in the cache.")

    enabled = Bool(
        default_value=True,
        config=True,
        help="Caching is enabled (True) or disabled (False).")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.hits: int = 0
        self.misses: int = 0
        self.trims: int = 0
        self._entries: OrderedDict = OrderedDict()
        if self.enabled:
            # Only create and start an observer when enabled
            self.observed_dirs = set()  # Tracks which directories are being watched
            self.observer = Observer()
            self.observer.start()
        else:
            self.log.info(
                "The file metadata cache is currently disabled via configuration. "
                "Set FileMetadataCache.enabled=True to enable instance caching.")

    def __len__(self) -> int:
        """Return the number of entries currently held in the cache."""
        return len(self._entries)

    def __contains__(self, path: str) -> bool:
        return path in self._entries

    @caching_enabled
    def add_item(self, path: str, entry: Dict[str, Any]) -> None:
        """Adds the named entry and its entry to the cache.

        If this causes the cache to grow beyond its max size, the least recently
        used entry is removed.
        """
        md_dir: str = os.path.dirname(path)
        if md_dir not in self.observed_dirs and os.path.isdir(md_dir):
            self.observer.schedule(FileChangeHandler(self), md_dir, recursive=True)
            self.observed_dirs.add(md_dir)

        self._entries[path] = copy.deepcopy(entry)
        self._entries.move_to_end(path)
        if len(self._entries) > self.max_size:
            self.trims += 1
            self._entries.popitem(last=False)  # pop LRU entry

    @caching_enabled
    def get_item(self, path: str) -> Optional[Dict[str, Any]]:
        """Gets the named entry and returns its value or None if not present."""
        if path in self._entries:
            self.hits += 1
            self._entries.move_to_end(path)
            return copy.deepcopy(self._entries[path])

        self.misses += 1
        return None

    @caching_enabled
    def remove_item(self, path: str) -> Optional[Dict[str, Any]]:
        """Removes the named entry and returns its value or None if not present."""
        if path in self._entries:
            return self._entries.pop(path)

        return None