def test_args():
    """All components register their CLI args; the default interval is 3600s."""
    arg_parser = get_arg_parser()
    # Every component contributes its own command line arguments.
    for component in (WebServer, GraphContainer, Processor, PluginLoader):
        component.add_args(arg_parser)
    event_add_args(arg_parser)
    arg_parser.parse_args()
    assert ArgumentParser.args.interval == 3600
def test_processor():
    """Run a Processor with a single test plugin and check the resulting graph size."""
    arg_parser = get_arg_parser()
    Processor.add_args(arg_parser)
    GraphContainer.add_args(arg_parser)
    event_add_args(arg_parser)
    arg_parser.parse_args()

    container = GraphContainer(cache_graph=False)
    processor = Processor(container, [SomeTestPlugin])
    processor.daemon = True
    processor.start()
    time.sleep(1)  # give the collector thread a moment to populate the graph
    # Expect the plugin's resources plus two extra nodes
    # (presumably graph/cloud root nodes — defined outside this chunk).
    assert len(processor.gc.graph.nodes) == num_resources + 2
    processor.shutdown(Event(EventType.SHUTDOWN))
def test_web():
    """Start the WebServer on a free port and verify the /health endpoint."""
    # Ask the OS for a free port by binding port 0, then release it.
    # FIXME: racy — another process may grab the port before WebServer binds it.
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    probe.bind(("", 0))
    _, free_port = probe.getsockname()
    probe.close()

    arg_parser = get_arg_parser()
    WebServer.add_args(arg_parser)
    event_add_args(arg_parser)
    arg_parser.parse_args()
    ArgumentParser.args.web_port = free_port

    graph_container = GraphContainer(cache_graph=False)
    server = WebServer(graph_container)
    server.daemon = True
    server.start()

    response = requests.get(f"http://localhost:{free_port}/health")
    assert response.content == b"ok\r\n"
def main() -> None:
    """Entry point: wire up CLI args and plugins, start all service threads,
    then block until the shutdown event fires and terminate cleanly."""
    import sys  # local import: only needed for the final exit below

    log.info("Cloudkeeper initializing")
    # Try to run in a new process group and
    # ignore if not possible for whatever reason.
    try:
        os.setpgid(0, 0)
    except OSError:
        # was a bare `except:` — that would also swallow KeyboardInterrupt/
        # SystemExit; setpgid failures (e.g. EPERM in containers) are non-fatal
        pass

    cloudkeeper.signal.parent_pid = os.getpid()

    # Add cli args
    arg_parser = get_arg_parser()
    logging.add_args(arg_parser)
    Cli.add_args(arg_parser)
    WebServer.add_args(arg_parser)
    Scheduler.add_args(arg_parser)
    Processor.add_args(arg_parser)
    Cleaner.add_args(arg_parser)
    PluginLoader.add_args(arg_parser)
    GraphContainer.add_args(arg_parser)
    event_add_args(arg_parser)

    # Find cloudkeeper Plugins in the cloudkeeper.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer have
    # added their args to the arg parser
    arg_parser.parse_args()

    # Handle Ctrl+c and other means of termination/shutdown
    cloudkeeper.signal.initializer()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)

    # Try to increase nofile and nproc limits
    increase_limits()

    # We're using a GraphContainer() to contain the graph which gets replaced
    # at runtime. This way we're not losing the context in other places like
    # the webserver when the graph gets reassigned.
    graph_container = GraphContainer()

    # GraphCollector() is a custom Prometheus Collector that
    # takes a graph and yields its metrics
    graph_collector = GraphCollector(graph_container)
    REGISTRY.register(graph_collector)

    # Scheduler() starts an APScheduler instance
    scheduler = Scheduler(graph_container)
    scheduler.daemon = True
    scheduler.start()

    # Cli() is the CLI Thread
    cli = Cli(graph_container, scheduler)
    cli.daemon = True
    cli.start()

    # WebServer is handed the graph container context so it can e.g. produce graphml
    # from it. The webserver serves Prometheus Metrics as well as different graph
    # endpoints.
    web_server = WebServer(graph_container)
    web_server.daemon = True
    web_server.start()

    # Start persistent plugins; a failing plugin must not take down the core.
    for Plugin in plugin_loader.plugins(PluginType.PERSISTENT):
        try:
            log.debug(f"Starting persistent Plugin {Plugin}")
            plugin = Plugin()
            plugin.daemon = True
            plugin.start()
        except Exception as e:
            log.exception(f"Caught unhandled persistent Plugin exception {e}")

    # Processor drives the collect cycle using all collector plugins.
    processor = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR))
    processor.daemon = True
    processor.start()

    # Dispatch the STARTUP event
    dispatch_event(Event(EventType.STARTUP))

    # We wait for the shutdown Event to be set() and then end the program
    # While doing so we print the list of active threads once per 15 minutes
    while not shutdown_event.is_set():
        log_stats()
        shutdown_event.wait(900)

    time.sleep(5)
    cloudkeeper.signal.kill_children(cloudkeeper.signal.SIGTERM, ensure_death=True)
    log.info("Shutdown complete")
    # was quit(): a `site`-module convenience that is absent under `python -S`;
    # sys.exit() raises the same SystemExit(None)
    sys.exit()
def main() -> None:
    """Entry point: register CLI args, start all service threads, and wait
    for the shutdown event before exiting."""
    # Register cli args for every core component.
    arg_parser = get_arg_parser()
    for component in (Cli, WebServer, Scheduler, Processor, Cleaner, PluginLoader, GraphContainer):
        component.add_args(arg_parser)
    event_add_args(arg_parser)

    # Find cloudkeeper Plugins in the cloudkeeper.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer
    # have added their args to the arg parser
    arg_parser.parse_args()

    # Write log to a file in addition to stdout
    if ArgumentParser.args.logfile:
        file_handler = logging.FileHandler(ArgumentParser.args.logfile)
        file_handler.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(file_handler)

    # Handle Ctrl+c and other means of termination/shutdown
    signal_on_parent_exit()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)
    for sig in (SIGINT, SIGTERM, SIGUSR1):
        signal(sig, signal_handler)

    # A GraphContainer holds the graph which gets replaced at runtime, so other
    # places (e.g. the webserver) keep their context when the graph is reassigned.
    graph_container = GraphContainer()

    # GraphCollector() is a custom Prometheus Collector that
    # takes a graph and yields its metrics
    REGISTRY.register(GraphCollector(graph_container))

    # Scheduler() starts an APScheduler instance
    scheduler = Scheduler(graph_container)
    scheduler.daemon = True
    scheduler.start()

    # Cli() is the CLI Thread
    cli = Cli(graph_container, scheduler)
    cli.daemon = True
    cli.start()

    # WebServer is handed the graph container context so it can e.g. produce
    # graphml from it. It also serves Prometheus metrics and graph endpoints.
    web_server = WebServer(graph_container)
    web_server.daemon = True
    web_server.start()

    # Start persistent plugins; a crashing plugin is logged, not fatal.
    for Plugin in plugin_loader.plugins(PluginType.PERSISTENT):
        try:
            log.debug(f"Starting persistent Plugin {Plugin}")
            plugin = Plugin()
            plugin.daemon = True
            plugin.start()
        except Exception as e:
            log.exception(f"Caught unhandled persistent Plugin exception {e}")

    collector = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR))
    collector.daemon = True
    collector.start()

    # Dispatch the STARTUP event
    dispatch_event(Event(EventType.STARTUP))

    # Wait for the shutdown Event to be set(), printing the list of
    # active threads once per 15 minutes while we wait.
    while not shutdown_event.is_set():
        log_stats()
        shutdown_event.wait(900)
    time.sleep(5)
    log.info("Shutdown complete")
    quit()