def cleanup(self) -> None:
    """cleanup() is run after collect() and creates a new instance of the Cleaner()"""
    if ArgumentParser.args.cleanup:
        if ArgumentParser.args.no_cleanup_after_collect:
            log.debug("Not running automated cleanup after collect")
            return
        resource_cleaner = Cleaner(self.gc.graph)
        resource_cleaner.cleanup()
def cmd_cleanup(self, items: Iterable, args: str) -> Iterable:
    '''Usage: cleanup

    Perform a resource cleanup.
    WARNING: THIS WILL IMMEDIATELY DELETE ALL RESOURCES FLAGGED FOR CLEANUP
    '''
    yield 'Beginning cleanup'
    resource_cleaner = Cleaner(self.graph)
    resource_cleaner.cleanup()
    yield 'Cleanup finished'
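# cmd_cleanup() is written as a generator so the CLI can stream status lines back
# to the user while the command runs. A minimal self-contained sketch of such a
# dispatcher is below; the Dispatcher class and the echo command are hypothetical
# stand-ins, not part of cloudkeeper's actual CLI.
from typing import Iterable


class Dispatcher:
    def handle(self, line: str) -> None:
        # Split the input into command name and arguments, look up a matching
        # cmd_<name> generator method, and print each line it yields.
        name, _, args = line.partition(" ")
        method = getattr(self, f"cmd_{name}", None)
        if method is None:
            print(f"Unknown command: {name}")
            return
        for output in method([], args):
            print(output)

    def cmd_echo(self, items: Iterable, args: str) -> Iterable:
        '''Usage: echo <text>

        Echo the given text back to the caller.
        '''
        yield args


if __name__ == "__main__":
    Dispatcher().handle("echo hello")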
def main() -> None:
    log.info("Cloudkeeper initializing")
    # Try to run in a new process group and
    # ignore if not possible for whatever reason
    try:
        os.setpgid(0, 0)
    except OSError:
        pass

    cloudkeeper.signal.parent_pid = os.getpid()

    # Add cli args
    arg_parser = get_arg_parser()
    logging.add_args(arg_parser)
    Cli.add_args(arg_parser)
    WebServer.add_args(arg_parser)
    Scheduler.add_args(arg_parser)
    Processor.add_args(arg_parser)
    Cleaner.add_args(arg_parser)
    PluginLoader.add_args(arg_parser)
    GraphContainer.add_args(arg_parser)
    event_add_args(arg_parser)

    # Find cloudkeeper Plugins in the cloudkeeper.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer have
    # added their args to the arg parser
    arg_parser.parse_args()

    # Handle Ctrl+c and other means of termination/shutdown
    cloudkeeper.signal.initializer()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)

    # Try to increase nofile and nproc limits
    increase_limits()

    # We're using a GraphContainer() to contain the graph which gets replaced
    # at runtime. This way we're not losing the context in other places like
    # the webserver when the graph gets reassigned.
    graph_container = GraphContainer()

    # GraphCollector() is a custom Prometheus Collector that
    # takes a graph and yields its metrics
    graph_collector = GraphCollector(graph_container)
    REGISTRY.register(graph_collector)

    # Scheduler() starts an APScheduler instance
    scheduler = Scheduler(graph_container)
    scheduler.daemon = True
    scheduler.start()

    # Cli() is the CLI Thread
    cli = Cli(graph_container, scheduler)
    cli.daemon = True
    cli.start()

    # WebServer is handed the graph container context so it can e.g. produce graphml
    # from it. The webserver serves Prometheus Metrics as well as different graph
    # endpoints.
    web_server = WebServer(graph_container)
    web_server.daemon = True
    web_server.start()

    for Plugin in plugin_loader.plugins(PluginType.PERSISTENT):
        try:
            log.debug(f"Starting persistent Plugin {Plugin}")
            plugin = Plugin()
            plugin.daemon = True
            plugin.start()
        except Exception as e:
            log.exception(f"Caught unhandled persistent Plugin exception {e}")

    processor = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR))
    processor.daemon = True
    processor.start()

    # Dispatch the STARTUP event
    dispatch_event(Event(EventType.STARTUP))

    # We wait for the shutdown Event to be set() and then end the program
    # While doing so we print the list of active threads once per 15 minutes
    while not shutdown_event.is_set():
        log_stats()
        shutdown_event.wait(900)

    time.sleep(5)
    cloudkeeper.signal.kill_children(cloudkeeper.signal.SIGTERM, ensure_death=True)
    log.info("Shutdown complete")
    quit()
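# The GraphCollector registered above follows the prometheus_client custom-collector
# protocol: an object with a collect() method that yields metric families on every
# scrape. A minimal sketch of that pattern; the node-count metric and the DummyGraph
# container are hypothetical stand-ins, not cloudkeeper's actual implementation.
from prometheus_client import generate_latest
from prometheus_client.core import GaugeMetricFamily, REGISTRY


class DummyGraph:
    nodes = ["a", "b", "c"]


class GraphNodeCollector:
    def __init__(self, graph) -> None:
        self.graph = graph

    def collect(self):
        # Called on every scrape; metrics are computed from whatever graph is
        # current at that moment, so a replaced graph is picked up automatically.
        gauge = GaugeMetricFamily("graph_nodes_total", "Number of nodes in the graph")
        gauge.add_metric([], len(self.graph.nodes))
        yield gauge


REGISTRY.register(GraphNodeCollector(DummyGraph()))
print(generate_latest(REGISTRY).decode())  # render metrics in the text exposition format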
def cleanup(self) -> None:
    """cleanup() is run after collect() and creates a new instance of the Cleaner()"""
    if ArgumentParser.args.cleanup:
        resource_cleaner = Cleaner(self.gc.graph)
        resource_cleaner.cleanup()
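# Both cleanup() variants read flags from a class-level ArgumentParser.args
# namespace that every component shares after parse_args() runs once at startup.
# A minimal sketch of one plausible implementation of that shared-args pattern;
# the flag wiring here is illustrative, not cloudkeeper's actual code.
import argparse


class ArgumentParser(argparse.ArgumentParser):
    # Parsed namespace shared by all components after parse_args() runs.
    args: argparse.Namespace = argparse.Namespace()

    def parse_args(self, *args, **kwargs):
        ns = super().parse_args(*args, **kwargs)
        ArgumentParser.args = ns
        return ns


class Cleaner:
    @staticmethod
    def add_args(arg_parser: argparse.ArgumentParser) -> None:
        # Each component contributes its own flags to the shared parser.
        arg_parser.add_argument("--cleanup", action="store_true", help="Enable cleanup")


if __name__ == "__main__":
    parser = ArgumentParser(description="shared args sketch")
    Cleaner.add_args(parser)
    parser.parse_args(["--cleanup"])
    print(ArgumentParser.args.cleanup)  # True, readable from any module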
def main() -> None:
    # Add cli args
    arg_parser = get_arg_parser()
    Cli.add_args(arg_parser)
    WebServer.add_args(arg_parser)
    Scheduler.add_args(arg_parser)
    Processor.add_args(arg_parser)
    Cleaner.add_args(arg_parser)
    PluginLoader.add_args(arg_parser)
    GraphContainer.add_args(arg_parser)
    event_add_args(arg_parser)

    # Find cloudkeeper Plugins in the cloudkeeper.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer
    # have added their args to the arg parser
    arg_parser.parse_args()

    # Write log to a file in addition to stdout
    if ArgumentParser.args.logfile:
        log_formatter = logging.Formatter(log_format)
        fh = logging.FileHandler(ArgumentParser.args.logfile)
        fh.setFormatter(log_formatter)
        logging.getLogger().addHandler(fh)

    # Handle Ctrl+c and other means of termination/shutdown
    signal_on_parent_exit()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)
    signal(SIGINT, signal_handler)
    signal(SIGTERM, signal_handler)
    signal(SIGUSR1, signal_handler)

    # We're using a GraphContainer() to contain the graph which gets replaced at runtime.
    # This way we're not losing the context in other places like the webserver when the
    # graph gets reassigned.
    graph_container = GraphContainer()

    # GraphCollector() is a custom Prometheus Collector that
    # takes a graph and yields its metrics
    graph_collector = GraphCollector(graph_container)
    REGISTRY.register(graph_collector)

    # Scheduler() starts an APScheduler instance
    scheduler = Scheduler(graph_container)
    scheduler.daemon = True
    scheduler.start()

    # Cli() is the CLI Thread
    cli = Cli(graph_container, scheduler)
    cli.daemon = True
    cli.start()

    # WebServer is handed the graph container context so it can e.g. produce graphml from it.
    # The webserver serves Prometheus Metrics as well as different graph endpoints.
    web_server = WebServer(graph_container)
    web_server.daemon = True
    web_server.start()

    for Plugin in plugin_loader.plugins(PluginType.PERSISTENT):
        try:
            log.debug(f'Starting persistent Plugin {Plugin}')
            plugin = Plugin()
            plugin.daemon = True
            plugin.start()
        except Exception as e:
            log.exception(f'Caught unhandled persistent Plugin exception {e}')

    collector = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR))
    collector.daemon = True
    collector.start()

    # Dispatch the STARTUP event
    dispatch_event(Event(EventType.STARTUP))

    # We wait for the shutdown Event to be set() and then end the program.
    # While doing so we print the list of active threads once per 15 minutes.
    while not shutdown_event.is_set():
        log_stats()
        shutdown_event.wait(900)

    time.sleep(5)
    log.info('Shutdown complete')
    quit()
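# Both main() variants follow the same shutdown pattern: signal handlers set a
# threading.Event, and the main thread blocks on it with a periodic wake-up to log
# stats. A minimal self-contained sketch of that pattern using only the standard
# library; run() and the print statements are illustrative stand-ins.
import threading
import time
from signal import SIGINT, SIGTERM, signal

shutdown_event = threading.Event()


def signal_handler(signum, frame) -> None:
    # Runs when SIGINT/SIGTERM arrives; setting the Event unblocks
    # the wait loop below so shutdown can proceed.
    shutdown_event.set()


def run() -> None:
    signal(SIGINT, signal_handler)
    signal(SIGTERM, signal_handler)
    while not shutdown_event.is_set():
        print(f"active threads: {threading.active_count()}")
        # wait() returns early as soon as the event is set, so shutdown
        # is never delayed by the full 15-minute timeout.
        shutdown_event.wait(900)
    time.sleep(5)  # give daemon threads a moment to wind down
    print("Shutdown complete")


if __name__ == "__main__":
    run()  # blocks until Ctrl+C or SIGTERM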