def __init__(self): super().__init__() self.name = "cleanup_aws_loadbalancers" self.exit = threading.Event() if ArgumentParser.args.cleanup_aws_loadbalancers: try: self.age = parse_delta( ArgumentParser.args.cleanup_aws_loadbalancers_age ) log.debug(f"AWS Loadbalancer Cleanup Plugin Age {self.age}") add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.CLEANUP_PLAN, self.loadbalancer_cleanup, blocking=True, timeout=3600, ) except ValueError: log.exception( ( f"Error while parsing AWS Loadbalancer " f"Cleanup Age {ArgumentParser.args.cleanup_aws_loadbalancers_age}" ) ) else: self.exit.set()
def __init__(self): super().__init__() self.name = "example_persistent" self.exit = threading.Event() add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.PROCESS_FINISH, self.example_event_handler, blocking=False )
def __init__(self, gc) -> None: super().__init__() self.name = "scheduler" self.exit = threading.Event() self.gc = gc self._sched = BackgroundScheduler(daemon=True) self._event_prefixes = tuple((f"{e.name.lower()}:" for e in EventType)) add_event_listener(EventType.SHUTDOWN, self.shutdown)
def __init__(self):
    super().__init__()
    self.name = 'cleanup_expired'
    self.exit = threading.Event()
    if ArgumentParser.args.cleanup_expired:
        add_event_listener(EventType.SHUTDOWN, self.shutdown)
        add_event_listener(EventType.CLEANUP_PLAN, self.expired_cleanup, blocking=True)
    else:
        self.exit.set()

def __init__(self, gc: GraphContainer, plugins: List) -> None:
    super().__init__()
    self.name = 'processor'
    self.gc = gc
    self.plugins = plugins
    self.__run = True
    self.__run_event = threading.Event()
    self.__interval = ArgumentParser.args.interval
    add_event_listener(EventType.SHUTDOWN, self.shutdown)
    add_event_listener(EventType.START_COLLECT, self.start_collect)

def __init__(self):
    super().__init__()
    self.name = 'metrics_age_range'
    self.exit = threading.Event()
    if ArgumentParser.args.metrics_age_range:
        add_event_listener(EventType.GENERATE_METRICS, self.generate_age_range_metrics, blocking=True)
        add_event_listener(EventType.SHUTDOWN, self.shutdown)
    else:
        self.exit.set()

def __init__(self):
    super().__init__()
    self.name = 'logdump'
    self.exit = threading.Event()
    if not ArgumentParser.args.logdump_path:
        self.exit.set()
        return
    self.logdump_path = Path(ArgumentParser.args.logdump_path)
    self.logdump_path.mkdir(parents=True, exist_ok=True)
    add_event_listener(EventType.SHUTDOWN, self.shutdown)
    add_event_listener(EventType.PROCESS_FINISH, self.dump_resource_event_logs, blocking=False)

def __init__(self, gc: GraphContainer, scheduler) -> None:
    super().__init__()
    self.name = 'cli'
    self.exit = threading.Event()
    self.gc = gc
    self.scheduler = scheduler
    self.__run = not ArgumentParser.args.no_cli
    for action in ArgumentParser.args.cli_actions:
        register_cli_action(action)
    read_cli_actions_config()
    add_event_listener(EventType.SHUTDOWN, self.shutdown)

def __init__(self, cache_graph=True) -> None:
    self._graph = None
    self._observers = []
    self.__lock = threading.Lock()
    self.graph = Graph()
    resource_attr = get_resource_attributes(self.GRAPH_ROOT)
    self.graph.add_node(self.GRAPH_ROOT, label=self.GRAPH_ROOT.id, **resource_attr)
    if cache_graph:
        self.cache = GraphCache()
        self.cache.update_cache(Event(EventType.STARTUP, self.graph))
        add_event_listener(EventType.COLLECT_FINISH, self.cache.update_cache)
        add_event_listener(EventType.CLEANUP_FINISH, self.cache.update_cache)
    else:
        self.cache = None

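# GraphCache.update_cache is registered as a listener above, so the cache is
# refreshed whenever a collect or cleanup run finishes. A minimal sketch of
# that interface, assuming the Event's second constructor argument is exposed
# as event.data (the class body is an assumption; only the method name and
# its call sites come from the code above):
class GraphCache:
    def __init__(self) -> None:
        self._cached_graph = None

    def update_cache(self, event: Event) -> None:
        # Hold on to the graph carried by the event for later export.
        self._cached_graph = event.data
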
def __init__(self):
    super().__init__()
    self.name = 'tagvalidator'
    self.exit = threading.Event()
    self.run_lock = threading.Lock()
    if ArgumentParser.args.tagvalidator_config:
        self.config = TagValidatorConfig(ArgumentParser.args.tagvalidator_config)
        add_event_listener(EventType.SHUTDOWN, self.shutdown)
        add_event_listener(EventType.COLLECT_FINISH, self.tag_validator, blocking=True, timeout=900)
    else:
        self.exit.set()

def __init__(self):
    super().__init__()
    self.name = 'protect_snowflakes'
    self.exit = threading.Event()
    if ArgumentParser.args.protect_snowflakes_config:
        self.config = ProtectSnowflakesConfig(
            config_file=ArgumentParser.args.protect_snowflakes_config)
        self.config.read()  # initial read to ensure config format is valid
        add_event_listener(EventType.SHUTDOWN, self.shutdown)
        add_event_listener(EventType.COLLECT_FINISH, self.protect_snowflakes, blocking=True, timeout=900)
    else:
        self.exit.set()

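# Several plugins (protect_snowflakes, cleanup_aws_vpcs, cleanup_untagged)
# share this pattern: construct a config object, then call read() once at
# startup so a malformed file fails fast. A sketch of that shape, assuming a
# YAML config file (the class body and the yaml dependency are assumptions):
import yaml

class ProtectSnowflakesConfig(dict):
    def __init__(self, config_file: str) -> None:
        super().__init__()
        self.config_file = config_file

    def read(self) -> None:
        with open(self.config_file) as f:
            data = yaml.safe_load(f)
        if not isinstance(data, dict):
            raise ValueError(f"Invalid config file {self.config_file}")
        self.update(data)
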
def __init__(self): super().__init__() self.name = "remote_event_callback" self.exit = threading.Event() add_event_listener(EventType.SHUTDOWN, self.shutdown) for endpoint in ArgumentParser.args.remote_event_endpoint: for event_type in EventType: event_prefix = f"{event_type.name.lower()}:" if str(endpoint).startswith(event_prefix): endpoint = endpoint[len(event_prefix) :] f = partial(self.remote_event_callback, endpoint) add_event_listener(event_type, f, blocking=False, one_shot=False) else: log.error(f"Invalid remote event callback endpoint {endpoint}")
def __init__(self): super().__init__() self.name = "report_cleanups" self.exit = threading.Event() if not ArgumentParser.args.report_cleanups_path: self.exit.set() return self.report_cleanups_path = Path(ArgumentParser.args.report_cleanups_path) self.report_cleanups_path.mkdir(parents=True, exist_ok=True) add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.CLEANUP_FINISH, self.report_cleanup, blocking=False )
def __init__(self): super().__init__() self.name = "tag_aws_ctime" self.exit = threading.Event() self.run_lock = threading.Lock() if ArgumentParser.args.tag_aws_ctime: log.debug("AWS ctime Tagger plugin initializing") add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.COLLECT_FINISH, self.aws_ctime_tagger, blocking=False, timeout=900, ) else: self.exit.set()
def __init__(self, gc) -> None:
    super().__init__()
    self.name = 'webserver'
    api = falcon.API()
    api.add_route('/health', HealthCheck())
    api.add_route('/metrics', Metrics())
    api.add_route('/graph', Remote(gc))
    api.add_route('/collect', Collect())
    api.add_route('/graph.gexf', GEXF(gc))
    api.add_route('/graph.graphml', GraphML(gc))
    api.add_route('/graph.json', JSON(gc))
    api.add_route('/graph.net', Pajek(gc))
    api.add_route('/graph.txt', TXT(gc))
    self.httpd = make_server('', ArgumentParser.args.web_port, api,
                             ThreadingWSGIServer, CloudkeeperRequestHandler)
    add_event_listener(EventType.SHUTDOWN, self.shutdown)

def __init__(self):
    super().__init__()
    self.name = 'cleanup_volumes'
    self.exit = threading.Event()
    if ArgumentParser.args.cleanup_volumes:
        try:
            self.age = parse_delta(ArgumentParser.args.cleanup_volumes_age)
            log.debug(f'Volume Cleanup Plugin Age {self.age}')
            add_event_listener(EventType.SHUTDOWN, self.shutdown)
            add_event_listener(EventType.CLEANUP_PLAN, self.volumes_cleanup, blocking=True)
        except ValueError:
            log.exception(
                f'Error while parsing Volume Cleanup Age {ArgumentParser.args.cleanup_volumes_age}'
            )
    else:
        self.exit.set()

def __init__(self): super().__init__() self.name = "cleanup_aws_vpcs" self.exit = threading.Event() if ArgumentParser.args.cleanup_aws_vpcs: add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.CLEANUP_BEGIN, self.vpc_cleanup, blocking=True, timeout=3600, ) else: self.exit.set() self.config = {} if ArgumentParser.args.cleanup_aws_vpcs_config: self.config = CleanupAWSVPCsConfig( config_file=ArgumentParser.args.cleanup_aws_vpcs_config ) self.config.read() # initial read to ensure config format is valid
def __init__(self):
    super().__init__()
    self.name = 'slack_bot'
    if not ArgumentParser.args.slack_bot_token:
        return
    self.client = slack.WebClient(token=ArgumentParser.args.slack_bot_token)
    self.exit = threading.Event()
    self.users2id = {}
    self.emails2id = {}
    self.usergroups2id = {}
    self.channels2id = {}
    add_event_listener(EventType.SHUTDOWN, self.shutdown)
    add_event_listener(EventType.PROCESS_FINISH, self.process_cloudkeeper_events, blocking=False)

def __init__(self): super().__init__() self.name = "cleanup_untagged" self.exit = threading.Event() if ArgumentParser.args.cleanup_untagged_config: self.config = CleanupUntaggedConfig( config_file=ArgumentParser.args.cleanup_untagged_config ) self.config.read() # initial read to ensure config format is valid add_event_listener(EventType.SHUTDOWN, self.shutdown) add_event_listener( EventType.CLEANUP_PLAN, self.cleanup_untagged, blocking=True, timeout=900, ) else: self.exit.set()
def __init__(self, gc) -> None: super().__init__() self.name = "webserver" api = falcon.API() api.add_route("/health", HealthCheck()) api.add_route("/metrics", Metrics()) api.add_route("/graph", Remote(gc)) api.add_route("/collect", Collect()) api.add_route("/graph.gexf", GEXF(gc)) api.add_route("/graph.graphml", GraphML(gc)) api.add_route("/graph.json", JSON(gc)) api.add_route("/graph.net", Pajek(gc)) api.add_route("/graph.txt", TXT(gc)) self.httpd = make_server( "", ArgumentParser.args.web_port, api, ThreadingWSGIServer, CloudkeeperRequestHandler, ) add_event_listener(EventType.SHUTDOWN, self.shutdown)
def register_cli_action(action: str, one_shot: bool = False) -> bool:
    if ':' not in action:
        log.error(f'Invalid CLI action {action}')
        return False
    event, command = action.split(':', 1)
    event = event.strip()
    command = command.strip()
    # A leading "1" on the event name marks the listener as one-shot.
    if event.startswith('1'):
        one_shot = True
        event = event[1:]
    for e in EventType:
        if event == e.name.lower():
            f = partial(cli_event_handler, command)
            return add_event_listener(e, f, blocking=True, one_shot=one_shot)
    log.error(f'Invalid event type {event}')
    return False

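# Hypothetical usage of register_cli_action (the command strings below are
# made-up examples, not documented defaults): run a CLI command on every
# cleanup plan, and another exactly once after the first collect finishes,
# thanks to the leading "1".
register_cli_action('cleanup_plan:cleanup')
register_cli_action('1collect_finish:print collect done')
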
def main() -> None: log.info("Cloudkeeper initializing") # Try to run in a new process group and # ignore if not possible for whatever reason try: os.setpgid(0, 0) except: pass cloudkeeper.signal.parent_pid = os.getpid() # Add cli args arg_parser = get_arg_parser() logging.add_args(arg_parser) Cli.add_args(arg_parser) WebServer.add_args(arg_parser) Scheduler.add_args(arg_parser) Processor.add_args(arg_parser) Cleaner.add_args(arg_parser) PluginLoader.add_args(arg_parser) GraphContainer.add_args(arg_parser) event_add_args(arg_parser) # Find cloudkeeper Plugins in the cloudkeeper.plugins module plugin_loader = PluginLoader() plugin_loader.add_plugin_args(arg_parser) # At this point the CLI, all Plugins as well as the WebServer have # added their args to the arg parser arg_parser.parse_args() # Handle Ctrl+c and other means of termination/shutdown cloudkeeper.signal.initializer() add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False) # Try to increase nofile and nproc limits increase_limits() # We're using a GraphContainer() to contain the graph which gets replaced # at runtime. This way we're not losing the context in other places like # the webserver when the graph gets reassigned. graph_container = GraphContainer() # GraphCollector() is a custom Prometheus Collector that # takes a graph and yields its metrics graph_collector = GraphCollector(graph_container) REGISTRY.register(graph_collector) # Scheduler() starts an APScheduler instance scheduler = Scheduler(graph_container) scheduler.daemon = True scheduler.start() # Cli() is the CLI Thread cli = Cli(graph_container, scheduler) cli.daemon = True cli.start() # WebServer is handed the graph container context so it can e.g. produce graphml # from it. The webserver serves Prometheus Metrics as well as different graph # endpoints. web_server = WebServer(graph_container) web_server.daemon = True web_server.start() for Plugin in plugin_loader.plugins(PluginType.PERSISTENT): try: log.debug(f"Starting persistent Plugin {Plugin}") plugin = Plugin() plugin.daemon = True plugin.start() except Exception as e: log.exception(f"Caught unhandled persistent Plugin exception {e}") processor = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR)) processor.daemon = True processor.start() # Dispatch the STARTUP event dispatch_event(Event(EventType.STARTUP)) # We wait for the shutdown Event to be set() and then end the program # While doing so we print the list of active threads once per 15 minutes while not shutdown_event.is_set(): log_stats() shutdown_event.wait(900) time.sleep(5) cloudkeeper.signal.kill_children(cloudkeeper.signal.SIGTERM, ensure_death=True) log.info("Shutdown complete") quit()
def __init__(self) -> None:
    super().__init__()
    self.__regions = []
    self.__graph_lock = Lock()
    self._executor = None
    add_event_listener(EventType.SHUTDOWN, self.shutdown)

def __init__(self): super().__init__() self.name = "backup" if ArgumentParser.args.backup_to: add_event_listener(EventType.COLLECT_FINISH, BackupPlugin.backup_graph)
def main() -> None:
    # Add cli args
    arg_parser = get_arg_parser()
    Cli.add_args(arg_parser)
    WebServer.add_args(arg_parser)
    Scheduler.add_args(arg_parser)
    Processor.add_args(arg_parser)
    Cleaner.add_args(arg_parser)
    PluginLoader.add_args(arg_parser)
    GraphContainer.add_args(arg_parser)
    event_add_args(arg_parser)

    # Find cloudkeeper Plugins in the cloudkeeper.plugins module
    plugin_loader = PluginLoader()
    plugin_loader.add_plugin_args(arg_parser)

    # At this point the CLI, all Plugins as well as the WebServer have
    # added their args to the arg parser
    arg_parser.parse_args()

    # Write log to a file in addition to stdout
    if ArgumentParser.args.logfile:
        log_formatter = logging.Formatter(log_format)
        fh = logging.FileHandler(ArgumentParser.args.logfile)
        fh.setFormatter(log_formatter)
        logging.getLogger().addHandler(fh)

    # Handle Ctrl+c and other means of termination/shutdown
    signal_on_parent_exit()
    add_event_listener(EventType.SHUTDOWN, shutdown, blocking=False)
    signal(SIGINT, signal_handler)
    signal(SIGTERM, signal_handler)
    signal(SIGUSR1, signal_handler)

    # We're using a GraphContainer() to contain the graph which gets replaced
    # at runtime. This way we're not losing the context in other places like
    # the webserver when the graph gets reassigned.
    graph_container = GraphContainer()

    # GraphCollector() is a custom Prometheus Collector that
    # takes a graph and yields its metrics
    graph_collector = GraphCollector(graph_container)
    REGISTRY.register(graph_collector)

    # Scheduler() starts an APScheduler instance
    scheduler = Scheduler(graph_container)
    scheduler.daemon = True
    scheduler.start()

    # Cli() is the CLI Thread
    cli = Cli(graph_container, scheduler)
    cli.daemon = True
    cli.start()

    # WebServer is handed the graph container context so it can e.g. produce
    # graphml from it. The webserver serves Prometheus Metrics as well as
    # different graph endpoints.
    web_server = WebServer(graph_container)
    web_server.daemon = True
    web_server.start()

    for Plugin in plugin_loader.plugins(PluginType.PERSISTENT):
        try:
            log.debug(f'Starting persistent Plugin {Plugin}')
            plugin = Plugin()
            plugin.daemon = True
            plugin.start()
        except Exception as e:
            log.exception(f'Caught unhandled persistent Plugin exception {e}')

    collector = Processor(graph_container, plugin_loader.plugins(PluginType.COLLECTOR))
    collector.daemon = True
    collector.start()

    # Dispatch the STARTUP event
    dispatch_event(Event(EventType.STARTUP))

    # We wait for the shutdown Event to be set() and then end the program.
    # While doing so we print the list of active threads once per 15 minutes.
    while not shutdown_event.is_set():
        log_stats()
        shutdown_event.wait(900)

    time.sleep(5)
    log.info('Shutdown complete')
    quit()

def __init__(self, webapp) -> None:
    super().__init__()
    self.name = "webserver"
    self.webapp = webapp
    add_event_listener(EventType.SHUTDOWN, self.shutdown)