def _main(config_dir_path: Parameter.REQUIRED, *, no_checks=False, no_debugging=False, verbose=False):
    """
    Run the diagnostic daemon.

    :param config_dir_path: Configuration directory to scan for Ambassador YAML files
    :param no_checks: If True, don't do Envoy-cluster health checking
    :param no_debugging: If True, don't run Flask in debug mode
    :param verbose: If True, be more verbose
    """
    # NOTE(review): the docstring above is kept verbatim on purpose -- the
    # Parameter.REQUIRED annotation suggests a CLI wrapper may render it as
    # help text. Confirm before rewording it.

    debug_mode = not no_debugging

    app.estats = EnvoyStats()
    app.health_checks = False
    app.debugging = debug_mode

    # Bind the app's logger explicitly by name (flagged by the original
    # author as possibly overkill), defaulting it to INFO.
    app._logger = logging.getLogger(app.logger_name)
    app.logger.setLevel(logging.INFO)

    # Debug mode or --verbose raises both the app logger and the root logger
    # to DEBUG.
    if debug_mode or verbose:
        app.logger.setLevel(logging.DEBUG)
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)

    # Health checking is on unless explicitly disabled; when on, refresh the
    # Envoy stats through a PeriodicTrigger with period=5.
    if not no_checks:
        app.health_checks = True
        app.logger.debug("Starting periodic updates")
        app.stats_updater = PeriodicTrigger(app.estats.update, period=5)

    app.config_dir_prefix = config_dir_path

    listen_port = aconf(app).diag_port()
    app.run(host='0.0.0.0', port=listen_port, debug=app.debugging)
def load(self):
    """
    Hand back the wrapped application.

    When the application has health checks enabled, a PeriodicTrigger that
    calls ``application.estats.update`` with period=5 is stored on
    ``application.stats_updater`` before returning.

    :return: ``self.application``
    """
    application = self.application

    if not application.health_checks:
        return application

    application.logger.info("Starting periodic updates")
    application.stats_updater = PeriodicTrigger(application.estats.update, period=5)

    return application
def _load_ir(self, aconf: Config, fetcher: ResourceFetcher,
             secret_reader: Callable[['IRTLSContext', str, str], SavedSecret],
             snapshot: str) -> None:
    """
    Build a new configuration from the fetched resources and, if Envoy
    accepts it, make it the live configuration.

    Steps, in order:

    1. Load every resource from ``fetcher`` into ``aconf`` and dump it as
       ``aconf-<snapshot>.json`` in ``app.snapshot_path``.
    2. Build the IR, dump it as ``ir-<snapshot>.json``, and report an
       "update" to Scout.
    3. Generate the "V2" Envoy configuration and diagnostics, and split the
       Envoy configuration into bootstrap and ADS parts.
    4. Validate the ADS part. If validation fails, log it and return without
       touching the current configuration.
    5. Write the bootstrap and ADS JSON files, publish the new objects on
       ``app``, and notify Envoy's config feeder (``app.kick`` command if
       set, otherwise SIGHUP to ``app.ambex_pid`` when it is nonzero).
    6. Start the Envoy stats updater if health checks are on and it is not
       already running.

    :param aconf: Config object to populate from the fetcher
    :param fetcher: Source of the resources to load
    :param secret_reader: Callable handed to the IR for reading secrets
    :param snapshot: Snapshot identifier, used in file names and log messages
    """
    aconf.load_all(fetcher.sorted())

    # Use context managers for the snapshot dumps so the files are flushed
    # and closed deterministically instead of whenever the handle is
    # garbage-collected.
    aconf_path = os.path.join(app.snapshot_path, "aconf-%s.json" % snapshot)
    with open(aconf_path, "w") as output:
        output.write(aconf.as_json())

    ir = IR(aconf, secret_reader=secret_reader)

    ir_path = os.path.join(app.snapshot_path, "ir-%s.json" % snapshot)
    with open(ir_path, "w") as output:
        output.write(ir.as_json())

    check_scout(app, "update", ir)

    econf = EnvoyConfig.generate(ir, "V2")
    diag = Diagnostics(ir, econf)

    bootstrap_config, ads_config = econf.split_config()

    # Never hand Envoy a configuration it would reject: keep running with
    # whatever is currently live.
    if not self.validate_envoy_config(config=ads_config):
        self.logger.info(
            "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
        )
        return

    self.logger.info("saving Envoy configuration for snapshot %s" % snapshot)

    with open(app.bootstrap_path, "w") as output:
        output.write(json.dumps(bootstrap_config, sort_keys=True, indent=4))

    with open(app.ads_path, "w") as output:
        output.write(json.dumps(ads_config, sort_keys=True, indent=4))

    # Publish the new state only after the files are on disk.
    app.aconf = aconf
    app.ir = ir
    app.econf = econf
    app.diag = diag

    if app.kick:
        self.logger.info("running '%s'" % app.kick)
        os.system(app.kick)
    elif app.ambex_pid != 0:
        self.logger.info("notifying PID %d ambex" % app.ambex_pid)
        os.kill(app.ambex_pid, signal.SIGHUP)

    self.logger.info("configuration updated")

    if app.health_checks and not app.stats_updater:
        app.logger.info("starting Envoy status updater")
        app.stats_updater = PeriodicTrigger(app.watcher.update_estats, period=5)
def load(self):
    """
    Start the event watcher for the wrapped application and return the
    application.

    An AmbassadorEventWatcher is created for ``self.application``, stored on
    ``application.watcher``, and started. When health checks are enabled, a
    PeriodicTrigger calling the watcher's ``update_estats`` with period=5 is
    stored on ``application.stats_updater``.

    :return: ``self.application``
    """
    application = self.application

    # Creating and starting the watcher from here is a little odd (the
    # original author said as much), but this is where it happens.
    application.watcher = AmbassadorEventWatcher(application)
    application.watcher.start()

    if not application.health_checks:
        return application

    application.logger.info("Starting periodic updates")
    application.stats_updater = PeriodicTrigger(application.watcher.update_estats, period=5)

    return application
def run(self):
    """
    Event loop: pull ``(cmd, arg, rqueue)`` events off ``self.events``
    forever and dispatch on ``cmd``. This method never returns.

    Before looping, a PeriodicTrigger that calls
    ``self.check_scout("checkin")`` with period=86400 is stored on
    ``self.app.scout_checker``.

    Recognized commands:

    * ``'ESTATS'``: respond 200 ``'updating'``, then refresh
      ``self.app.estats``.
    * ``'CONFIG_FS'``: ``self.load_config_fs(rqueue, arg)``.
    * ``'CONFIG'``: ``arg`` is a ``(version, url)`` pair; version ``'kw'``
      goes to ``load_config_kubewatch``, ``'watt'`` to ``load_config_watt``,
      and anything else is reported as unsupported.
    * ``'SCOUT'``: respond 200 ``'checking Scout'``, then
      ``self.check_scout(*arg)``.

    Any exception raised while handling a single event is logged and the
    loop moves on to the next event. Unknown commands are logged as errors.
    """
    self.logger.info("starting Scout checker")
    self.app.scout_checker = PeriodicTrigger(
        lambda: self.check_scout("checkin"), period=86400)  # Yup, one day.

    self.logger.info("starting event watcher")

    while True:
        cmd, arg, rqueue = self.events.get()

        if cmd == 'ESTATS':
            try:
                # Respond first, then do the actual stats refresh.
                self._respond(rqueue, 200, 'updating')
                self.app.estats.update()
            except Exception as e:
                self.logger.error("could not update estats: %s" % e)
                self.logger.exception(e)
        elif cmd == 'CONFIG_FS':
            try:
                self.load_config_fs(rqueue, arg)
            except Exception as e:
                self.logger.error("could not reconfigure: %s" % e)
                self.logger.exception(e)
        elif cmd == 'CONFIG':
            try:
                # Unpack inside the try: a malformed arg must be logged like
                # any other bad event rather than escaping and ending the
                # event loop.
                version, url = arg

                if version == 'kw':
                    self.load_config_kubewatch(rqueue, url)
                elif version == 'watt':
                    self.load_config_watt(rqueue, url)
                else:
                    raise RuntimeError("config from %s not supported" % version)
            except Exception as e:
                self.logger.error("could not reconfigure: %s" % e)
                self.logger.exception(e)
        elif cmd == 'SCOUT':
            try:
                self._respond(rqueue, 200, 'checking Scout')
                self.check_scout(*arg)
            except Exception as e:
                # This is a Scout failure, not a reconfiguration failure.
                self.logger.error("could not check Scout: %s" % e)
                self.logger.exception(e)
        else:
            self.logger.error("unknown event type: '%s' '%s'" % (cmd, arg))
def _load_ir(self, rqueue: queue.Queue, aconf: Config, fetcher: ResourceFetcher,
             secret_handler: SecretHandler, snapshot: str) -> None:
    """
    Build a new configuration from the fetched resources and, if Envoy
    accepts it, rotate it in as the live configuration.

    Steps, in order:

    1. Load every resource from ``fetcher`` into ``aconf`` and dump it as
       ``aconf-tmp.json`` in ``app.snapshot_path``; build the IR and dump it
       as ``ir-tmp.json``.
    2. Generate the "V2" Envoy configuration and diagnostics, and split the
       Envoy configuration into bootstrap and ADS parts.
    3. Validate the ADS part. On failure: log, tell Scout, respond 500 on
       ``rqueue``, and return without touching the current configuration.
    4. Rotate the snapshot files (``aconf``, ``econf``, ``ir`` JSON and
       ``snapshot`` YAML). ``AMBASSADOR_SNAPSHOT_COUNT`` (default 4) controls
       how many numbered generations are kept; the ``-tmp`` files always
       become the current ones. Files that cannot be renamed are skipped.
    5. Write the bootstrap and ADS JSON files, publish the new objects on
       ``app``, and notify Envoy's config feeder (``app.kick`` command if
       set, otherwise SIGHUP to ``app.ambex_pid`` when it is nonzero).
    6. Respond 200 on ``rqueue``, start the Envoy stats updater if needed,
       and finally report the update to Scout.

    :param rqueue: Queue on which the outcome is reported via ``_respond``
    :param aconf: Config object to populate from the fetcher
    :param fetcher: Source of the resources to load
    :param secret_handler: Secret handler handed to the IR
    :param snapshot: Snapshot identifier, used in log and response messages
    """
    aconf.load_all(fetcher.sorted())

    # Use context managers for the snapshot dumps so the files are flushed
    # and closed before the rename-based rotation below touches them.
    aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
    with open(aconf_path, "w") as output:
        output.write(aconf.as_json())

    ir = IR(aconf, secret_handler=secret_handler)

    ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
    with open(ir_path, "w") as output:
        output.write(ir.as_json())

    econf = EnvoyConfig.generate(ir, "V2")
    diag = Diagnostics(ir, econf)

    bootstrap_config, ads_config = econf.split_config()

    if not self.validate_envoy_config(config=ads_config):
        self.logger.info(
            "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
        )

        # Don't use app.check_scout; it will deadlock.
        self.check_scout("attempted bad update")

        self._respond(rqueue, 500, 'ignoring: invalid Envoy configuration in snapshot %s' % snapshot)
        return

    snapcount = int(os.environ.get('AMBASSADOR_SNAPSHOT_COUNT', "4"))
    snaplist: List[Tuple[str, str]] = []

    if snapcount > 0:
        self.logger.debug("rotating snapshots for snapshot %s" % snapshot)

        # If snapcount is 4, this range statement becomes range(-4, -1)
        # which gives [ -4, -3, -2 ], which the list comprehension turns
        # into [ ( "-3", "-4" ), ( "-2", "-3" ), ( "-1", "-2" ) ]...
        # which is the list of suffixes to rename to rotate the snapshots.
        snaplist += [(str(x + 1), str(x)) for x in range(-1 * snapcount, -1)]

        # After dealing with that, we need to rotate the current file into -1.
        snaplist.append(('', '-1'))

    # Whether or not we do any rotation, we need to cycle in the '-tmp' file.
    snaplist.append(('-tmp', ''))

    for from_suffix, to_suffix in snaplist:
        for fmt in ["aconf{}.json", "econf{}.json", "ir{}.json", "snapshot{}.yaml"]:
            from_path = os.path.join(app.snapshot_path, fmt.format(from_suffix))
            to_path = os.path.join(app.snapshot_path, fmt.format(to_suffix))

            # Rotation is best-effort: a generation that does not exist yet
            # (or cannot be renamed) is logged at debug level and skipped.
            try:
                self.logger.debug("rotate: %s -> %s" % (from_path, to_path))
                os.rename(from_path, to_path)
            except IOError as e:
                self.logger.debug("skip %s -> %s: %s" % (from_path, to_path, e))
            except Exception as e:
                self.logger.debug("could not rename %s -> %s: %s" % (from_path, to_path, e))

    self.logger.info("saving Envoy configuration for snapshot %s" % snapshot)

    with open(app.bootstrap_path, "w") as output:
        output.write(json.dumps(bootstrap_config, sort_keys=True, indent=4))

    with open(app.ads_path, "w") as output:
        output.write(json.dumps(ads_config, sort_keys=True, indent=4))

    # Publish the new state only after the files are on disk.
    app.aconf = aconf
    app.ir = ir
    app.econf = econf
    app.diag = diag

    if app.kick:
        self.logger.info("running '%s'" % app.kick)
        os.system(app.kick)
    elif app.ambex_pid != 0:
        self.logger.info("notifying PID %d ambex" % app.ambex_pid)
        os.kill(app.ambex_pid, signal.SIGHUP)

    self.logger.info("configuration updated from snapshot %s" % snapshot)
    self._respond(rqueue, 200, 'configuration updated from snapshot %s' % snapshot)

    if app.health_checks and not app.stats_updater:
        app.logger.info("starting Envoy status updater")
        app.stats_updater = PeriodicTrigger(app.watcher.update_estats, period=5)
        # app.scout_updater = PeriodicTrigger(lambda: app.watcher.check_scout("30s"), period=30)

    # Don't use app.check_scout; it will deadlock. And don't bother doing the Scout
    # update until after we've taken care of Envoy.
    self.check_scout("update")
def _load_ir(self, rqueue: queue.Queue, aconf: Config, fetcher: ResourceFetcher,
             secret_handler: SecretHandler, snapshot: str) -> None:
    """
    Build a new configuration from the fetched resources and, if Envoy
    accepts it, rotate it in as the live configuration.

    Steps, in order:

    1. Load every resource from ``fetcher`` into ``aconf`` and dump it as
       ``aconf-tmp.json`` in ``app.snapshot_path``; build the IR and dump it
       as ``ir-tmp.json``.
    2. Generate the "V2" Envoy configuration and diagnostics, and split the
       Envoy configuration into bootstrap and ADS parts.
    3. Validate the ADS part (with ``self.app.validation_retries`` retries).
       On failure: log, tell Scout, respond 500 on ``rqueue``, and return
       without touching the current configuration.
    4. Rotate the snapshot files (``aconf``, ``econf``, ``ir`` JSON and
       ``snapshot`` YAML). ``AMBASSADOR_SNAPSHOT_COUNT`` (default 4) controls
       how many numbered generations are kept; the ``-tmp`` files always
       become the current ones. Files that cannot be renamed are skipped.
    5. Record ``app.latest_snapshot``, write the bootstrap and ADS JSON
       files, publish the new objects on ``app``, and notify Envoy's config
       feeder (``app.kick`` command if set, otherwise SIGHUP to
       ``app.ambex_pid`` when it is nonzero).
    6. For every entry in ``app.ir.k8s_status_updates``, save the update
       JSON under ``/tmp`` and feed it on stdin to ``/ambassador/kubestatus``
       with a 5-second timeout; a timeout is logged, not raised.
    7. Respond 200 on ``rqueue``, start the Envoy stats updater if needed,
       then run ``check_environment()`` and ``chime()``.

    :param rqueue: Queue on which the outcome is reported via ``_respond``
    :param aconf: Config object to populate from the fetcher
    :param fetcher: Source of the resources to load
    :param secret_handler: Secret handler handed to the IR
    :param snapshot: Snapshot identifier, used in log and response messages
    """
    aconf.load_all(fetcher.sorted())

    # Use context managers for the snapshot dumps so the files are flushed
    # and closed before the rename-based rotation below touches them.
    aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
    with open(aconf_path, "w") as output:
        output.write(aconf.as_json())

    ir = IR(aconf, secret_handler=secret_handler)

    ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
    with open(ir_path, "w") as output:
        output.write(ir.as_json())

    econf = EnvoyConfig.generate(ir, "V2")
    diag = Diagnostics(ir, econf)

    bootstrap_config, ads_config = econf.split_config()

    if not self.validate_envoy_config(config=ads_config, retries=self.app.validation_retries):
        self.logger.info(
            "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
        )

        # Don't use app.check_scout; it will deadlock.
        self.check_scout("attempted bad update")

        self._respond(rqueue, 500, 'ignoring: invalid Envoy configuration in snapshot %s' % snapshot)
        return

    snapcount = int(os.environ.get('AMBASSADOR_SNAPSHOT_COUNT', "4"))
    snaplist: List[Tuple[str, str]] = []

    if snapcount > 0:
        self.logger.debug("rotating snapshots for snapshot %s" % snapshot)

        # If snapcount is 4, this range statement becomes range(-4, -1)
        # which gives [ -4, -3, -2 ], which the list comprehension turns
        # into [ ( "-3", "-4" ), ( "-2", "-3" ), ( "-1", "-2" ) ]...
        # which is the list of suffixes to rename to rotate the snapshots.
        snaplist += [(str(x + 1), str(x)) for x in range(-1 * snapcount, -1)]

        # After dealing with that, we need to rotate the current file into -1.
        snaplist.append(('', '-1'))

    # Whether or not we do any rotation, we need to cycle in the '-tmp' file.
    snaplist.append(('-tmp', ''))

    for from_suffix, to_suffix in snaplist:
        for fmt in ["aconf{}.json", "econf{}.json", "ir{}.json", "snapshot{}.yaml"]:
            from_path = os.path.join(app.snapshot_path, fmt.format(from_suffix))
            to_path = os.path.join(app.snapshot_path, fmt.format(to_suffix))

            # Rotation is best-effort: a generation that does not exist yet
            # (or cannot be renamed) is logged at debug level and skipped.
            try:
                self.logger.debug("rotate: %s -> %s" % (from_path, to_path))
                os.rename(from_path, to_path)
            except IOError as e:
                self.logger.debug("skip %s -> %s: %s" % (from_path, to_path, e))
            except Exception as e:
                self.logger.debug("could not rename %s -> %s: %s" % (from_path, to_path, e))

    app.latest_snapshot = snapshot
    self.logger.info("saving Envoy configuration for snapshot %s" % snapshot)

    with open(app.bootstrap_path, "w") as output:
        output.write(json.dumps(bootstrap_config, sort_keys=True, indent=4))

    with open(app.ads_path, "w") as output:
        output.write(json.dumps(ads_config, sort_keys=True, indent=4))

    # Publish the new state only after the files are on disk.
    app.aconf = aconf
    app.ir = ir
    app.econf = econf
    app.diag = diag

    if app.kick:
        self.logger.info("running '%s'" % app.kick)
        os.system(app.kick)
    elif app.ambex_pid != 0:
        self.logger.info("notifying PID %d ambex" % app.ambex_pid)
        os.kill(app.ambex_pid, signal.SIGHUP)

    # The truthiness guard also covers k8s_status_updates being None/empty.
    if app.ir.k8s_status_updates:
        for name, (kind, update) in app.ir.k8s_status_updates.items():
            self.logger.info(f"doing K8s status update for {kind} {name}...")

            text = json.dumps(update)

            # Keep a copy of what was sent on disk.
            with open(f'/tmp/kstat-{kind}-{name}', 'w') as out:
                out.write(text)

            # '-u /dev/fd/0' makes kubestatus read the update from stdin,
            # which subprocess.run feeds from `input` below.
            cmd = ['/ambassador/kubestatus', kind, '-f', f'metadata.name={name}', '-u', '/dev/fd/0']
            self.logger.info(f"Running command: {cmd}")

            try:
                rc = subprocess.run(cmd, input=text.encode('utf-8'), timeout=5)
                self.logger.info(f'...update finished, rc {rc.returncode}')
            except subprocess.TimeoutExpired as e:
                self.logger.error(f'...update timed out, {e}')

    self.logger.info("configuration updated from snapshot %s" % snapshot)
    self._respond(rqueue, 200, 'configuration updated from snapshot %s' % snapshot)

    if app.health_checks and not app.stats_updater:
        app.logger.info("starting Envoy status updater")
        app.stats_updater = PeriodicTrigger(app.watcher.update_estats, period=5)

    # Check our environment...
    self.check_environment()

    self.chime()
def _load_ir(self, rqueue: queue.Queue, aconf: Config, fetcher: ResourceFetcher,
             secret_reader: Callable[['IRTLSContext', str, str], SavedSecret],
             snapshot: str) -> None:
    """
    Build a new configuration from the fetched resources and, if Envoy
    accepts it, rotate it in as the live configuration.

    Steps, in order:

    1. Load every resource from ``fetcher`` into ``aconf`` and dump it as
       ``aconf-tmp.json`` in ``app.snapshot_path``; build the IR and dump it
       as ``ir-tmp.json``.
    2. Generate the "V2" Envoy configuration and diagnostics, and split the
       Envoy configuration into bootstrap and ADS parts.
    3. Validate the ADS part. On failure: log, tell Scout, respond 500 on
       ``rqueue``, and return without touching the current configuration.
    4. Rotate the snapshot files (``aconf``, ``econf``, ``ir`` JSON and
       ``snapshot`` YAML): ``-3`` to ``-4``, ``-2`` to ``-3``, ``-1`` to
       ``-2``, current to ``-1``, and ``-tmp`` to current. Files that cannot
       be renamed are skipped.
    5. Write the bootstrap and ADS JSON files, publish the new objects on
       ``app``, and notify Envoy's config feeder (``app.kick`` command if
       set, otherwise SIGHUP to ``app.ambex_pid`` when it is nonzero).
    6. Respond 200 on ``rqueue``, start the Envoy stats updater if needed,
       and finally report the update to Scout.

    :param rqueue: Queue on which the outcome is reported via ``_respond``
    :param aconf: Config object to populate from the fetcher
    :param fetcher: Source of the resources to load
    :param secret_reader: Callable handed to the IR for reading secrets
    :param snapshot: Snapshot identifier, used in log and response messages
    """
    aconf.load_all(fetcher.sorted())

    # Use context managers for the snapshot dumps so the files are flushed
    # and closed before the rename-based rotation below touches them.
    aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
    with open(aconf_path, "w") as output:
        output.write(aconf.as_json())

    ir = IR(aconf, secret_reader=secret_reader)

    ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
    with open(ir_path, "w") as output:
        output.write(ir.as_json())

    econf = EnvoyConfig.generate(ir, "V2")
    diag = Diagnostics(ir, econf)

    bootstrap_config, ads_config = econf.split_config()

    if not self.validate_envoy_config(config=ads_config):
        self.logger.info(
            "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
        )

        # Don't use app.check_scout; it will deadlock.
        self.check_scout("attempted bad update")

        self._respond(rqueue, 500, 'ignoring: invalid Envoy configuration in snapshot %s' % snapshot)
        return

    self.logger.info("rotating snapshots for snapshot %s" % snapshot)

    # Oldest generation first, so each rename frees the slot that the next
    # rename moves into; '-tmp' is cycled in last.
    rotation = [('-3', '-4'), ('-2', '-3'), ('-1', '-2'), ('', '-1'), ('-tmp', '')]

    for from_suffix, to_suffix in rotation:
        for fmt in ["aconf{}.json", "econf{}.json", "ir{}.json", "snapshot{}.yaml"]:
            # Compute the paths before the try so the handlers below can
            # always refer to them.
            from_path = os.path.join(app.snapshot_path, fmt.format(from_suffix))
            to_path = os.path.join(app.snapshot_path, fmt.format(to_suffix))

            # Rotation is best-effort: a generation that does not exist yet
            # (or cannot be renamed) is logged at debug level and skipped.
            try:
                self.logger.debug("rotate: %s -> %s" % (from_path, to_path))
                os.rename(from_path, to_path)
            except IOError as e:
                self.logger.debug("skip %s -> %s: %s" % (from_path, to_path, e))
            except Exception as e:
                self.logger.debug("could not rename %s -> %s: %s" % (from_path, to_path, e))

    self.logger.info("saving Envoy configuration for snapshot %s" % snapshot)

    with open(app.bootstrap_path, "w") as output:
        output.write(json.dumps(bootstrap_config, sort_keys=True, indent=4))

    with open(app.ads_path, "w") as output:
        output.write(json.dumps(ads_config, sort_keys=True, indent=4))

    # Publish the new state only after the files are on disk.
    app.aconf = aconf
    app.ir = ir
    app.econf = econf
    app.diag = diag

    if app.kick:
        self.logger.info("running '%s'" % app.kick)
        os.system(app.kick)
    elif app.ambex_pid != 0:
        self.logger.info("notifying PID %d ambex" % app.ambex_pid)
        os.kill(app.ambex_pid, signal.SIGHUP)

    self.logger.info("configuration updated from snapshot %s" % snapshot)
    self._respond(rqueue, 200, 'configuration updated from snapshot %s' % snapshot)

    if app.health_checks and not app.stats_updater:
        app.logger.info("starting Envoy status updater")
        app.stats_updater = PeriodicTrigger(app.watcher.update_estats, period=5)

    # Don't use app.check_scout; it will deadlock. And don't bother doing the Scout
    # update until after we've taken care of Envoy.
    self.check_scout("update")