Beispiel #1
0
def _main(config_dir_path: Parameter.REQUIRED,
          *,
          no_checks=False,
          no_debugging=False,
          verbose=False):
    """
    Configure the diagnostic daemon's Flask app and start serving.

    :param config_dir_path: Configuration directory to scan for Ambassador YAML files
    :param no_checks: If True, leave health checks off and don't start the periodic stats updater
    :param no_debugging: If True, don't run Flask in debug mode
    :param verbose: If True, log at DEBUG level even when not debugging
    """

    checks_enabled = not no_checks

    app.estats = EnvoyStats()
    app.health_checks = checks_enabled
    app.debugging = not no_debugging

    # Install our own logger on the app; DEBUG is used when either debugging
    # or verbose output was requested, INFO otherwise.
    app._logger = logging.getLogger(app.logger_name)
    wants_debug_logs = app.debugging or verbose
    app.logger.setLevel(logging.DEBUG if wants_debug_logs else logging.INFO)

    if wants_debug_logs:
        # Also turn the root logger up.
        logging.getLogger().setLevel(logging.DEBUG)

    if checks_enabled:
        app.logger.debug("Starting periodic updates")
        app.stats_updater = PeriodicTrigger(app.estats.update, period=5)

    app.config_dir_prefix = config_dir_path

    diag_port = aconf(app).diag_port()
    app.run(host='0.0.0.0', port=diag_port, debug=app.debugging)
Beispiel #2
0
    def load(self):
        """
        Return the wrapped application, first starting the periodic stats
        updater when the application has health checks enabled.
        """
        application = self.application

        if not application.health_checks:
            return application

        application.logger.info("Starting periodic updates")
        application.stats_updater = PeriodicTrigger(application.estats.update,
                                                    period=5)

        return application
Beispiel #3
0
    def _load_ir(self, aconf: Config, fetcher: ResourceFetcher,
                 secret_reader: Callable[['IRTLSContext', str, str],
                                         SavedSecret], snapshot: str) -> None:
        """
        Build the IR and Envoy configuration for a snapshot and activate them.

        The Ambassador config and the IR are dumped as JSON under
        ``app.snapshot_path`` before validation, so those files exist even
        when the generated Envoy configuration is rejected. If validation
        fails, nothing else is changed and the method returns early.

        :param aconf: Config object that the fetched resources are loaded into
        :param fetcher: resource source; its ``sorted()`` output is loaded
        :param secret_reader: callable handed to the IR as ``secret_reader``
        :param snapshot: snapshot identifier, used in file names and log messages
        """

        aconf.load_all(fetcher.sorted())

        # Use context managers so the dump files are flushed and closed
        # deterministically instead of whenever the handle gets collected.
        aconf_path = os.path.join(app.snapshot_path,
                                  "aconf-%s.json" % snapshot)
        with open(aconf_path, "w") as output:
            output.write(aconf.as_json())

        ir = IR(aconf, secret_reader=secret_reader)

        ir_path = os.path.join(app.snapshot_path, "ir-%s.json" % snapshot)
        with open(ir_path, "w") as output:
            output.write(ir.as_json())

        check_scout(app, "update", ir)

        econf = EnvoyConfig.generate(ir, "V2")
        diag = Diagnostics(ir, econf)

        bootstrap_config, ads_config = econf.split_config()

        if not self.validate_envoy_config(config=ads_config):
            self.logger.info(
                "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
            )
            return

        self.logger.info("saving Envoy configuration for snapshot %s" %
                         snapshot)

        with open(app.bootstrap_path, "w") as output:
            output.write(json.dumps(bootstrap_config, sort_keys=True,
                                    indent=4))

        with open(app.ads_path, "w") as output:
            output.write(json.dumps(ads_config, sort_keys=True, indent=4))

        # Publish the new state on the app only after the files are written.
        app.aconf = aconf
        app.ir = ir
        app.econf = econf
        app.diag = diag

        # Make the new files take effect: run the kick command if one is
        # configured, otherwise send SIGHUP to ambex if we know its PID.
        if app.kick:
            self.logger.info("running '%s'" % app.kick)
            os.system(app.kick)
        elif app.ambex_pid != 0:
            self.logger.info("notifying PID %d ambex" % app.ambex_pid)
            os.kill(app.ambex_pid, signal.SIGHUP)

        self.logger.info("configuration updated")

        # Start the stats updater at most once.
        if app.health_checks and not app.stats_updater:
            app.logger.info("starting Envoy status updater")
            app.stats_updater = PeriodicTrigger(app.watcher.update_estats,
                                                period=5)
Beispiel #4
0
    def load(self):
        """
        Create and start the event watcher for the application, start the
        periodic stats updater if health checks are enabled, and return the
        application.
        """
        application = self.application

        # The watcher is stored on the application itself before it is started.
        watcher = AmbassadorEventWatcher(application)
        application.watcher = watcher
        watcher.start()

        if not application.health_checks:
            return application

        application.logger.info("Starting periodic updates")
        application.stats_updater = PeriodicTrigger(watcher.update_estats,
                                                    period=5)

        return application
Beispiel #5
0
    def run(self):
        """
        Main loop of the event watcher.

        Starts a once-a-day Scout check-in trigger, then loops forever pulling
        ``(cmd, arg, rqueue)`` tuples off ``self.events`` and dispatching on
        ``cmd``:

        - ``ESTATS``: respond, then update ``self.app.estats``
        - ``CONFIG_FS``: load configuration via ``load_config_fs``
        - ``CONFIG``: ``arg`` is ``(version, url)``; ``kw`` and ``watt`` are
          the supported versions
        - ``SCOUT``: respond, then run ``check_scout`` with ``arg`` as its
          positional arguments

        Exceptions raised while handling an event are logged and the loop
        carries on with the next event.
        """
        self.logger.info("starting Scout checker")
        self.app.scout_checker = PeriodicTrigger(
            lambda: self.check_scout("checkin"), period=86400)  # Yup, one day.

        self.logger.info("starting event watcher")

        while True:
            cmd, arg, rqueue = self.events.get()

            if cmd == 'ESTATS':
                try:
                    # Respond before doing the update.
                    self._respond(rqueue, 200, 'updating')
                    self.app.estats.update()
                except Exception as e:
                    self.logger.error("could not update estats: %s" % e)
                    self.logger.exception(e)
            elif cmd == 'CONFIG_FS':
                try:
                    self.load_config_fs(rqueue, arg)
                except Exception as e:
                    self.logger.error("could not reconfigure: %s" % e)
                    self.logger.exception(e)
            elif cmd == 'CONFIG':
                version, url = arg

                try:
                    if version == 'kw':
                        self.load_config_kubewatch(rqueue, url)
                    elif version == 'watt':
                        self.load_config_watt(rqueue, url)
                    else:
                        raise RuntimeError("config from %s not supported" %
                                           version)
                except Exception as e:
                    self.logger.error("could not reconfigure: %s" % e)
                    self.logger.exception(e)
            elif cmd == 'SCOUT':
                try:
                    self._respond(rqueue, 200, 'checking Scout')
                    self.check_scout(*arg)
                except Exception as e:
                    # This is a Scout failure, not a reconfiguration failure;
                    # say so, so the log isn't misleading.
                    self.logger.error("could not check Scout: %s" % e)
                    self.logger.exception(e)
            else:
                self.logger.error("unknown event type: '%s' '%s'" % (cmd, arg))
Beispiel #6
0
    def _load_ir(self, rqueue: queue.Queue, aconf: Config,
                 fetcher: ResourceFetcher, secret_handler: SecretHandler,
                 snapshot: str) -> None:
        """
        Build the IR and Envoy configuration for a snapshot, rotate the saved
        snapshot files, and activate the new configuration.

        The Ambassador config and IR are first written as ``-tmp`` files under
        ``app.snapshot_path``. If the generated Envoy configuration fails
        validation, a 500 is sent via ``_respond`` and nothing is rotated or
        activated. Otherwise older snapshot files are rotated, the ``-tmp``
        files become current, the Envoy files are written, Envoy is told to
        reload, and a 200 is sent.

        :param rqueue: queue passed to ``_respond`` for the status reply
        :param aconf: Config object that the fetched resources are loaded into
        :param fetcher: resource source; its ``sorted()`` output is loaded
        :param secret_handler: handed to the IR as ``secret_handler``
        :param snapshot: snapshot identifier, used in log and response messages
        """
        aconf.load_all(fetcher.sorted())

        # Use context managers so the dump files are flushed and closed before
        # they get renamed by the rotation below.
        aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
        with open(aconf_path, "w") as output:
            output.write(aconf.as_json())

        ir = IR(aconf, secret_handler=secret_handler)

        ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
        with open(ir_path, "w") as output:
            output.write(ir.as_json())

        econf = EnvoyConfig.generate(ir, "V2")
        diag = Diagnostics(ir, econf)

        bootstrap_config, ads_config = econf.split_config()

        if not self.validate_envoy_config(config=ads_config):
            self.logger.info(
                "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
            )
            # Don't use app.check_scout; it will deadlock.
            self.check_scout("attempted bad update")
            self._respond(
                rqueue, 500,
                'ignoring: invalid Envoy configuration in snapshot %s' %
                snapshot)
            return

        snapcount = int(os.environ.get('AMBASSADOR_SNAPSHOT_COUNT', "4"))
        snaplist: List[Tuple[str, str]] = []

        if snapcount > 0:
            self.logger.debug("rotating snapshots for snapshot %s" % snapshot)

            # If snapcount is 4, this range statement becomes range(-4, -1)
            # which gives [ -4, -3, -2 ], which the list comprehension turns
            # into [ ( "-3", "-4" ), ( "-2", "-3" ), ( "-1", "-2" ) ]...
            # which is the list of suffixes to rename to rotate the snapshots.

            snaplist += [(str(x + 1), str(x))
                         for x in range(-1 * snapcount, -1)]

            # After dealing with that, we need to rotate the current file into -1.
            snaplist.append(('', '-1'))

        # Whether or not we do any rotation, we need to cycle in the '-tmp' file.
        snaplist.append(('-tmp', ''))

        for from_suffix, to_suffix in snaplist:
            for fmt in [
                    "aconf{}.json", "econf{}.json", "ir{}.json",
                    "snapshot{}.yaml"
            ]:
                from_path = os.path.join(app.snapshot_path,
                                         fmt.format(from_suffix))
                to_path = os.path.join(app.snapshot_path,
                                       fmt.format(to_suffix))

                # Rotation is best-effort: a file that can't be renamed is
                # logged at debug level and skipped.
                try:
                    self.logger.debug("rotate: %s -> %s" %
                                      (from_path, to_path))
                    os.rename(from_path, to_path)
                except IOError as e:
                    self.logger.debug("skip %s -> %s: %s" %
                                      (from_path, to_path, e))
                except Exception as e:
                    self.logger.debug("could not rename %s -> %s: %s" %
                                      (from_path, to_path, e))

        self.logger.info("saving Envoy configuration for snapshot %s" %
                         snapshot)

        with open(app.bootstrap_path, "w") as output:
            output.write(json.dumps(bootstrap_config, sort_keys=True,
                                    indent=4))

        with open(app.ads_path, "w") as output:
            output.write(json.dumps(ads_config, sort_keys=True, indent=4))

        app.aconf = aconf
        app.ir = ir
        app.econf = econf
        app.diag = diag

        # Make the new files take effect: run the kick command if one is
        # configured, otherwise send SIGHUP to ambex if we know its PID.
        if app.kick:
            self.logger.info("running '%s'" % app.kick)
            os.system(app.kick)
        elif app.ambex_pid != 0:
            self.logger.info("notifying PID %d ambex" % app.ambex_pid)
            os.kill(app.ambex_pid, signal.SIGHUP)

        self.logger.info("configuration updated from snapshot %s" % snapshot)
        self._respond(rqueue, 200,
                      'configuration updated from snapshot %s' % snapshot)

        # Start the stats updater at most once.
        if app.health_checks and not app.stats_updater:
            app.logger.info("starting Envoy status updater")
            app.stats_updater = PeriodicTrigger(app.watcher.update_estats,
                                                period=5)
            # app.scout_updater = PeriodicTrigger(lambda: app.watcher.check_scout("30s"), period=30)

        # Don't use app.check_scout; it will deadlock. And don't bother doing the Scout
        # update until after we've taken care of Envoy.
        self.check_scout("update")
Beispiel #7
0
    def _load_ir(self, rqueue: queue.Queue, aconf: Config,
                 fetcher: ResourceFetcher, secret_handler: SecretHandler,
                 snapshot: str) -> None:
        """
        Build the IR and Envoy configuration for a snapshot, rotate the saved
        snapshot files, activate the new configuration, and push any pending
        Kubernetes status updates.

        The Ambassador config and IR are first written as ``-tmp`` files under
        ``app.snapshot_path``. If the generated Envoy configuration fails
        validation, a 500 is sent via ``_respond`` and nothing is rotated or
        activated. Otherwise older snapshot files are rotated, the ``-tmp``
        files become current, the Envoy files are written, Envoy is told to
        reload, status updates are run, and a 200 is sent.

        :param rqueue: queue passed to ``_respond`` for the status reply
        :param aconf: Config object that the fetched resources are loaded into
        :param fetcher: resource source; its ``sorted()`` output is loaded
        :param secret_handler: handed to the IR as ``secret_handler``
        :param snapshot: snapshot identifier, used in log and response messages
        """
        aconf.load_all(fetcher.sorted())

        # Use context managers so the dump files are flushed and closed before
        # they get renamed by the rotation below.
        aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
        with open(aconf_path, "w") as output:
            output.write(aconf.as_json())

        ir = IR(aconf, secret_handler=secret_handler)

        ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
        with open(ir_path, "w") as output:
            output.write(ir.as_json())

        econf = EnvoyConfig.generate(ir, "V2")
        diag = Diagnostics(ir, econf)

        bootstrap_config, ads_config = econf.split_config()

        if not self.validate_envoy_config(config=ads_config,
                                          retries=self.app.validation_retries):
            self.logger.info(
                "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
            )
            # Don't use app.check_scout; it will deadlock.
            self.check_scout("attempted bad update")
            self._respond(
                rqueue, 500,
                'ignoring: invalid Envoy configuration in snapshot %s' %
                snapshot)
            return

        snapcount = int(os.environ.get('AMBASSADOR_SNAPSHOT_COUNT', "4"))
        snaplist: List[Tuple[str, str]] = []

        if snapcount > 0:
            self.logger.debug("rotating snapshots for snapshot %s" % snapshot)

            # If snapcount is 4, this range statement becomes range(-4, -1)
            # which gives [ -4, -3, -2 ], which the list comprehension turns
            # into [ ( "-3", "-4" ), ( "-2", "-3" ), ( "-1", "-2" ) ]...
            # which is the list of suffixes to rename to rotate the snapshots.

            snaplist += [(str(x + 1), str(x))
                         for x in range(-1 * snapcount, -1)]

            # After dealing with that, we need to rotate the current file into -1.
            snaplist.append(('', '-1'))

        # Whether or not we do any rotation, we need to cycle in the '-tmp' file.
        snaplist.append(('-tmp', ''))

        for from_suffix, to_suffix in snaplist:
            for fmt in [
                    "aconf{}.json", "econf{}.json", "ir{}.json",
                    "snapshot{}.yaml"
            ]:
                from_path = os.path.join(app.snapshot_path,
                                         fmt.format(from_suffix))
                to_path = os.path.join(app.snapshot_path,
                                       fmt.format(to_suffix))

                # Rotation is best-effort: a file that can't be renamed is
                # logged at debug level and skipped.
                try:
                    self.logger.debug("rotate: %s -> %s" %
                                      (from_path, to_path))
                    os.rename(from_path, to_path)
                except IOError as e:
                    self.logger.debug("skip %s -> %s: %s" %
                                      (from_path, to_path, e))
                except Exception as e:
                    self.logger.debug("could not rename %s -> %s: %s" %
                                      (from_path, to_path, e))

        app.latest_snapshot = snapshot
        self.logger.info("saving Envoy configuration for snapshot %s" %
                         snapshot)

        with open(app.bootstrap_path, "w") as output:
            output.write(json.dumps(bootstrap_config, sort_keys=True,
                                    indent=4))

        with open(app.ads_path, "w") as output:
            output.write(json.dumps(ads_config, sort_keys=True, indent=4))

        app.aconf = aconf
        app.ir = ir
        app.econf = econf
        app.diag = diag

        # Make the new files take effect: run the kick command if one is
        # configured, otherwise send SIGHUP to ambex if we know its PID.
        if app.kick:
            self.logger.info("running '%s'" % app.kick)
            os.system(app.kick)
        elif app.ambex_pid != 0:
            self.logger.info("notifying PID %d ambex" % app.ambex_pid)
            os.kill(app.ambex_pid, signal.SIGHUP)

        if app.ir.k8s_status_updates:
            # Each entry maps a resource name to a (kind, update) pair.
            for name, (kind, update) in app.ir.k8s_status_updates.items():
                self.logger.info(
                    f"doing K8s status update for {kind} {name}...")

                text = json.dumps(update)

                # Keep a copy of the update on disk as well as piping it to
                # the command below.
                with open(f'/tmp/kstat-{kind}-{name}', 'w') as out:
                    out.write(text)

                # The update body is fed to kubestatus on stdin (/dev/fd/0).
                cmd = [
                    '/ambassador/kubestatus', kind, '-f',
                    f'metadata.name={name}', '-u', '/dev/fd/0'
                ]
                self.logger.info(f"Running command: {cmd}")

                try:
                    rc = subprocess.run(cmd,
                                        input=text.encode('utf-8'),
                                        timeout=5)
                    self.logger.info(f'...update finished, rc {rc.returncode}')
                except subprocess.TimeoutExpired as e:
                    self.logger.error(f'...update timed out, {e}')
                except OSError as e:
                    # The command could not be launched at all (e.g. missing
                    # or non-executable binary). Treat it like a timeout: log
                    # and carry on, so the caller still gets its response.
                    self.logger.error(f'...update could not be run, {e}')

        self.logger.info("configuration updated from snapshot %s" % snapshot)
        self._respond(rqueue, 200,
                      'configuration updated from snapshot %s' % snapshot)

        # Start the stats updater at most once.
        if app.health_checks and not app.stats_updater:
            app.logger.info("starting Envoy status updater")
            app.stats_updater = PeriodicTrigger(app.watcher.update_estats,
                                                period=5)

        # Check our environment...
        self.check_environment()

        self.chime()
Beispiel #8
0
    def _load_ir(self, rqueue: queue.Queue, aconf: Config,
                 fetcher: ResourceFetcher,
                 secret_reader: Callable[['IRTLSContext', str, str],
                                         SavedSecret], snapshot: str) -> None:
        """
        Build the IR and Envoy configuration for a snapshot, rotate the saved
        snapshot files, and activate the new configuration.

        The Ambassador config and IR are first written as ``-tmp`` files under
        ``app.snapshot_path``. If the generated Envoy configuration fails
        validation, a 500 is sent via ``_respond`` and nothing is rotated or
        activated. Otherwise the snapshot files are rotated (``-3`` to ``-4``
        through current to ``-1``), the ``-tmp`` files become current, the
        Envoy files are written, Envoy is told to reload, and a 200 is sent.

        :param rqueue: queue passed to ``_respond`` for the status reply
        :param aconf: Config object that the fetched resources are loaded into
        :param fetcher: resource source; its ``sorted()`` output is loaded
        :param secret_reader: callable handed to the IR as ``secret_reader``
        :param snapshot: snapshot identifier, used in log and response messages
        """
        aconf.load_all(fetcher.sorted())

        # Use context managers so the dump files are flushed and closed before
        # they get renamed by the rotation below.
        aconf_path = os.path.join(app.snapshot_path, "aconf-tmp.json")
        with open(aconf_path, "w") as output:
            output.write(aconf.as_json())

        ir = IR(aconf, secret_reader=secret_reader)

        ir_path = os.path.join(app.snapshot_path, "ir-tmp.json")
        with open(ir_path, "w") as output:
            output.write(ir.as_json())

        econf = EnvoyConfig.generate(ir, "V2")
        diag = Diagnostics(ir, econf)

        bootstrap_config, ads_config = econf.split_config()

        if not self.validate_envoy_config(config=ads_config):
            self.logger.info(
                "no updates were performed due to invalid envoy configuration, continuing with current configuration..."
            )
            # Don't use app.check_scout; it will deadlock.
            self.check_scout("attempted bad update")
            self._respond(
                rqueue, 500,
                'ignoring: invalid Envoy configuration in snapshot %s' %
                snapshot)
            return

        self.logger.info("rotating snapshots for snapshot %s" % snapshot)

        # Oldest first, so each rename frees up the name the next one needs;
        # the last pair cycles the '-tmp' files in as the current snapshot.
        for from_suffix, to_suffix in [('-3', '-4'), ('-2', '-3'),
                                       ('-1', '-2'), ('', '-1'), ('-tmp', '')]:
            for fmt in [
                    "aconf{}.json", "econf{}.json", "ir{}.json",
                    "snapshot{}.yaml"
            ]:
                from_path = os.path.join(app.snapshot_path,
                                         fmt.format(from_suffix))
                to_path = os.path.join(app.snapshot_path,
                                       fmt.format(to_suffix))

                # Rotation is best-effort: a file that can't be renamed is
                # logged at debug level and skipped.
                try:
                    self.logger.debug("rotate: %s -> %s" %
                                      (from_path, to_path))
                    os.rename(from_path, to_path)
                except IOError as e:
                    self.logger.debug("skip %s -> %s: %s" %
                                      (from_path, to_path, e))
                except Exception as e:
                    self.logger.debug("could not rename %s -> %s: %s" %
                                      (from_path, to_path, e))

        self.logger.info("saving Envoy configuration for snapshot %s" %
                         snapshot)

        with open(app.bootstrap_path, "w") as output:
            output.write(json.dumps(bootstrap_config, sort_keys=True,
                                    indent=4))

        with open(app.ads_path, "w") as output:
            output.write(json.dumps(ads_config, sort_keys=True, indent=4))

        app.aconf = aconf
        app.ir = ir
        app.econf = econf
        app.diag = diag

        # Make the new files take effect: run the kick command if one is
        # configured, otherwise send SIGHUP to ambex if we know its PID.
        if app.kick:
            self.logger.info("running '%s'" % app.kick)
            os.system(app.kick)
        elif app.ambex_pid != 0:
            self.logger.info("notifying PID %d ambex" % app.ambex_pid)
            os.kill(app.ambex_pid, signal.SIGHUP)

        self.logger.info("configuration updated from snapshot %s" % snapshot)
        self._respond(rqueue, 200,
                      'configuration updated from snapshot %s' % snapshot)

        # Start the stats updater at most once.
        if app.health_checks and not app.stats_updater:
            app.logger.info("starting Envoy status updater")
            app.stats_updater = PeriodicTrigger(app.watcher.update_estats,
                                                period=5)

        # Don't use app.check_scout; it will deadlock. And don't bother doing the Scout
        # update until after we've taken care of Envoy.
        self.check_scout("update")