예제 #1
0
    def __init__(self, root, host_ip=None):
        """Set up the node environment layout rooted at *root*.

        Resolves every well-known subdirectory path from the class
        constants, determines the host IP (resolved from the hostname
        when not supplied), creates the watchdog manager, and ensures
        the working directories exist.
        """
        self.root = root

        # Resolve each well-known subdirectory from its class constant,
        # e.g. APPS_DIR -> self.apps_dir.
        for dir_const in ('APPS_DIR', 'WATCHDOG_DIR', 'RUNNING_DIR',
                          'CACHE_DIR', 'CLEANUP_DIR', 'APP_EVENTS_DIR',
                          'METRICS_DIR', 'ARCHIVES_DIR', 'INIT_DIR',
                          'PENDING_CLEANUP_DIR'):
            setattr(self, dir_const.lower(),
                    os.path.join(self.root, getattr(self, dir_const)))

        if host_ip is not None:
            self.host_ip = host_ip
        else:
            # Fall back to resolving this host's own name.
            self.host_ip = socket.gethostbyname(socket.gethostname())

        self.watchdogs = watchdog.Watchdog(self.watchdog_dir)

        # Ensure the working directories exist.  init/pending_cleanup are
        # intentionally not created here (matches original behavior).
        for path in (self.apps_dir, self.watchdog_dir, self.running_dir,
                     self.cache_dir, self.cleanup_dir, self.app_events_dir,
                     self.metrics_dir, self.archives_dir):
            fs.mkdir_safe(path)
예제 #2
0
    def __init__(self, root):
        """Resolve the directory layout rooted at *root*.

        Only computes paths and creates the watchdog manager; no
        directories are created here.
        """
        self.root = root

        # (attribute name, class constant) pairs for root-level dirs.
        layout = [
            ('apps_dir', 'APPS_DIR'),
            ('bin_dir', 'BIN_DIR'),
            ('watchdog_dir', 'WATCHDOG_DIR'),
            ('running_dir', 'RUNNING_DIR'),
            ('cache_dir', 'CACHE_DIR'),
            ('cleaning_dir', 'CLEANING_DIR'),
            ('cleanup_dir', 'CLEANUP_DIR'),
            ('cleanup_apps_dir', 'CLEANUP_APPS_DIR'),
            ('configs_dir', 'CONFIG_DIR'),
            ('app_events_dir', 'APP_EVENTS_DIR'),
            ('archives_dir', 'ARCHIVES_DIR'),
            ('images_dir', 'IMAGES_DIR'),
            ('init_dir', 'INIT_DIR'),
            ('init1_dir', 'INIT1_DIR'),
            ('tombstones_dir', 'TOMBSTONES_DIR'),
        ]
        for attr, const in layout:
            setattr(self, attr, os.path.join(self.root, getattr(self, const)))

        # Tombstone subdirectories live under the tombstones directory.
        self.cleanup_tombstone_dir = os.path.join(
            self.tombstones_dir, self.CLEANUP_DIR)
        self.running_tombstone_dir = os.path.join(
            self.tombstones_dir, self.RUNNING_DIR)
        self.init_tombstone_dir = os.path.join(
            self.tombstones_dir, self.INIT_DIR)

        self.watchdogs = watchdog.Watchdog(self.watchdog_dir)
예제 #3
0
    def __init__(self, root):
        """Set up the environment layout rooted at *root*.

        Resolves every well-known subdirectory path, creates the
        watchdog manager, and ensures the working directories exist.
        """
        self.root = root

        # (attribute name, class constant) pairs for root-level dirs.
        layout = [
            ('apps_dir', 'APPS_DIR'),
            ('watchdog_dir', 'WATCHDOG_DIR'),
            ('running_dir', 'RUNNING_DIR'),
            ('cache_dir', 'CACHE_DIR'),
            ('cleanup_dir', 'CLEANUP_DIR'),
            ('configs_dir', 'CONFIG_DIR'),
            ('app_events_dir', 'APP_EVENTS_DIR'),
            ('metrics_dir', 'METRICS_DIR'),
            ('archives_dir', 'ARCHIVES_DIR'),
            ('images_dir', 'IMAGES_DIR'),
            ('init_dir', 'INIT_DIR'),
            ('pending_cleanup_dir', 'PENDING_CLEANUP_DIR'),
        ]
        for attr, const in layout:
            setattr(self, attr, os.path.join(self.root, getattr(self, const)))

        self.watchdogs = watchdog.Watchdog(self.watchdog_dir)

        # Create the working directories.  images/pending_cleanup are
        # intentionally not created here (matches original behavior).
        for path in (self.apps_dir, self.watchdog_dir, self.running_dir,
                     self.cache_dir, self.cleanup_dir, self.configs_dir,
                     self.app_events_dir, self.metrics_dir, self.archives_dir,
                     self.init_dir):
            fs.mkdir_safe(path)
예제 #4
0
 def setUp(self):
     """Create a scratch watchdog directory seeded with sample entries."""
     self.root = tempfile.mkdtemp()
     self.watchdog = watchdog.Watchdog(self.root)
     # Seed files with varied names and modification times (atime/mtime
     # both set to `age`), plus one subdirectory.
     entries = [('.tmp', 0), ('foo', 10), ('bar_30s', 15), ('baz#lala', 40)]
     for name, age in entries:
         path = os.path.join(self.root, name)
         with open(path, 'w') as handle:
             handle.write(name)
         os.utime(path, (age, age))
     os.mkdir(os.path.join(self.root, 'food'))
예제 #5
0
    def run(self, watchdogs_dir, *impl_args, **impl_kwargs):
        """Run the service.

        The run procedure will first initialize the service's implementation,
        then setup the service's watchdog, and start the service resource
        resynchronization procedure.

        This procedure is in 4 phases to handle both fresh starts and restarts.

        $ Call the implementation's :function:`initialize` function which
        allows the implementation to query and import the backend resource's
        state.
        $ Setup the service request watcher.
        $ Import all existing requests (passing them to the
        :function:`on_created` implementation's handler).
        $ Call the implementation's :function:`synchronize` function which
        expunges anything allocated against the backend resource that doesn't
        have a matching request anymore.

        The implementation is expected to implement two handlers:

        * :function:`on_created` that handles new resource requests or updates
        to existing resource requests (implementation is expected to be
        idempotent).
        * :function:`on_deleted` that handles deletion of resource requests.
        It should properly handle the case where the backend resource is
        already gone.

        :param ``str`` watchdogs_dir:
            Path to the watchdogs directory.
        :param ``tuple`` impl_args:
            Arguments passed to the implementation's constructor.
        :param ``dict`` impl_kwargs:
            Keywords arguments passed to the implementation's constructor.
        """
        # Load the implementation (lazily, only once per service object).
        if self._service_class is None:
            self._service_class = self._load_impl()
        impl = self._service_class(*impl_args, **impl_kwargs)

        # Setup the watchdog: fires if the service stops heartbeating for
        # longer than the implementation's heartbeat interval.
        watchdogs = watchdog.Watchdog(os.path.realpath(watchdogs_dir))
        watchdog_lease = watchdogs.create(
            name='svc-{svc_name}'.format(svc_name=self.name),
            timeout='{hb:d}s'.format(hb=impl.WATCHDOG_HEARTBEAT_SEC),
            content='Service %r failed' % self.name
        )

        self._run(impl, watchdog_lease)

        _LOGGER.info('Shutting down %r service', self.name)
        # Remove the service heartbeat
        watchdog_lease.remove()
예제 #6
0
def _watcher(root_dir, rules_dir, containers_dir, watchdogs_dir):
    """Treadmill Firewall rule watcher.

    Watches the rules directory for rule files being created or deleted,
    applying or removing the corresponding iptables rules, maintaining a
    refcount of passthrough source IPs, and heartbeating a watchdog
    lease on every loop iteration.  This function loops forever.

    :param ``str`` root_dir:
        Base directory; the other three paths are joined onto it.
    :param ``str`` rules_dir:
        Directory (relative to root_dir) holding firewall rule files.
    :param ``str`` containers_dir:
        Directory (relative to root_dir) passed to the rule manager.
    :param ``str`` watchdogs_dir:
        Directory (relative to root_dir) holding watchdog leases.
    """
    # All directories are given relative to root_dir.
    rules_dir = os.path.join(root_dir, rules_dir)
    containers_dir = os.path.join(root_dir, containers_dir)
    watchdogs_dir = os.path.join(root_dir, watchdogs_dir)

    # Setup the watchdog
    watchdogs = watchdog.Watchdog(watchdogs_dir)
    wd = watchdogs.create(
        'svc-{svc_name}'.format(svc_name='firewall_watcher'),
        '{hb:d}s'.format(hb=_FW_WATCHER_HEARTBEAT * 2),
        'Service firewall watcher failed'
    )

    rulemgr = rulefile.RuleMgr(rules_dir, containers_dir)
    # Refcount of rules per passthrough source IP: the IP is added to the
    # passthrough ipset on first reference and removed on last.
    passthrough = {}

    def on_created(path):
        """Invoked when a network rule is created."""
        rule_file = os.path.basename(path)
        _LOGGER.info('adding %r', rule_file)
        # The rule is the filename
        chain_rule = rulemgr.get_rule(rule_file)
        if chain_rule is not None:
            chain, rule = chain_rule
            iptables.add_rule(rule, chain=chain)
            if isinstance(rule, fw.PassThroughRule):
                # Bump the refcount for this source IP.
                passthrough[rule.src_ip] = (
                    passthrough.setdefault(rule.src_ip, 0) + 1
                )
                _LOGGER.info('Adding passthrough %r', rule.src_ip)
                iptables.add_ip_set(iptables.SET_PASSTHROUGHS, rule.src_ip)
                iptables.flush_pt_conntrack_table(rule.src_ip)
        else:
            _LOGGER.warning('Ignoring unparseable rule %r', rule_file)

    def on_deleted(path):
        """Invoked when a network rule is deleted."""
        # Edge case, if the directory where the rules are kept gets removed,
        # abort
        if path == rulemgr.path:
            _LOGGER.critical('Network rules directory was removed: %r',
                             path)
            utils.sys_exit(1)

        # The rule is the filename
        rule_file = os.path.basename(path)
        _LOGGER.info('Removing %r', rule_file)
        chain_rule = rulemgr.get_rule(rule_file)
        if chain_rule is not None:
            chain, rule = chain_rule
            iptables.delete_rule(rule, chain=chain)
            if isinstance(rule, fw.PassThroughRule):
                # NOTE(review): assumes the IP is present in `passthrough`
                # (i.e. a matching add was seen earlier); a delete without a
                # prior add would raise KeyError here -- confirm intended.
                if passthrough[rule.src_ip] == 1:
                    # Remove the IPs from the passthrough set
                    passthrough.pop(rule.src_ip)
                    _LOGGER.info('Removing passthrough %r', rule.src_ip)
                    iptables.rm_ip_set(iptables.SET_PASSTHROUGHS, rule.src_ip)
                    iptables.flush_pt_conntrack_table(rule.src_ip)
                else:
                    passthrough[rule.src_ip] -= 1

        else:
            _LOGGER.warning('Ignoring unparseable file %r', rule_file)

    _LOGGER.info('Monitoring fw rules changes in %r', rulemgr.path)
    watch = dirwatch.DirWatcher(rulemgr.path)
    watch.on_created = on_created
    watch.on_deleted = on_deleted

    # Minimal initialization of the all chains and sets
    _init_rules()

    # now that we are watching, prime the rules
    current_rules = rulemgr.get_rules()

    # Bulk apply rules
    _configure_rules(current_rules)
    for _chain, rule in current_rules:
        if isinstance(rule, fw.PassThroughRule):
            passthrough[rule.src_ip] = (
                passthrough.setdefault(rule.src_ip, 0) + 1
            )
            # Add the IPs to the passthrough set
            _LOGGER.info('Adding passthrough %r', rule.src_ip)
            iptables.add_ip_set(iptables.SET_PASSTHROUGHS, rule.src_ip)

    _LOGGER.info('Current rules: %r', current_rules)
    while True:
        if watch.wait_for_events(timeout=_FW_WATCHER_HEARTBEAT):
            # Process no more than 5 events between heartbeats
            watch.process_events(max_events=5)

        rulemgr.garbage_collect()
        wd.heartbeat()

    # NOTE(review): the two statements below are unreachable -- the loop
    # above never breaks or returns.
    _LOGGER.info('service shutdown.')
    wd.remove()
예제 #7
0
    def run(self, watchdogs_dir, *impl_args, **impl_kwargs):
        """Run the service.

        Initializes the service implementation, sets up its watchdog
        lease and status socket, replays existing requests, synchronizes
        backend state, then enters a poll-based event loop until the
        service is marked dead.

        :param ``str`` watchdogs_dir:
            Path to the watchdogs directory.
        :param ``tuple`` impl_args:
            Arguments passed to the implementation's constructor.
        :param ``dict`` impl_kwargs:
            Keyword arguments passed to the implementation's constructor.
        """
        # Load the implementation (lazily, only once per service object).
        if self._service_class is None:
            self._service_class = self._load_impl()
        impl = self._service_class(*impl_args, **impl_kwargs)

        # Setup the watchdog
        watchdogs = watchdog.Watchdog(os.path.realpath(watchdogs_dir))
        watchdog_lease = watchdogs.create(
            name='svc-{svc_name}'.format(svc_name=self.name),
            timeout='{hb:d}s'.format(hb=impl.WATCHDOG_HEARTBEAT_SEC),
            content='Service %r failed' % self.name
        )

        # Create the status socket
        ss = self._create_status_socket()

        # Run initialization
        impl.initialize(self._dir)

        watcher = dirwatch.DirWatcher(self._rsrc_dir)
        # Call all the callbacks with the implementation instance
        watcher.on_created = functools.partial(self._on_created, impl)
        watcher.on_deleted = functools.partial(self._on_deleted, impl)
        # NOTE: A modified request is treated as a brand new request
        watcher.on_modified = functools.partial(self._on_created, impl)
        self._io_eventfd = eventfd.eventfd(0, eventfd.EFD_CLOEXEC)

        # Before starting, check the request directory
        svcs = self._check_requests()
        # and "fake" a created event on all the existing requests
        for existing_svcs in svcs:
            self._on_created(impl, existing_svcs)

        # Before starting, make sure backend state and service state are
        # synchronized.
        impl.synchronize()

        # Report service status
        status_info = {}
        status_info.update(impl.report_status())

        # Setup the poll object
        loop_poll = select.poll()
        loop_callbacks = {}

        # Handlers that are always registered: queued I/O events, inotify
        # events from the request watcher, and status socket requests.
        base_event_handlers = [
            (
                self._io_eventfd,
                select.POLLIN,
                functools.partial(
                    self._handle_queued_io_events,
                    watcher=watcher,
                    impl=impl,
                )
            ),
            (
                watcher.inotify,
                select.POLLIN,
                functools.partial(
                    self._handle_io_events,
                    watcher=watcher,
                    impl=impl,
                )
            ),
            (
                ss,
                select.POLLIN,
                functools.partial(
                    self._publish_status,
                    status_socket=ss,
                    status_info=status_info,
                )
            ),
        ]
        # Initial collection of implementation' event handlers
        impl_event_handlers = impl.event_handlers()

        self._update_poll_registration(
            loop_poll,
            loop_callbacks,
            base_event_handlers + impl_event_handlers,
        )

        # Poll at twice the heartbeat frequency so a healthy loop never
        # lets the watchdog lease expire.
        loop_timeout = impl.WATCHDOG_HEARTBEAT_SEC // 2
        while not self._is_dead:

            # Check for events
            updated = self._run_events(
                loop_poll,
                loop_timeout,
                loop_callbacks,
            )

            if updated:
                # Report service status
                status_info.clear()
                status_info.update(impl.report_status())

                # Update poll registration if needed
                impl_event_handlers = impl.event_handlers()
                self._update_poll_registration(
                    loop_poll, loop_callbacks,
                    base_event_handlers + impl_event_handlers,
                )

            # Clean up stale requests
            self._check_requests()

            # Heartbeat
            watchdog_lease.heartbeat()

        _LOGGER.info('Shutting down %r service', self.name)
        # Remove the service heartbeat
        watchdog_lease.remove()
예제 #8
0
    def version_monitor(approot, command):
        """Runs node version monitor.

        Publishes this node's code checksum under the version znode and
        watches it.  When the znode is deleted (an upgrade request), the
        given *command* is run to perform the upgrade and the process
        exits so it restarts with the new code.

        :param approot:
            Treadmill application root directory (parent of watchdog dir).
        :param command:
            Upgrade command, as an iterable of arguments.
        """
        cli_cmd = list(command)
        _LOGGER.info('Initializing code monitor: %r', cli_cmd)

        watchdogs = watchdog.Watchdog(
            os.path.join(
                approot,
                appenv.AppEnvironment.WATCHDOG_DIR,
            ))

        # Exit if the Zookeeper session is lost.
        context.GLOBAL.zk.conn.add_listener(zkutils.exit_on_lost)

        # Wait until the cell masters have created the version node.
        while not context.GLOBAL.zk.conn.exists(z.VERSION):
            _LOGGER.warning('%r node not created yet. Cell masters running?',
                            z.VERSION)
            time.sleep(30)

        hostname = sysinfo.hostname()
        version_path = z.path.version(hostname)

        # Checksum the deployed code tree and publish it.
        codepath = os.path.realpath(utils.rootdir())
        digest = versionmgr.checksum_dir(codepath).hexdigest()
        _LOGGER.info('codepath: %s, digest: %s', codepath, digest)

        info = {
            'codepath': codepath,
            'since': int(time.time()),
            'digest': digest,
        }

        zkutils.put(context.GLOBAL.zk.conn, version_path, info)

        @context.GLOBAL.zk.conn.DataWatch(version_path)
        @exc.exit_on_unhandled
        def _watch_version(_data, _stat, event):
            """Force exit if server node is deleted."""

            # If the node is deleted, we exit to pick up new version code.
            if event is not None and event.type == 'DELETED':
                # The version info not present, restart services and register
                # new checksum.
                _LOGGER.info('Upgrade requested, running: %s', cli_cmd)

                if cli_cmd:
                    try:
                        subproc.check_call(cli_cmd)
                        # Record successful upgrade.
                    except subprocess.CalledProcessError:
                        _LOGGER.exception('Upgrade failed.')
                        # Immediately trigger a watchdog timeout
                        watchdogs.create(
                            name='version_monitor',
                            timeout='0s',
                            content='Upgrade to '
                            '{code!r}({digest}) failed'.format(code=codepath,
                                                               digest=digest),
                        ).heartbeat()
                        # NOTE(review): removes 'digest' only from the local
                        # dict; the znode published above is unchanged --
                        # confirm this is intended.
                        del info['digest']

                _LOGGER.info('Upgrade complete.')
                utils.sys_exit(0)

            # Keep the DataWatch registered.
            return True

        # Idle forever; the DataWatch callback handles upgrades and exit.
        while True:
            time.sleep(100000)