def test_watcher(self):
    """Tests created/modified/deleted callbacks.

    A file is created, appended to, removed and created again inside the
    watched directory; only the first three events are processed, so the
    last one must be reported as still pending.
    """
    seen_created = []
    seen_modified = []
    seen_deleted = []

    def _record_created(path):
        seen_created.append(path)
        return 'one'

    def _record_modified(path):
        seen_modified.append(path)
        return 'two'

    def _record_deleted(path):
        seen_deleted.append(path)
        return 'three'

    target = os.path.join(self.root, 'a')

    watcher = dirwatch.DirWatcher(self.root)
    watcher.on_created = _record_created
    watcher.on_modified = _record_modified
    watcher.on_deleted = _record_deleted

    # Generate the events: create, modify, delete, create again.
    with open(target, 'w') as handle:
        handle.write('hello')
    with open(target, 'a') as handle:
        handle.write(' world!')
    os.unlink(target)
    with open(target, 'w') as handle:
        handle.write('hello again')

    results = watcher.process_events(max_events=3)

    self.assertEqual([target], seen_created)
    self.assertEqual([target], seen_modified)
    self.assertEqual([target], seen_deleted)

    expected_results = [
        (dirwatch.DirWatcherEvent.CREATED, target, 'one'),
        (dirwatch.DirWatcherEvent.MODIFIED, target, 'two'),
        (dirwatch.DirWatcherEvent.DELETED, target, 'three'),
        (dirwatch.DirWatcherEvent.MORE_PENDING, None, None),
    ]
    self.assertEqual(expected_results, results)
def _run(self, impl, watchdog_lease):
    """Linux implementation of run.

    Initializes the implementation, replays the requests already present
    in the resource directory, synchronizes the backend and then loops on
    directory events until the service is flagged as dead, heartbeating
    the watchdog lease on every iteration.
    """
    impl.initialize(self._dir)

    # Every callback receives the implementation instance as first argument
    request_watcher = dirwatch.DirWatcher(self._rsrc_dir)
    request_watcher.on_created = functools.partial(self._on_created, impl)
    request_watcher.on_deleted = functools.partial(self._on_deleted, impl)
    # NOTE: A modified request is treated as a brand new request
    request_watcher.on_modified = functools.partial(self._on_created, impl)

    # Replay a "created" event for each request found at startup
    for pending_request in self._check_requests():
        self._on_created(impl, pending_request)

    # Make sure backend state and service state agree before looping
    impl.synchronize()

    heartbeat_period = impl.WATCHDOG_HEARTBEAT_SEC // 2
    while True:
        if self._is_dead:
            break

        has_events = request_watcher.wait_for_events(timeout=heartbeat_period)
        if has_events:
            request_watcher.process_events()

        # Clean up stale requests
        self._check_requests()

        # Heartbeat
        watchdog_lease.heartbeat()
def test_on_created(self):
    """Test _servers.ServersWatch._on_created."""
    # Access protected module
    # pylint: disable=W0212
    seen_hostnames = set()

    def _server_added(server_info):
        seen_hostnames.add(server_info['hostname'])

    dispatcher = dirwatch.DirWatcherDispatcher(dirwatch.DirWatcher())
    watch = servers.ServersWatch(
        dispatcher, self.root, 'partition1', _server_added
    )
    watch.sync()

    # Drop a new server file, then deliver the event by hand.
    server_path = os.path.join(self.servers_dir, 'server1.ad.com')
    server_data = {
        servers.DC_KEY: 'dc.ad.com',
        servers.DN_KEY: 'CN=server1,DC=AD,DC=COM',
        'partition': 'partition1'
    }
    with io.open(server_path, 'w') as out:
        yaml.dump(server_data, out)

    watch._on_created(server_path)

    self.assertEqual(seen_hostnames, {'server1.ad.com'})
def wait_for_file(filename, timeout=None):
    """Wait up to ``timeout`` seconds for a file to exist.

    :param ``str`` filename:
        Path of the file to wait for.
    :param ``int`` timeout:
        Maximum amount of seconds to wait for the file. ``None`` selects
        ``DEFAULT_TIMEOUT``; ``0`` only checks for existence, without
        creating a watcher or waiting.
    :returns ``bool``:
        ``True`` if the file exists, ``False`` if it still does not exist
        once the timeout has expired.
    """
    if timeout is None:
        timeout = DEFAULT_TIMEOUT
    elif timeout == 0:
        return os.path.exists(filename)

    # A bare file name has an empty dirname: watch the current directory
    # rather than handing an empty path to the watcher.
    filedir = os.path.dirname(filename) or os.curdir
    # TODO: Fine tune the watcher mask for efficiency.
    watcher = dirwatch.DirWatcher(filedir)

    now = time.time()
    end_time = now + timeout
    while not os.path.exists(filename):
        if watcher.wait_for_events(timeout=max(0, end_time - now)):
            watcher.process_events()

        now = time.time()
        if now > end_time:
            # The file may have shown up during the very last wait; report
            # what is actually on disk instead of assuming a timeout.
            return os.path.exists(filename)

    return True
def run(self):
    """Run the monitor.

    Watch the configured services (plus everything found in the optional
    services directory) and loop on directory events. Each time a down
    reason is recorded, hand it to the down action; stop as soon as the
    down action returns a false value.
    """
    watcher = dirwatch.DirWatcher()
    self._dirwatcher = watcher
    watcher.on_deleted = self._on_deleted
    watcher.on_created = self._on_created

    service_dirs = self._services[:]
    scan_dir = self._services_dir
    if scan_dir is not None:
        # If we have a svscan directory to watch add it.
        watcher.add_dir(scan_dir)
        visible_entries = [
            entry for entry in os.listdir(scan_dir)
            if not entry.startswith('.')
        ]
        service_dirs += [
            os.path.join(scan_dir, entry) for entry in visible_entries
        ]

    for service_dir in service_dirs:
        self._add_service(service_dir)

    while True:
        # Pump directory events until something reports a down reason.
        while self._down_reason is None:
            if self._dirwatcher.wait_for_events():
                self._dirwatcher.process_events()

        should_continue = self._down_action.execute(self._down_reason)
        self._down_reason = None
        if not should_continue:
            break
def test_signal(self):
    """Tests behavior when signalled during wait.

    The poll object raises an ``EINTR`` ``select.error``; waiting for
    events must then report that nothing is ready.
    """
    watcher = dirwatch.DirWatcher(self.root)

    interrupted = select.error(errno.EINTR, '')
    poll_mock = select.poll.return_value
    poll_mock.poll.side_effect = interrupted

    got_events = watcher.wait_for_events()
    self.assertFalse(got_events)
def run(self):
    """Setup directories' watches and start the re-scan ticker.

    Watches the cache directory and dispatches its events; when a wait
    times out without events, logs the cache/running directory content if
    the service is active. The watchdog lease is heartbeated on every
    loop iteration.
    """
    # Start idle
    self._is_active = False

    # Setup the watchdog
    lease_name = 'svc-{svc_name}'.format(svc_name=self.name)
    lease_timeout = '{hb:d}s'.format(hb=_WATCHDOG_TIMEOUT_SEC)
    watchdog_lease = self.tm_env.watchdogs.create(
        name=lease_name,
        timeout=lease_timeout,
        content='Service %r failed' % self.name
    )

    watch = dirwatch.DirWatcher(self.tm_env.cache_dir)
    watch.on_created = self._on_created
    watch.on_modified = self._on_modified
    watch.on_deleted = self._on_deleted

    # Start the timer
    watchdog_lease.heartbeat()

    while True:
        if watch.wait_for_events(timeout=_HEARTBEAT_SEC):
            watch.process_events(max_events=5)

        elif self._is_active is True:
            cache_pattern = os.path.join(self.tm_env.cache_dir, '*')
            running_pattern = os.path.join(self.tm_env.running_dir, '*')

            # Container names, derived from every event file
            cached_containers = set(
                map(appcfg.eventfile_unique_name, glob.glob(cache_pattern))
            )
            # Instance names, derived from every running link
            running_instances = set(
                map(os.path.basename, glob.glob(running_pattern))
            )
            _LOGGER.debug('content of %r and %r: %r <-> %r',
                          self.tm_env.cache_dir,
                          self.tm_env.running_dir,
                          cached_containers,
                          running_instances)

        else:
            _LOGGER.info('Still inactive during heartbeat event.')

        watchdog_lease.heartbeat()

    # Graceful shutdown.
    _LOGGER.info('service shutdown.')
    watchdog_lease.remove()
def alert_monitor_cmd(approot, plugin, max_queue_length, wait_interval):
    """Publish alerts.

    Watches the alerts directory of the application environment rooted at
    ``approot`` and serves forever, handling new alert files with the
    backend loaded from ``plugin``.
    """
    tm_env = appenv.AppEnvironment(root=approot)

    backend = _load_alert_backend(plugin)
    on_create = _get_on_create_handler(backend)

    watcher = dirwatch.DirWatcher(tm_env.alerts_dir)
    watcher.on_created = on_create

    _serve_forever(
        watcher,
        tm_env.alerts_dir,
        max_queue_length,
        wait_interval
    )
def test_watcher(self):
    """Tests created/modified/deleted callbacks.

    On Linux three events are processed; on other platforms the second
    write is reported as an additional MODIFIED event, so four events are
    processed. Either way one more event must remain pending.
    """
    on_linux = sys.platform.startswith('linux')

    seen_created = set()
    seen_modified = set()
    seen_deleted = set()

    def _record_created(path):
        seen_created.add(path)
        return 'one'

    def _record_modified(path):
        seen_modified.add(path)
        return 'two'

    def _record_deleted(path):
        seen_deleted.add(path)
        return 'three'

    target = os.path.join(self.root, 'a')

    watcher = dirwatch.DirWatcher(self.root)
    watcher.on_created = _record_created
    watcher.on_modified = _record_modified
    watcher.on_deleted = _record_deleted

    # Nothing happened yet.
    self.assertFalse(watcher.wait_for_events(0))

    with io.open(target, 'w') as handle:
        handle.write('hello')
    with io.open(target, 'a') as handle:
        handle.write(' world!')
    os.unlink(target)
    with io.open(target, 'w') as handle:
        handle.write('hello again')

    if on_linux:
        results = watcher.process_events(max_events=3)
    else:
        self.assertTrue(watcher.wait_for_events(0))
        self.assertTrue(watcher.wait_for_events(0))
        results = watcher.process_events(max_events=4)

    self.assertEqual({target}, seen_created)
    self.assertEqual({target}, seen_modified)
    self.assertEqual({target}, seen_deleted)

    created_event = (dirwatch.DirWatcherEvent.CREATED, target, 'one')
    modified_event = (dirwatch.DirWatcherEvent.MODIFIED, target, 'two')
    deleted_event = (dirwatch.DirWatcherEvent.DELETED, target, 'three')
    pending_event = (dirwatch.DirWatcherEvent.MORE_PENDING, None, None)

    if on_linux:
        self.assertEqual(
            [created_event, modified_event, deleted_event, pending_event],
            results,
        )
    else:
        self.assertEqual(
            [
                created_event,
                modified_event,
                modified_event,
                deleted_event,
                pending_event,
            ],
            results,
        )
def _configure(self):
    """Configures the dispatcher with the monitor actions defined in
    the config directory.

    Each regular file of the config directory is read line by line; a line
    has the form ``<path>;<plugin>[;<json params>]``. The resulting
    handlers are registered for CREATED events on their path, and every
    entry already present in that path is replayed as a created event.
    """
    handlers_by_path = {}

    for entry in os.listdir(self._config_dir):
        config_file = os.path.join(self._config_dir, entry)
        if not os.path.isfile(config_file):
            continue

        _LOGGER.debug('Configuring for file: %s', config_file)

        with io.open(config_file) as cfg:
            for line in cfg:
                fields = line.rstrip().split(';', 2)
                if len(fields) < 2:
                    _LOGGER.warning('skiping config line %s', line)
                    continue

                watch_path, plugin_name = fields[0], fields[1]
                try:
                    handler_cls = plugin_manager.load(_TOMESTONES_PLUGINS,
                                                      plugin_name)
                except KeyError:
                    _LOGGER.warning('Tomestone handler does not exist: %r',
                                    plugin_name)
                    continue

                # Optional third field carries JSON encoded parameters
                params = json.loads(fields[2]) if len(fields) > 2 else {}

                handlers_by_path[watch_path] = handler_cls(self._tm_env,
                                                           params)

    self._dirwatcher = dirwatch.DirWatcher()
    self._dispatcher = dirwatch.DirWatcherDispatcher(self._dirwatcher)
    self._tombstones = collections.deque()

    for path, handler in six.iteritems(handlers_by_path):
        fs.mkdir_safe(path)
        self._dirwatcher.add_dir(path)

        # Bind the handler as a default argument so each callback keeps its
        # own handler instead of the last one of the loop.
        callbacks = {
            dirwatch.DirWatcherEvent.CREATED:
                lambda p, h=handler: self._on_created(p, h)
        }
        self._dispatcher.register(path, callbacks)
        _LOGGER.info('Watching %s with handler %r', path, handler)

        for existing in os.listdir(path):
            self._on_created(os.path.join(path, existing), handler)

    _LOGGER.info('Monitor configured')
def test_sync(self):
    """Test _servers.ServersWatch.sync.

    Four server files are written; after syncing for ``partition1`` only
    ``server1.ad.com`` and ``server2.ad.com`` are expected to be known.
    """
    fixtures = [
        ('server1.ad.com', {
            servers.DC_KEY: 'dc.ad.com',
            servers.DN_KEY: 'CN=server1,DC=AD,DC=COM',
            'partition': 'partition1'
        }),
        ('server2.ad.com', {
            servers.DC_KEY: 'dc.ad.com',
            servers.DN_KEY: 'CN=server2,DC=AD,DC=COM',
            'partition': 'partition1'
        }),
        ('server3.ad.com', {
            servers.DC_KEY: 'dc.ad.com',
            servers.DN_KEY: 'CN=server3,DC=AD,DC=COM',
            'partition': 'partition2'
        }),
        ('server3.other.com', {
            'a': '1',
            'b': '2',
            'partition': 'partition1'
        }),
    ]
    for hostname, content in fixtures:
        with io.open(os.path.join(self.servers_dir, hostname), 'w') as out:
            yaml.dump(content, out)

    seen_hostnames = set()

    def _server_added(server_info):
        seen_hostnames.add(server_info['hostname'])

    dispatcher = dirwatch.DirWatcherDispatcher(dirwatch.DirWatcher())
    watch = servers.ServersWatch(
        dispatcher, self.root, 'partition1', _server_added
    )
    watch.sync()

    self.assertEqual(seen_hostnames, {'server1.ad.com', 'server2.ad.com'})

    info = watch.get_server_info('server1.ad.com')
    self.assertEqual(info[servers.DN_KEY], 'CN=server1,DC=AD,DC=COM')

    self.assertEqual(2, len(watch.get_all_server_info()))
def __init__(self, root):
    """Set up handler bookkeeping and a directory watcher for ``root``.

    The watcher starts without any watched directory; its created,
    deleted and modified callbacks are routed to this instance.
    """
    self.handlers = collections.defaultdict(list)
    self.impl = {}
    self.root = root

    watcher = dirwatch.DirWatcher()
    watcher.on_created = self._on_created
    watcher.on_deleted = self._on_deleted
    watcher.on_modified = self._on_modified
    self.watcher = watcher

    self.ws = make_handler(self)
def alert_monitor_cmd(approot, plugin):
    """Publish alerts.

    Alerts already present in the alerts directory are handled first, then
    the watcher is served forever.
    """
    tm_env = appenv.AppEnvironment(root=approot)
    alerts_dir = tm_env.alerts_dir

    watcher = dirwatch.DirWatcher(alerts_dir)
    backend = _load_alert_backend(plugin)
    watcher.on_created = _get_on_create_handler(backend)

    # Replay the alerts that were written before we started watching
    for alert_name in os.listdir(alerts_dir):
        alert_path = os.path.join(alerts_dir, alert_name)
        watcher.on_created(alert_path)

    _serve_forever(watcher)
def run(self):
    """Monitors the events directory and publishes events.

    Event files already present are handled first, then new ones are
    handled as they are created. Never returns.
    """
    events_watcher = dirwatch.DirWatcher(self.events_dir)
    events_watcher.on_created = self._on_created

    # Catch up with the events written before the watch was installed
    pending = [
        os.path.join(self.events_dir, name)
        for name in os.listdir(self.events_dir)
    ]
    for event_path in pending:
        self._on_created(event_path)

    while True:
        got_events = events_watcher.wait_for_events(60)
        if got_events:
            events_watcher.process_events()
def __init__(self, fsroot):
    """Create the backend rooted at ``fsroot``.

    Builds a directory watcher with its dispatcher and starts a daemon
    thread running ``_run_dirwatcher`` before delegating to the parent
    initializer.
    """
    self.fsroot = fsroot

    # pylint: disable=C0103
    self.ChildrenWatch = self._childrenwatch

    watcher = dirwatch.DirWatcher()
    self._dirwatcher = watcher
    self._dirwatch_dispatcher = dirwatch.DirWatcherDispatcher(watcher)

    # Daemon thread, so it does not keep the process alive on exit
    worker = threading.Thread(target=self._run_dirwatcher)
    worker.daemon = True
    worker.start()

    super(FsBackend, self).__init__()
def hosts_aliases_cmd(aliases_dir, source, dest):
    """Manage /etc/hosts aliases.

    Reads the original hosts content from ``source``, resolves every
    alias found in ``aliases_dir`` and regenerates ``dest``, then keeps
    regenerating it as aliases are created or removed. While ``retry`` is
    non-empty, generation is attempted again once ``_DNS_RETRY_INTERVAL``
    seconds have passed since the last attempt. Never returns.
    """
    aliases = {}
    retry = set()

    with io.open(source, 'r') as fd:
        original = fd.read()

    def _is_ignored(path):
        """Aliases whose name starts with '^' are not resolved."""
        return os.path.basename(path).startswith('^')

    def _on_created(path):
        """Callback invoked when new alias is created."""
        if _is_ignored(path):
            return

        _resolve(path, aliases)
        _generate(aliases, original, dest, retry)

    def _on_deleted(path):
        """Callback invoked when alias is removed."""
        _LOGGER.info('Alias removed: %s', path)
        aliases.pop(os.path.basename(path), None)
        _generate(aliases, original, dest, retry)

    watcher = dirwatch.DirWatcher(aliases_dir)
    watcher.on_created = _on_created
    watcher.on_deleted = _on_deleted

    # Initial pass: drop the '^' entries, resolve everything else.
    for path in glob.glob(os.path.join(aliases_dir, '*')):
        if _is_ignored(path):
            os.unlink(path)
        else:
            _resolve(path, aliases)
    _generate(aliases, original, dest, retry)

    last_retry = 0
    while True:
        if watcher.wait_for_events(timeout=100):
            watcher.process_events(max_events=100)
            last_retry = time.time()

        retry_due = time.time() > (last_retry + _DNS_RETRY_INTERVAL)
        if retry and retry_due:
            _generate(aliases, original, dest, retry)
            last_retry = time.time()
def test_on_modified(self):
    """Test _servers.ServersWatch._on_modified.

    Rewriting a known server file and delivering a modified event must
    refresh the stored server info without invoking the "added" callback
    a second time.
    """
    # Access protected module
    # pylint: disable=W0212
    server_path = os.path.join(self.servers_dir, 'server1.ad.com')

    def _write_server(dc_name):
        with io.open(server_path, 'w') as out:
            yaml.dump(
                {
                    servers.DC_KEY: dc_name,
                    servers.DN_KEY: 'CN=server1,DC=AD,DC=COM',
                    'partition': 'partition1'
                },
                out)

    _write_server('dc.ad.com')

    seen_dcs = set()

    def _server_added(server_info):
        seen_dcs.add(server_info[servers.DC_KEY])

    dispatcher = dirwatch.DirWatcherDispatcher(dirwatch.DirWatcher())
    watch = servers.ServersWatch(
        dispatcher, self.root, 'partition1', _server_added
    )
    watch.sync()

    self.assertEqual(seen_dcs, {'dc.ad.com'})
    info = watch.get_server_info('server1.ad.com')
    self.assertEqual(info[servers.DC_KEY], 'dc.ad.com')

    _write_server('dc2.ad.com')
    watch._on_modified(server_path)

    # server_added was not called a second time
    self.assertEqual(seen_dcs, {'dc.ad.com'})
    info = watch.get_server_info('server1.ad.com')
    self.assertEqual(info[servers.DC_KEY], 'dc2.ad.com')
def publish_tickets(self, realms, once=False):
    """Publish list of all tickets present on the locker.

    Every ticket file of the spool directory whose name ends with one of
    ``realms`` is described with ``klist`` and its details are stored in
    an ephemeral Zookeeper node. Stale tickets are pruned right after the
    initial publication.

    :param realms:
        Iterable of realm suffixes; ticket files not ending with one of
        them are ignored.
    :param ``bool`` once:
        When true, return after the initial publish/prune pass. Otherwise
        keep publishing newly created tickets and pruning every
        ``_STALE_TKTS_PRUNE_INTERVAL`` seconds, forever.
    """
    zkutils.ensure_exists(self.zkclient, z.TICKETS)
    watcher = dirwatch.DirWatcher(self.tkt_spool_dir)

    def _publish_ticket(tkt_file):
        """Publish ticket details."""
        # tkt_file is a full path (it is handed to klist as-is), so the
        # hidden-file test has to look at the file name, not at the path.
        if os.path.basename(tkt_file).startswith('.'):
            return

        if not any(tkt_file.endswith(realm) for realm in realms):
            _LOGGER.info('Ignore tkt_file: %s', tkt_file)
            return

        try:
            tkt_details = subproc.check_output(
                ['klist', '-5', '-e', '-f', tkt_file])
            tkt_node = z.path.tickets(os.path.basename(tkt_file),
                                      self.hostname)
            zkutils.put(self.zkclient, tkt_node, tkt_details,
                        ephemeral=True)
        except subproc.CalledProcessError:
            _LOGGER.warning('Unable to get tickets details.')

    for tkt_file in glob.glob(os.path.join(self.tkt_spool_dir, '*')):
        _publish_ticket(tkt_file)

    self.prune_tickets()
    last_prune = time.time()

    if once:
        return

    watcher.on_created = _publish_ticket
    while True:
        if time.time() - last_prune > _STALE_TKTS_PRUNE_INTERVAL:
            self.prune_tickets()
            last_prune = time.time()

        if watcher.wait_for_events(timeout=_STALE_TKTS_PRUNE_INTERVAL):
            watcher.process_events(max_events=_DIRWATCH_EVENTS_COUNT)
def __init__(self, root, impl=None, watches=None):
    """Set up the watcher, its permanent watches and handler bookkeeping.

    :param root:
        Root directory; stored after resolution with ``os.path.realpath``.
    :param impl:
        Optional implementation mapping; an empty dict when not provided.
    :param watches:
        Optional list of watches; every directory they expand to (through
        ``_get_watch_dirs``) is added to the watcher permanently.
    """
    self.root = os.path.realpath(root)
    self.impl = impl if impl else {}
    self.watches = watches if watches else []

    self.watcher = dirwatch.DirWatcher()
    self.watcher.on_created = self._on_created
    self.watcher.on_deleted = self._on_deleted
    self.watcher.on_modified = self._on_modified

    # Collect the directories of all watches first, so each one is only
    # added to the watcher once.
    self.watch_dirs = set()
    for watch in self.watches:
        self.watch_dirs.update(self._get_watch_dirs(watch))

    for permanent_dir in self.watch_dirs:
        _LOGGER.info('Added permanent dir watcher: %s', permanent_dir)
        self.watcher.add_dir(permanent_dir)

    self.ws = make_handler(self)
    self.handlers = collections.defaultdict(list)
def hosts_aliases_cmd(aliases_dir, source, dest):
    """Manage /etc/hosts aliases.

    Reads the original hosts content from the ``source`` file object,
    resolves every alias found in ``aliases_dir`` and regenerates
    ``dest``, then keeps regenerating it as aliases are created or
    removed. Never returns.
    """
    aliases = {}

    with source:
        original = source.read()

    def _is_ignored(path):
        """Aliases whose name starts with '^' are not resolved."""
        return os.path.basename(path).startswith('^')

    def _on_created(path):
        """Callback invoked when new alias is created."""
        if _is_ignored(path):
            return

        _resolve(path, aliases)
        _generate(aliases, original, dest)

    def _on_deleted(path):
        """Callback invoked when alias is removed."""
        _LOGGER.info('Alias removed: %s', path)
        aliases.pop(os.path.basename(path), None)
        _generate(aliases, original, dest)

    watcher = dirwatch.DirWatcher(aliases_dir)
    watcher.on_created = _on_created
    watcher.on_deleted = _on_deleted

    # Initial pass: drop the '^' entries, resolve everything else.
    for path in glob.glob(os.path.join(aliases_dir, '*')):
        if _is_ignored(path):
            os.unlink(path)
        else:
            _resolve(path, aliases)
    _generate(aliases, original, dest)

    while True:
        got_events = watcher.wait_for_events(timeout=100)
        if got_events:
            watcher.process_events(max_events=100)
def __init__(self, fs_root, partition, group_ou, group_pattern):
    """Wire the placement and server watches for the given partition.

    CREATED/DELETED events on the placement directory itself are routed
    to the server callbacks, those on its direct children to the
    placement callbacks. A ``ServersWatch`` sharing the same dispatcher
    reports added and removed servers.
    """
    self._config = GMSAConfig(group_ou, group_pattern)

    fs_root = os.path.realpath(fs_root)
    placement_path = os.path.join(fs_root, 'placement')
    self._placement_path = placement_path

    self._dirwatcher = dirwatch.DirWatcher(placement_path)
    dispatcher = dirwatch.DirWatcherDispatcher(self._dirwatcher)
    self._dirwatch_dispatcher = dispatcher

    server_callbacks = {
        dirwatch.DirWatcherEvent.CREATED: self._on_created_server,
        dirwatch.DirWatcherEvent.DELETED: self._on_deleted_server,
    }
    placement_callbacks = {
        dirwatch.DirWatcherEvent.CREATED: self._on_created_placement,
        dirwatch.DirWatcherEvent.DELETED: self._on_deleted_placement,
    }
    dispatcher.register(placement_path, server_callbacks)
    dispatcher.register(placement_path + '/*', placement_callbacks)

    self._servers_watch = servers.ServersWatch(
        dispatcher,
        fs_root,
        partition,
        self._add_server,
        self._remove_server
    )

    self._proids = {}
    self._servers = set()
    self._synced = False
def run(self):
    """Run the monitor.

    Watch the configured services (plus everything found in the scan
    directories) and loop on directory events. Whenever down reasons have
    been recorded, run each of them through the down action; stop as soon
    as one execution returns a false value, otherwise clear the reasons
    and resume watching.
    """
    watcher = dirwatch.DirWatcher()
    self._dirwatcher = watcher
    watcher.on_deleted = self._on_deleted
    watcher.on_created = self._on_created

    service_dirs = self._services[:]
    for scan_dir in self._scan_dirs:
        # If we have a svscan directory to watch add it.
        watcher.add_dir(scan_dir)
        visible_entries = [
            entry for entry in os.listdir(scan_dir)
            if not entry.startswith('.')
        ]
        service_dirs += [
            os.path.join(scan_dir, entry) for entry in visible_entries
        ]

    for service_dir in service_dirs:
        self._add_service(service_dir)

    while True:
        # Pump directory events until something reports a down reason.
        while not self._down_reasons:
            if self._dirwatcher.wait_for_events():
                self._dirwatcher.process_events()

        # Process the down reasons through the down_action callback;
        # evaluation stops at the first one asking to stop the monitor.
        keep_going = all(
            self._down_action.execute(reason)
            for reason in self._down_reasons
        )
        if not keep_going:
            return

        # Clear the down reasons now that we have processed them all.
        self._down_reasons.clear()
def run(self):
    """Setup directories' watches and start the re-scan ticker.

    Waits for the supervisor refresh to succeed, installs the cleanup
    directory watch, syncs once and then loops on directory events while
    heartbeating the watchdog lease.
    """
    # Setup the watchdog
    lease_name = 'svc-{svc_name}'.format(svc_name=_SERVICE_NAME)
    lease_timeout = '{hb:d}s'.format(hb=_WATCHDOG_HEARTBEAT_SEC)
    lease_content = 'Service {svc_name!r} failed'.format(
        svc_name=_SERVICE_NAME)
    watchdog_lease = self.tm_env.watchdogs.create(
        name=lease_name,
        timeout=lease_timeout,
        content=lease_content,
    )

    # Wait on svscan starting up first to avoid race conditions with
    # refreshing it later.
    svscan_up = False
    while not svscan_up:
        try:
            self._refresh_supervisor()
        except subproc.CalledProcessError:
            _LOGGER.info('Waiting on svscan running.')
            time.sleep(0.2)
        else:
            _LOGGER.info('svscan is running.')
            svscan_up = True

    cleanup_watcher = dirwatch.DirWatcher(self.tm_env.cleanup_dir)
    cleanup_watcher.on_created = self._add_cleanup_app
    cleanup_watcher.on_deleted = self._remove_cleanup_app

    self._sync()

    heartbeat_period = _WATCHDOG_HEARTBEAT_SEC // 2
    while True:
        if cleanup_watcher.wait_for_events(timeout=heartbeat_period):
            cleanup_watcher.process_events(
                max_events=_MAX_REQUEST_PER_CYCLE
            )

        # Heartbeat
        watchdog_lease.heartbeat()

    _LOGGER.info('Cleanup service shutdown.')
    watchdog_lease.remove()
def run(self):
    """Load and publish initial state.

    Replays every entry of the endpoints directory as a created event,
    publishes once, then keeps processing directory events and publishes
    again each time ``up_to_date`` has been reset. Never returns.
    """
    endpoints_dir = self.endpoints_dir
    _LOGGER.info('Starting endpoint publisher: %s', endpoints_dir)

    watcher = dirwatch.DirWatcher(endpoints_dir)
    watcher.on_created = self._on_created
    watcher.on_deleted = self._on_deleted

    # NOTE(review): entries are passed as bare names here (not joined
    # with the directory) - confirm that is what _on_created expects.
    for entry in os.listdir(endpoints_dir):
        self._on_created(entry)

    self._publish()
    self.up_to_date = True

    while True:
        got_events = watcher.wait_for_events(timeout=1)
        if got_events:
            watcher.process_events(max_events=self._MAX_REQUEST_PER_CYCLE)

        if self.up_to_date:
            continue

        self._publish()
        self.up_to_date = True
def get_dir_watch(self):
    """Construct a watcher for the cleanup directory.

    :returns:
        A ``DirWatcher`` on ``self.paths.cleanup_dir`` whose created
        events are routed to ``self._on_created``.
    """
    cleanup_watcher = dirwatch.DirWatcher(self.paths.cleanup_dir)
    cleanup_watcher.on_created = self._on_created

    return cleanup_watcher
def get_dir_watch(self):
    """Construct a watcher for the manifest directory.

    :returns:
        A ``DirWatcher`` on ``self.paths.manifest_dir`` whose created
        events are routed to ``self._on_created``.
    """
    manifest_watcher = dirwatch.DirWatcher(self.paths.manifest_dir)
    manifest_watcher.on_created = self._on_created

    return manifest_watcher
def run(self, watchdogs_dir, *impl_args, **impl_kwargs): """Run the service.""" # Load the implementation if self._service_class is None: self._service_class = self._load_impl() impl = self._service_class(*impl_args, **impl_kwargs) # Setup the watchdog watchdogs = watchdog.Watchdog(os.path.realpath(watchdogs_dir)) watchdog_lease = watchdogs.create( name='svc-{svc_name}'.format(svc_name=self.name), timeout='{hb:d}s'.format(hb=impl.WATCHDOG_HEARTBEAT_SEC), content='Service %r failed' % self.name ) # Create the status socket ss = self._create_status_socket() # Run initialization impl.initialize(self._dir) watcher = dirwatch.DirWatcher(self._rsrc_dir) # Call all the callbacks with the implementation instance watcher.on_created = functools.partial(self._on_created, impl) watcher.on_deleted = functools.partial(self._on_deleted, impl) # NOTE: A modified request is treated as a brand new request watcher.on_modified = functools.partial(self._on_created, impl) self._io_eventfd = eventfd.eventfd(0, eventfd.EFD_CLOEXEC) # Before starting, check the request directory svcs = self._check_requests() # and "fake" a created event on all the existing requests for existing_svcs in svcs: self._on_created(impl, existing_svcs) # Before starting, make sure backend state and service state are # synchronized. 
impl.synchronize() # Report service status status_info = {} status_info.update(impl.report_status()) # Setup the poll object loop_poll = select.poll() loop_callbacks = {} base_event_handlers = [ ( self._io_eventfd, select.POLLIN, functools.partial( self._handle_queued_io_events, watcher=watcher, impl=impl, ) ), ( watcher.inotify, select.POLLIN, functools.partial( self._handle_io_events, watcher=watcher, impl=impl, ) ), ( ss, select.POLLIN, functools.partial( self._publish_status, status_socket=ss, status_info=status_info, ) ), ] # Initial collection of implementation' event handlers impl_event_handlers = impl.event_handlers() self._update_poll_registration( loop_poll, loop_callbacks, base_event_handlers + impl_event_handlers, ) loop_timeout = impl.WATCHDOG_HEARTBEAT_SEC // 2 while not self._is_dead: # Check for events updated = self._run_events( loop_poll, loop_timeout, loop_callbacks, ) if updated: # Report service status status_info.clear() status_info.update(impl.report_status()) # Update poll registration if needed impl_event_handlers = impl.event_handlers() self._update_poll_registration( loop_poll, loop_callbacks, base_event_handlers + impl_event_handlers, ) # Clean up stale requests self._check_requests() # Heartbeat watchdog_lease.heartbeat() _LOGGER.info('Shuting down %r service', self.name) # Remove the service heartbeat watchdog_lease.remove()
def top(approot):
    """Start cleanup process.

    Handles every cleanup link already pending in the cleanup directory,
    then keeps handling new ones as they are created while heartbeating
    the watchdog lease.
    """
    tm_env = appenv.AppEnvironment(root=approot)

    # Setup the watchdog
    lease_name = 'svc-{svc_name}'.format(svc_name=_SERVICE_NAME)
    lease_timeout = '{hb:d}s'.format(hb=_WATCHDOG_HEARTBEAT_SEC)
    lease_content = 'Service {svc_name!r} failed'.format(
        svc_name=_SERVICE_NAME)
    watchdog_lease = tm_env.watchdogs.create(
        name=lease_name,
        timeout=lease_timeout,
        content=lease_content,
    )

    def _on_created(path):
        """Callback invoked with new cleanup file appears."""
        fullpath = os.path.join(tm_env.cleanup_dir, path)
        log_context = lc.LogContext(_LOGGER, os.path.basename(path),
                                    lc.ContainerAdapter)
        with log_context as log:
            if not os.path.islink(fullpath):
                log.logger.info('Ignore - not a link: %s', fullpath)
                return

            container_dir = os.readlink(fullpath)
            log.logger.info('Cleanup: %s => %s', path, container_dir)

            if not os.path.exists(container_dir):
                log.logger.info('Container dir does not exist: %r',
                                container_dir)
            else:
                try:
                    log.logger.info(
                        'invoking treadmill.TREADMILL_BIN script: %r',
                        treadmill.TREADMILL_BIN)
                    finish_cmd = [
                        treadmill.TREADMILL_BIN,
                        'sproc',
                        'finish',
                        container_dir
                    ]
                    subprocess.check_call(finish_cmd)
                except subprocess.CalledProcessError:
                    log.logger.exception('Fatal error running %r.',
                                         treadmill.TREADMILL_BIN)
                    raise

            # The request has been handled: drop the cleanup link
            os.unlink(fullpath)

    watcher = dirwatch.DirWatcher(tm_env.cleanup_dir)
    watcher.on_created = _on_created

    # Before starting, "fake" a created event on every cleanup that is
    # already pending
    for pending_cleanup in glob.glob(os.path.join(tm_env.cleanup_dir, '*')):
        _on_created(pending_cleanup)

    heartbeat_period = _WATCHDOG_HEARTBEAT_SEC / 2
    while True:
        got_events = watcher.wait_for_events(timeout=heartbeat_period)
        if got_events:
            watcher.process_events(max_events=_MAX_REQUEST_PER_CYCLE)

        # Heartbeat
        watchdog_lease.heartbeat()

    _LOGGER.info('Cleanup service shutdown.')
    watchdog_lease.remove()
def _watcher(root_dir, rules_dir, containers_dir, watchdogs_dir):
    """Treadmill Firewall rule watcher.

    Applies the rules currently known to the rule manager, then watches
    the rules directory and adds/removes iptables rules as rule files are
    created/deleted. On every loop iteration the rule manager is garbage
    collected and the watchdog lease is heartbeated.

    :param root_dir:
        Base directory the three other directories are joined to.
    :param rules_dir:
        Rules directory, relative to ``root_dir``.
    :param containers_dir:
        Containers directory, relative to ``root_dir``.
    :param watchdogs_dir:
        Watchdogs directory, relative to ``root_dir``.
    """
    rules_dir = os.path.join(root_dir, rules_dir)
    containers_dir = os.path.join(root_dir, containers_dir)
    watchdogs_dir = os.path.join(root_dir, watchdogs_dir)

    # Setup the watchdog
    # (lease timeout is twice the loop's wait period)
    watchdogs = watchdog.Watchdog(watchdogs_dir)
    wd = watchdogs.create(
        'svc-{svc_name}'.format(svc_name='firewall_watcher'),
        '{hb:d}s'.format(hb=_FW_WATCHER_HEARTBEAT * 2),
        'Service firewall watcher failed'
    )

    rulemgr = rulefile.RuleMgr(rules_dir, containers_dir)
    # Reference count of passthrough rules per source IP: an IP stays in
    # the passthrough set until its last rule is removed.
    passthrough = {}

    def on_created(path):
        """Invoked when a network rule is created."""
        rule_file = os.path.basename(path)
        _LOGGER.info('adding %r', rule_file)
        # The rule is the filename
        chain_rule = rulemgr.get_rule(rule_file)
        if chain_rule is not None:
            chain, rule = chain_rule
            iptables.add_rule(rule, chain=chain)
            if isinstance(rule, fw.PassThroughRule):
                # One more rule references this source IP
                passthrough[rule.src_ip] = (
                    passthrough.setdefault(rule.src_ip, 0) + 1
                )
                _LOGGER.info('Adding passthrough %r', rule.src_ip)
                iptables.add_ip_set(iptables.SET_PASSTHROUGHS, rule.src_ip)
                iptables.flush_pt_conntrack_table(rule.src_ip)
        else:
            _LOGGER.warning('Ignoring unparseable rule %r', rule_file)

    def on_deleted(path):
        """Invoked when a network rule is deleted."""
        # Edge case, if the directory where the rules are kept gets removed,
        # abort
        if path == rulemgr.path:
            _LOGGER.critical('Network rules directory was removed: %r',
                             path)
            utils.sys_exit(1)

        # The rule is the filename
        rule_file = os.path.basename(path)
        _LOGGER.info('Removing %r', rule_file)
        chain_rule = rulemgr.get_rule(rule_file)
        if chain_rule is not None:
            chain, rule = chain_rule
            iptables.delete_rule(rule, chain=chain)
            if isinstance(rule, fw.PassThroughRule):
                # NOTE(review): raises KeyError if this source IP was never
                # counted - confirm that cannot happen in practice.
                if passthrough[rule.src_ip] == 1:
                    # Last reference: remove the IP from the passthrough
                    # set
                    passthrough.pop(rule.src_ip)
                    _LOGGER.info('Removing passthrough %r', rule.src_ip)
                    iptables.rm_ip_set(iptables.SET_PASSTHROUGHS,
                                       rule.src_ip)
                    iptables.flush_pt_conntrack_table(rule.src_ip)
                else:
                    passthrough[rule.src_ip] -= 1
        else:
            _LOGGER.warning('Ignoring unparseable file %r', rule_file)

    _LOGGER.info('Monitoring fw rules changes in %r', rulemgr.path)
    watch = dirwatch.DirWatcher(rulemgr.path)
    watch.on_created = on_created
    watch.on_deleted = on_deleted

    # Minimal initialization of all the chains and sets
    _init_rules()

    # now that we are watching, prime the rules
    current_rules = rulemgr.get_rules()
    # Bulk apply rules
    _configure_rules(current_rules)
    for _chain, rule in current_rules:
        if isinstance(rule, fw.PassThroughRule):
            passthrough[rule.src_ip] = (
                passthrough.setdefault(rule.src_ip, 0) + 1
            )
            # Add the IPs to the passthrough set
            _LOGGER.info('Adding passthrough %r', rule.src_ip)
            iptables.add_ip_set(iptables.SET_PASSTHROUGHS, rule.src_ip)

    _LOGGER.info('Current rules: %r', current_rules)
    while True:
        if watch.wait_for_events(timeout=_FW_WATCHER_HEARTBEAT):
            # Process no more than 5 events between heartbeats
            watch.process_events(max_events=5)

        rulemgr.garbage_collect()
        wd.heartbeat()

    # Not reached: the loop above has no exit condition.
    _LOGGER.info('service shutdown.')
    wd.remove()
def reforward(tkt_spool_dir, tkt_final_dir, appname, endpoint, realms):
    """Renew tickets in the locker.

    Tracks the ticket endpoint node in Zookeeper, forwards on startup the
    spooled tickets that are missing from (or newer than) the final
    directory, then forwards every new ticket created in the spool
    directory. Never returns.
    """
    endpoint_ref = {}
    endpoint_path = z.path.endpoint(appname, 'tcp', endpoint)

    _LOGGER.info('Starting ticket renew process, ticket endpoint: %s',
                 endpoint_path)

    zkclient = context.GLOBAL.zk.conn

    @zkclient.DataWatch(endpoint_path)
    @utils.exit_on_unhandled
    def _tickets_endpoint_watch(data, _stat, event):
        """Watch to endpoint changes."""
        node_missing = data is None and event is None
        node_deleted = event is not None and event.type == 'DELETED'

        if node_missing:
            # The node is not there yet, wait.
            _LOGGER.info('Ticket endpoint missing: %s', endpoint_path)
            endpoint_ref.clear()
        elif node_deleted:
            _LOGGER.info('Ticket endpoint node deleted.')
            endpoint_ref.clear()
        else:
            _LOGGER.info('Ticket endpoint initialized: %s', data)
            endpoint_ref['endpoint'] = data

        return True

    def _on_created(path):
        """Callback invoked with new ticket appears."""
        ticket_file = os.path.basename(path)
        if ticket_file.startswith('.'):
            return

        # TODO: this is for hotfix only. The ticket receiver creates temp
        #       ticket files with a random suffix, not with a '.' prefix.
        #
        #       As a result, this code is invoked for tmp files; not only
        #       does it generate too much traffic for the ticket receiver,
        #       it also generates errors, because by the time we forward
        #       the cache is gone.
        #
        #       The proper solution is for the ticket receiver to create
        #       temp files starting with a dot.
        if not any(ticket_file.endswith(realm) for realm in realms):
            return

        _LOGGER.info('Got new ticket: %s', ticket_file)

        if not endpoint_ref:
            _LOGGER.warning('No ticket endpoint found.')
            return

        # invoke tkt-send with KRB5CCNAME pointing to the new ticket
        # file.
        _reforward_ticket(path, tkt_final_dir, endpoint_ref['endpoint'])

    watcher = dirwatch.DirWatcher(tkt_spool_dir)
    watcher.on_created = _on_created

    # Make sure to forward all tickets on startup
    tickets_in_tmp_spool = {
        os.path.basename(path)
        for path in glob.glob(os.path.join(tkt_spool_dir, '*'))
    }
    tickets_in_dst_spool = {
        os.path.basename(path)
        for path in glob.glob(os.path.join(tkt_final_dir, '*'))
    }

    # Tickets present on both sides: forward only when the spooled copy
    # is more recent (or when either copy cannot be stat'ed).
    for common in tickets_in_tmp_spool & tickets_in_dst_spool:
        dst_path = os.path.join(tkt_final_dir, common)
        tmp_path = os.path.join(tkt_spool_dir, common)
        try:
            dst_ctime = os.stat(dst_path).st_ctime
            tmp_ctime = os.stat(tmp_path).st_ctime

            if tmp_ctime > dst_ctime:
                _LOGGER.info('Ticket in spool out of date: %s', common)
                _on_created(tmp_path)
            else:
                _LOGGER.info('Ticket: %s is up to date.', common)
        except OSError:
            _on_created(tmp_path)

    for missing in tickets_in_tmp_spool - tickets_in_dst_spool:
        _LOGGER.info('Forwarding missing ticket: %s', missing)
        _on_created(os.path.join(tkt_spool_dir, missing))

    _LOGGER.info('Watching for events.')
    while True:
        got_events = watcher.wait_for_events(timeout=60)
        if got_events:
            watcher.process_events(max_events=10)