class FsRadar:
    """Watch paths with inotify and report file activity to an observer.

    `dir_filter` is a callable taking a directory path and returning a truthy
    value when a newly created directory should be watched as well.
    `observer` must expose `notify(event_kind, path)`.

    The instance is usable as a context manager; leaving the `with` block
    closes the inotify descriptor.
    """

    def __init__(self, dir_filter, observer):
        self.inotify = INotify()
        # Earlier revisions assigned two other masks here that were
        # immediately overwritten; only the effective mask is kept.
        self.watch_flags = (
            flags.CREATE
            | flags.DELETE
            | flags.DELETE_SELF
            | flags.CLOSE_WRITE
            | flags.MOVE_SELF
            | flags.MOVED_FROM
            | flags.MOVED_TO
            | flags.EXCL_UNLINK
        )
        # Maps watch descriptor -> watched path.
        self.wds = {}
        self.dir_filter = dir_filter
        self.observer = observer

    def add_watch(self, path):
        """Start watching `path`, unless ONLYDIR is requested and it is not a directory."""
        if (self.watch_flags & flags.ONLYDIR) and not os.path.isdir(path):
            return
        wd = self.inotify.add_watch(path, self.watch_flags)
        self.wds[wd] = path
        logger.debug('Watch %s', important(path))

    def rm_watch(self, wd):
        """Stop watching the path registered under watch descriptor `wd`.

        Raises KeyError when `wd` is not a known watch descriptor.
        """
        logger.debug('Stop Watching %s', important(self.wds[wd]))
        self.inotify.rm_watch(wd)
        self.wds.pop(wd)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the inotify descriptor and return whatever `INotify.close()` returns."""
        logger.debug('Close inotify descriptor')
        return self.inotify.close()

    def on_watch_event(self, event):
        """Dispatch one inotify event to the matching `on_*` handler."""
        MASK_NEW_DIR = flags.CREATE | flags.ISDIR

        if logging.getLogger().isEnabledFor(logging.DEBUG):
            logger.debug('New event: %r', event)
            for flag in flags.from_mask(event.mask):
                logger.debug('-> flag: %s', flag)

        if MASK_NEW_DIR == MASK_NEW_DIR & event.mask:
            new_dir_path = join(self.wds[event.wd], event.name)
            self.on_new_dir(new_dir_path)
        elif flags.CLOSE_WRITE & event.mask:
            if event.name:
                # we are watching a directory and a file inside of it has
                # been touched
                logger.debug('Watching dir, file touched')
                self.on_file_write(join(self.wds[event.wd], event.name))
            else:
                # we are watching a file
                logger.debug('Watching file, file touched')
                self.on_file_write(self.wds[event.wd])
        elif flags.IGNORED & event.mask:
            # The watch is gone: either the kernel dropped it (file/directory
            # removed/unmounted) or rm_watch() removed it. In the latter case
            # the descriptor was already forgotten, so there is nothing left
            # to report and no KeyError should be raised.
            path = self.wds.pop(event.wd, None)
            if path is not None:
                self.on_file_gone(path)

    def on_new_dir(self, path):
        """Watch a newly created directory if `dir_filter` accepts it."""
        if self.dir_filter(path):
            self.add_watch(path)
            # If files have been added immediately to the directory we
            # missed the events, so we emit them artificially (with
            # the risk of having some repeated events)
            for fName in os.listdir(path):
                self.on_file_write(join(path, fName))

    def on_file_write(self, path):
        '''The file at `path` was closed after writing, or was found in a
        newly watched directory'''
        self.observer.notify(FsRadarEvent.FILE_MATCH, path)

    def on_file_gone(self, path):
        '''The file/directory at `path` was either unlinked, moved or
        unmounted'''
        self.observer.notify(FsRadarEvent.FILE_GONE, path)

    def run_forever(self):
        """Read inotify events in an endless loop and dispatch each one."""
        while True:
            for event in self.inotify.read(read_delay=30, timeout=2000):
                self.on_watch_event(event)
class FileChangeWatcher(object):
    """Monitors for changes in a given file.

    The watch is placed on the file's parent directory and events are
    filtered by file name.
    """

    def __init__(self, filepath):
        """Start watching the directory that contains `filepath`.

        `filepath` is split on its last "/"; a bare file name is watched in
        the current directory and a file directly under the root is watched
        in "/".
        """
        directory, separator, name = filepath.rpartition("/")
        if not directory:
            # "name" -> current directory, "/name" -> filesystem root.
            directory = "/" if separator else "."
        self.__path, self.__name = directory, name
        self.__inotify = INotify()
        watch_flags = (
            Flags.MODIFY
            | Flags.CLOSE_WRITE
            | Flags.MOVED_TO
            | Flags.EXCL_UNLINK
        )
        self.__wd = self.__inotify.add_watch(self.__path, watch_flags)

    def close(self):
        """Remove the watch and release the inotify descriptor."""
        try:
            self.__inotify.rm_watch(self.__wd)
        finally:
            # Always release the file descriptor, even if the watch was
            # already gone.
            self.__inotify.close()

    @property
    def changed(self):
        """Whether the pending events show the file was modified and saved.

        Reads the currently available events for the watched directory and
        keeps those whose name matches the file. A MOVED_TO event counts as
        both a modification and a save. `read()` is called without a timeout
        here.
        """
        modified = saved = False
        matching = (e for e in self.__inotify.read() if e.name == self.__name)
        for event in matching:
            for flag in Flags.from_mask(event.mask):
                if flag == Flags.MODIFY:
                    modified = True
                elif flag == Flags.CLOSE_WRITE:
                    saved = True
                elif flag == Flags.MOVED_TO:
                    modified = saved = True
        return modified and saved
def watch_directories_inotify(watched_dirs, shutdown_event, callback, interval=settings.WATCH_DIRECTORY_INTERVAL):
    """
    Watch the directories given via inotify.

    This is a very efficient way to handle watches, however it requires linux,
    and may not work with NFS mounts.

    Accepts an iterable of workflow WatchedDir objects, a shutdown event, and
    a callback to be called when content appears in the watched dir. The
    callback receives the item's path and the WatchedDir it was found in.
    `interval` is the polling timeout in seconds between checks of
    `shutdown_event`.

    Raises OSError if a watched path is not a directory. Watches and the
    inotify descriptor are released on every exit path.
    """
    if not IS_LINUX:
        warnings.warn(
            "inotify may not work as a watched directory method on non-linux systems.",
            RuntimeWarning,
        )

    inotify = INotify()
    watch_flags = flags.CREATE | flags.MOVED_TO
    watches = {}  # descriptor: (path, WatchedDir)

    try:
        for watched_dir in watched_dirs:
            path = os.path.join(WATCHED_BASE_DIR, watched_dir.path.lstrip("/"))
            if not os.path.isdir(path):
                raise OSError('The path "{}" is not a directory.'.format(path))
            descriptor = inotify.add_watch(path, watch_flags)
            watches[descriptor] = (path, watched_dir)

            # If the directory already has something in it, trigger callbacks
            for item in scandir.scandir(path):
                if watched_dir.only_dirs and not item.is_dir():
                    continue
                logger.debug("Found existing data in watched dir %s: %s", watched_dir.path, item.name)
                callback(item.path, watched_dir)

        while not shutdown_event.is_set():
            # timeout is in milliseconds
            events = inotify.read(timeout=interval * 1000)
            for event in events:
                entry = watches.get(event.wd)
                if entry is None:
                    # Not one of our descriptors (e.g. a queue overflow
                    # notification); there is no directory to report.
                    continue
                path, watched_dir = entry
                logger.debug("Watched dir %s detected activity: %s", watched_dir.path, event.name)
                # bitwise check the mask for dirs, if dirs_only is set
                if watched_dir.only_dirs and (flags.ISDIR & event.mask == 0):
                    continue
                callback(os.path.join(path, event.name), watched_dir)
    finally:
        try:
            for watch_descriptor in watches:
                inotify.rm_watch(watch_descriptor)
        finally:
            inotify.close()
class Monitor(threading.Thread):
    """Thread that registers Python modules written into a directory.

    Every `.py` file that is closed after writing inside `path_to_runtime_lib`
    is passed once to `register.add_module()` as
    `<path>.<file name without extension>`.
    """

    def __init__(self, path_to_runtime_lib, register):
        threading.Thread.__init__(self)
        self.path = path_to_runtime_lib
        self.lastrun = 0  # get rid of this ugly hack
        self.register = register
        self.known_modules = set()
        # Set here rather than in run() so that a stop() issued right after
        # start() cannot be overwritten when the thread body begins.
        self.alive = True
        self.inotify = None
        self.wd = None

    def start_inotify(self):
        """Create the inotify instance and watch the runtime directory."""
        self.inotify = INotify()
        watch_flags = flags.CLOSE_WRITE | flags.DELETE
        self.wd = self.inotify.add_watch(self.path + '/', watch_flags)

    def run(self):
        """Process inotify events until `stop()` is called."""
        if self.inotify is None:
            self.start_inotify()
        try:
            while self.alive:
                for event in self.inotify.read():
                    if not self.alive:
                        break
                    if not event.name.endswith('.py'):
                        continue
                    print(
                        'Cheguei em event.name. Name: {}, knownmodules:{}'.
                        format(event.name, self.known_modules))
                    module_name = self.path + '.' + os.path.splitext(
                        event.name)[0]
                    if event.mask & flags.CLOSE_WRITE:
                        if event.name not in self.known_modules:
                            print('Primeira vez em known modules ({})'.
                                  format(self.known_modules))
                            self.known_modules.add(event.name)
                            self.register.add_module(module_name)
                    # TODO: handle unload from roles / modules on DELETE
        finally:
            try:
                self.inotify.rm_watch(self.wd)
            finally:
                # Release the file descriptor as well as the watch.
                self.inotify.close()

    def stop(self):
        """Ask the thread to finish and wait for it."""
        self.alive = False
        # send a mock event to inotify in order to avoid the need to kill
        # the thread
        self.add_mock_file()
        self.remove_mock_file()
        self.join()

    def add_mock_file(self):
        """Create the placeholder file used to wake up the reader."""
        with open('{}/___mocking_file.py'.format(self.path), 'w') as f:
            f.write("Mock")

    def remove_mock_file(self):
        """Delete the placeholder file created by `add_mock_file()`."""
        os.remove('{}/___mocking_file.py'.format(self.path))
def __watcher(self):
    """Watch the work directory and reload when a tracked file changes.

    Polls inotify for MODIFY events in `self.work_dir` until
    `self.watch_event` is set. When the event's name equals
    `self.CONFIG_FILE` or `self.SECRETS_FILE`, `self.load(self.work_dir)` is
    called. The inotify descriptor is closed on every exit path.
    """
    inotify = INotify()
    try:
        wd = inotify.add_watch(self.work_dir, flags.MODIFY)
        while not self.watch_event.is_set():
            for event in inotify.read(timeout=100, read_delay=100):
                if event.name in (self.CONFIG_FILE, self.SECRETS_FILE):
                    log.info("File change detected: %s", event.name)
                    self.load(self.work_dir)
        # stop watching
        inotify.rm_watch(wd)
    finally:
        inotify.close()
def handle_inotify(self, directory):
    """Consume every file written or moved into `directory`.

    Reads CLOSE_WRITE and MOVED_TO events until `self.stop_flag` becomes
    truthy and passes each affected path to `_consume`. A KeyboardInterrupt
    during the loop ends the watch quietly. The inotify descriptor is closed
    on every exit path, including when `_consume` raises.
    """
    logging.getLogger(__name__).info(
        f"Using inotify to watch directory for changes: {directory}")

    inotify = INotify()
    try:
        descriptor = inotify.add_watch(directory,
                                       flags.CLOSE_WRITE | flags.MOVED_TO)
        try:
            while not self.stop_flag:
                for event in inotify.read(timeout=1000, read_delay=1000):
                    path = os.path.join(directory, event.name)
                    _consume(path)
        except KeyboardInterrupt:
            pass

        inotify.rm_watch(descriptor)
    finally:
        inotify.close()
class WatchManager(object):
    """Owns one inotify instance and routes its events to watched objects.

    Registered objects must provide `path()` and an `inotify(event)` handler;
    the manager stores the watch descriptor on them as `wd`.
    """

    def __init__(self):
        self.inotify = INotify()
        # Maps watch descriptor -> registered object.
        self.watches = {}

    def register(self, obj):
        """Start watching `obj.path()` and remember `obj` under its descriptor."""
        wd = self.inotify.add_watch(obj.path(), IN_MASK)
        obj.wd = wd
        self.watches[wd] = obj

    def unregister(self, obj):
        """Stop watching `obj`; problems are logged instead of raised."""
        logger.debug('Unwatching %s.', obj.path())
        try:
            del self.watches[obj.wd]
            self.inotify.rm_watch(obj.wd)
        except AttributeError:
            logger.error('Object %s has no watch descriptor.', obj.path())
        except KeyError:
            logger.error(
                'Object %s has a watch descriptor but is not known to watch manager.',
                obj.path())

    def fileno(self):
        """Return the inotify file descriptor, e.g. for use with select/poll."""
        return self.inotify.fd

    def dispatch(self):
        """Read pending events and hand each one to the object it belongs to."""
        for event in self.inotify.read(read_delay=1000):
            target = self.watches.get(event.wd)
            if event.mask & flags.IGNORED:
                if target is not None:
                    # The kernel dropped the watch: forget it on both sides.
                    del target.wd
                    del self.watches[event.wd]
                else:
                    # Events carry no path, so the descriptor is reported.
                    # This is also what follows an explicit unregister().
                    logger.warning(
                        'Received ignore event for object we were not watching: %s %s.',
                        event.wd, event.name)
            elif target is not None:
                target.inotify(event)
            else:
                logger.warning(
                    'Received event for object we were not watching: %s %s.',
                    event.wd, event.name)

    def verify(self):
        """Assert that the path of every watched object still exists."""
        for obj in self.watches.values():
            assert (obj.path().exists())
class InotifyWatch(Thread):
    """
    Watches for inotify notifications when the database file changes.

    This sends notification to any observers added to this monitor.
    """

    def __init__(self, dbfile):
        super().__init__()
        self.inotify = INotify()
        self.watch = self.inotify.add_watch(dbfile, flags.MODIFY)
        self.running = True
        self.observers = []

    def stop(self):
        """Ask the thread to finish and remove the watch."""
        # Clear the flag first: removing the watch wakes up the reader, which
        # must already see that it is expected to exit.
        self.running = False
        self.inotify.rm_watch(self.watch)

    def run(self):
        """Notify every observer for each event until `stop()` is called."""
        while self.running:
            # The timeout (milliseconds) lets the loop re-check `running`
            # instead of blocking forever when no further event arrives.
            for _event in self.inotify.read(timeout=1000):
                if not self.running:
                    break
                for observer in self.observers:
                    observer.notify()

    def add_observer(self, observer):
        """Register an object whose `notify()` is called on every event."""
        self.observers.append(observer)
class FileWatcher:
    """Upload every file that lands in the ingest directory tree.

    Attributes:
        directories (dict): Maps watch descriptor -> watched directory path.
        inotify (INotify): The inotify instance all watches are added to.
        watch_descriptors (dict): Maps watched directory path -> descriptor.
        watched_flags (int): Mask used for every watch that is added.
    """

    def __init__(self):
        """Watch the ingest directory and process events in an endless loop.

        Each reported file is uploaded and removed, then empty
        sub-directories are deleted. This constructor does not return.
        """
        self.inotify = INotify()
        self.watched_flags = flags.CREATE | flags.MOVED_TO | flags.ISDIR
        self.directories = {}
        self.watch_descriptors = {}

        self._add_directory_watch(config.APP_LANDING_INGEST_DIR)

        logger.info("Watching ingestion folder")

        while True:
            events = self.inotify.read(read_delay=1000)
            all_events = self.get_all_events(events)
            for event in all_events:
                path = os.path.join(self.directories[event.wd], event.name)
                upload_file(path)
                os.remove(path)
            self.delete_folders_if_empty(config.APP_LANDING_INGEST_DIR)

    def _add_directory_watch(self, path: str) -> int:
        """Watch `path`, record it in both lookup tables, return its descriptor."""
        watch_descriptor = self.inotify.add_watch(path, self.watched_flags)
        self.directories[watch_descriptor] = path
        self.watch_descriptors[path] = watch_descriptor
        return watch_descriptor

    def delete_folders_if_empty(self, dirname: str):
        """Remove every empty directory below `dirname`, deepest first.

        Args:
            dirname (str): Root of the tree to clean; the root itself is kept.
        """
        for root, dirs, _ in os.walk(dirname, topdown=False):
            for name in dirs:
                dir_path = os.path.join(root, name)
                if os.listdir(dir_path):
                    continue
                # A directory that was never watched has nothing to
                # unregister but is still removed.
                watch_descriptor = self.watch_descriptors.pop(dir_path, None)
                if watch_descriptor is not None:
                    self.inotify.rm_watch(watch_descriptor)
                    self.directories.pop(watch_descriptor, None)
                os.rmdir(dir_path)

    def get_all_events(self, events: List[Event]):
        """Expand raw inotify events into one event per file to process.

        New directories are watched and scanned, so files that arrived
        before the watch existed are reported too. Events flagged DELETE or
        IGNORED are dropped.

        Args:
            events (List[Event]): Events returned by `INotify.read()`.

        Returns:
            List[Event]: File events, including synthesized ones.
        """
        logger.info(f"Initial events: {events}")
        all_events = []
        for event in events:
            is_dir = bool(event.mask & flags.ISDIR)
            deleted = bool(event.mask & (flags.DELETE | flags.IGNORED))
            if deleted:
                continue
            if is_dir:
                # Resolve the new directory against the directory that
                # reported it, so nested directories get the right path.
                new_dir = os.path.join(self.directories[event.wd], event.name)
                watch_descriptor = self._add_directory_watch(new_dir)
                all_events = self.check_subfolders(watch_descriptor,
                                                   all_events)
            else:
                all_events += [event]
                logger.info(f"Adding event for file : "
                            f"{self.directories[event.wd]}/{event.name}")
        logger.info(f"All events : {all_events}")
        return all_events

    def check_subfolders(self, watch_descriptor: int, all_events: List[Event]):
        """Watch everything below a new directory and report its files.

        Args:
            watch_descriptor (int): Descriptor of the directory to scan.
            all_events (List[Event]): List the synthesized events are
                appended to.

        Returns:
            List[Event]: `all_events` with one event per file found.
        """
        for root, folders, files in os.walk(
                self.directories[watch_descriptor]):
            for folder in folders:
                self._add_directory_watch(os.path.join(root, folder))
            for file in files:
                all_events += [
                    # 256 is the value of the inotify CREATE flag.
                    Event(wd=self.watch_descriptors[root],
                          mask=256,
                          cookie=0,
                          name=file)
                ]
                logger.info(f"Adding untracked event for file : {root}/{file}")
        return all_events
class FStatCache(object):
    """
    Simple caching mechanism using inotify functionality from linux to avoid
    expensive repeated os.stat calls to the latest details of a file.
    When a file is modified we listen for the event and update our cache.

    Watch descriptors are tracked in the module-level `watches` mapping
    (descriptor -> file path).
    """
    self_stats_file = "/tmp/fstat-cache-stats"

    def __init__(self, timeout=None):
        # this could potentially be an LRU Cache
        self._store = OrderedDict()
        self._inotify = INotify()
        self._monitor = MonitorThread()
        # The monitor receives the same store object, so the cache must only
        # ever be mutated in place.
        self._monitor_thread = Thread(target=self._monitor.run,
                                      args=(
                                          self._inotify,
                                          self._store,
                                          timeout,
                                      ))

    def get_file_stats(self, file_path: str):
        """
        takes absolute path to a file and returns the last modification time
        and size in bytes. A file that is not cached yet is added to the
        watch list first.
        :param file_path: absolute path to a file
        :return: { timestamp, size}, or None when the file does not exist
        """
        try:
            return self._get_item(file_path)
        except KeyError:
            logger.info(
                "file %s is not present in the cache adding it "
                "now and fetching the details using os.stat", file_path)
            return self._add_file_to_watch(file_path)

    @staticmethod
    def get_file_stats_using_stat(file_path: str):
        """
        takes absolute path to a file and returns the last modification time
        and size in bytes by using os.stat function. This is available here
        only to get some benchmarks to compare with this cache implementation.
        :param file_path: absolute path to a file
        :return: { timestamp, size}, or None when the path is not a file
        """
        if not os.path.isfile(file_path):
            return None
        file_info = os.stat(file_path)
        return {"ts": file_info.st_mtime, "size": file_info.st_size}

    def _get_item(self, file_path: str):
        return self._store[file_path]

    def _set_item(self, file_path: str, stats: dict):
        self._store[file_path] = stats

    def build(self, file_paths: list):
        """
        takes list of files and starts watching for changes using inotify
        from Linux, then starts the monitor thread.
        :param file_paths: list of files to watch for changes, only absolute
        paths.
        """
        for file_path in file_paths:
            # only if the file exists
            if os.path.isfile(file_path):
                self._store[file_path] = self.get_file_stats(file_path)
        self._monitor_thread.start()

    def invalidate(self):
        """
        will invalidate the current cache and remove all the files from the
        watcher.
        """
        self._unwatch_all_files()
        # Drop the cached stats too; nothing keeps them up to date any more.
        self._store.clear()
        self._monitor.terminate()

    def _add_file_to_watch(self, file_path: str):
        if not os.path.isfile(file_path):
            return None
        wd = self._inotify.add_watch(file_path, flags.MODIFY)
        watches[wd] = file_path
        stats = self.get_file_stats_using_stat(file_path)
        self._set_item(file_path, stats)
        return stats

    def _remove_from_watch(self, file_path: str):
        wd = FStatCache._get_key(watches, file_path)
        self._inotify.rm_watch(wd)
        del watches[wd]
        # An unwatched file can no longer be refreshed, so its cached stats
        # would go stale.
        self._store.pop(file_path, None)

    def _unwatch_all_files(self):
        for wd in list(watches):
            logger.debug("removing %s from watch list", watches[wd])
            self._inotify.rm_watch(wd)
            del watches[wd]

    @staticmethod
    def list_files_in_cache():
        return list(watches.values())

    def add_file_to_watch(self, file_path: str):
        """
        will add the given file to watch list, stats in cache will be updated
        when a file modification event is received
        :param file_path: absolute path to the file to monitor
        :raises FileNotFoundError: when the path is not an existing file
        """
        if not os.path.isfile(file_path):
            raise FileNotFoundError("given file doesn't exist")
        wd = self._inotify.add_watch(file_path, flags.MODIFY)
        watches[wd] = file_path

    def remove_from_watch(self, file_path: str):
        """
        will remove the file from watcher, will raise an error if the file
        is not being watched
        :param file_path: absolute path to the file to remove from watcher
        :raises KeyError: when the file is not being watched
        """
        if file_path not in watches.values():
            # define a custom exception class here
            raise KeyError("file is not being watched")
        self._remove_from_watch(file_path)

    @staticmethod
    def _get_key(watches: dict, value: str):
        """Return the first key of `watches` mapped to `value`, or None."""
        for item in watches:
            if watches[item] == value:
                return item
        return None
class Tree(LoggingMixIn, Operations):
    """Read-only FUSE view of git branches, one directory per checkout.

    `config['repo']` is the repository path and `config['checkouts']` a list
    of mappings with `branch` and `dir` keys. The content of every configured
    branch is loaded into memory as nested dicts whose leaves are the raw
    blob bytes.
    """

    def __init__(self, config):
        self.config = config
        self.repo_dir = Path(config['repo']).expanduser().as_posix()
        self.repo = pygit2.Repository(self.repo_dir)
        self.now = time()
        self.uid = os.geteuid()
        self.gid = os.getegid()
        self.files = {}
        # Maps the path of a branch's ref file -> its checkout configuration.
        self.watch_cfg = {}
        # Maps inotify watch descriptor -> checkout configuration.
        self.watchers = {}

        for checkout in config['checkouts']:
            branch = str(checkout['branch'])
            directory = str(checkout['dir'])
            ref = self.repo.lookup_branch(branch, pygit2.GIT_BRANCH_ALL).name
            self.watch_cfg[f'{self.repo_dir}/.git/{ref}'] = checkout
            tree = self.repo.revparse_single(branch).tree
            logging.info(f'Populating directory {directory} with {branch}')
            self.files[directory] = self.build_tree(tree)

    def change_watcher(self):
        """Watch the configured refs and reload a checkout on every event.

        This method loops forever.
        """
        # Set up listener for changes
        self.inotify = INotify()
        watch_flags = flags.CREATE | flags.MOVED_TO
        for watch in self.watch_cfg:
            try:
                iwatch = self.inotify.add_watch(watch, watch_flags)
                self.watchers[iwatch] = self.watch_cfg[watch]
            except FileNotFoundError:
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning(f"Could not set up watcher for {watch}")

        while True:
            for event in self.inotify.read():
                branch = self.watchers[event.wd]['branch']
                directory = str(self.watchers[event.wd]['dir'])
                logging.info(f'Re-populating directory {directory} with {branch}')  # noqa
                tree = self.repo.revparse_single(branch).tree
                self.files[directory] = self.build_tree(tree)

    def rm_watchers(self):
        """Remove every watch that `change_watcher()` registered."""
        for wd in self.watchers:
            self.inotify.rm_watch(wd)

    def build_tree(self, obj):
        """Return the raw bytes of a blob or a name -> subtree dict of a tree."""
        if obj.type_str == 'blob':
            return self.repo[obj.id].read_raw()
        return {o.name: self.build_tree(o) for o in obj}

    def build_path(self, path):
        """Descend through the first entry of each level to a file name.

        Returns the first key when its value is bytes, otherwise recurses
        into that value. Returns None for an empty mapping.
        """
        for entry in path:
            if isinstance(path[entry], bytes):
                return entry
            return self.build_path(path[entry])

    def readdir(self, path, fh):
        """List the entries of the directory at `path`."""
        return ['.', '..'] + list(self.lookup(path))

    def getattr(self, path, fh=None):
        """Return stat-like attributes: read-only file or read-only directory."""
        attrs = {
            'st_uid': self.uid,
            'st_gid': self.gid,
            'st_ctime': self.now,
            'st_atime': self.now,
            'st_mtime': self.now,
        }
        inode = self.lookup(path)
        if isinstance(inode, bytes):
            attrs['st_mode'] = stat.S_IFREG | 0o0444
            attrs['st_nlink'] = 1
            attrs['st_size'] = len(inode)
        else:
            attrs['st_mode'] = stat.S_IFDIR | 0o0555
            attrs['st_nlink'] = len(inode)+2
            attrs['st_size'] = 4096
        return attrs

    def read(self, path, size, offset, fh):
        """Return up to `size` bytes of the file at `path` from `offset`."""
        data = self.lookup(path)
        return data[offset:offset + size]

    def lookup(self, path):
        '''Lookup path as string

        Returns inode from self.files: Either file contents or directory
        entry. Raises FuseOSError(ENOENT) when a component does not exist,
        including when the path continues below a regular file.
        '''
        files = self.files
        inodes = [i for i in path.strip('/').split('/') if i]
        for inode in inodes:
            try:
                files = files[inode]
            except (KeyError, TypeError):
                # TypeError: the previous component is file content (bytes),
                # which has no children.
                raise FuseOSError(ENOENT)
        if isinstance(files, str):
            ret = inode
        else:
            ret = files
        return ret
class InotifyFilesWatcher(BaseFilesWatcher):
    """Files watcher backend built on inotify.

    `flag_groups` names the inotify masks used by the watch modes and by the
    event classification helpers below.
    """

    flag_groups = {
        "self_delete": f.DELETE_SELF | f.MOVE_SELF | f.UNMOUNT,
        "all": f.CREATE | f.DELETE | f.MODIFY | f.MOVED_FROM | f.MOVED_TO | f.DELETE_SELF | f.MOVE_SELF | f.UNMOUNT,
        "added": f.CREATE | f.MOVED_TO,
        "removed": f.DELETE | f.MOVED_FROM,
        "changed": f.MODIFY,
    }

    def __init__(self):
        super().__init__()
        self.inotify = INotify()
        # only used to display directories in debug mode
        self.mapping = {}

    def _set_watch(self, directory, watch_mode):
        """Watch `directory` with the mask named `watch_mode`; return the watch id."""
        mask = self.flag_groups[watch_mode]
        watch_id = self.inotify.add_watch(directory, mask)
        self.mapping[watch_id] = directory
        return watch_id

    def _remove_watch(self, watch_id):
        """Drop the watch `watch_id`, tolerating one the kernel already removed."""
        try:
            self.inotify.rm_watch(watch_id)
        except OSError:
            # the watch is already removed from the kernel, maybe because
            # the directory was deleted
            pass
        self.mapping.pop(watch_id, None)

    def stop(self):
        """Stop the watcher and close the inotify descriptor."""
        super().stop()
        if self.inotify:
            self.inotify.close()

    def stopped(self):
        """Truthy once the base watcher is stopped or inotify is closed."""
        return super().stopped() or self.inotify.closed

    def iter_events(self):
        """Yield pending events; IGNORED events only trigger watch removal."""
        try:
            for event in self.inotify.read(timeout=500):
                directory = self.mapping.get(event.wd)
                logger.debug(
                    f'{event} ; {directory}/{event.name} ; FLAGS: {", ".join(str(flag) for flag in f.from_mask(event.mask))}'
                )
                if not (event.mask & f.IGNORED):
                    yield event
                else:
                    self.remove_watch(event.wd)
        except ValueError:
            # happen if read while closed
            pass

    def get_event_watch_id(self, event):
        return event.wd

    def get_event_watch_name(self, event):
        return event.name

    def is_directory_event(self, event):
        return event.mask & f.ISDIR

    def is_event_self_removed(self, event):
        self_delete_mask = self.flag_groups["self_delete"]
        return event.mask & self_delete_mask

    def is_event_directory_added(self, event):
        dir_bit = self.is_directory_event(event)
        if not dir_bit:
            return dir_bit
        return event.mask & self.flag_groups["added"]

    def is_event_directory_removed(self, event):
        dir_bit = self.is_directory_event(event)
        if not dir_bit:
            return dir_bit
        return event.mask & self.flag_groups["removed"]

    def is_file_added(self, event):
        if self.is_directory_event(event):
            return False
        return event.mask & self.flag_groups["added"]

    def is_file_removed(self, event):
        if self.is_directory_event(event):
            return False
        return event.mask & self.flag_groups["removed"]

    def is_file_changed(self, event):
        if self.is_directory_event(event):
            return False
        return event.mask & self.flag_groups["changed"]