def main():
    '''
    command-line entry point: parse the options, make sure the rule
    store exists and (for a single host) that the host is known to it,
    expand grain or glob host expressions into a list of hosts, then
    hand everything to do_action()

    options:
      -H/--hosts      host expression
      -a/--action     action to perform
      -p/--path       path the action applies to
      -s/--status     status the action applies to
      -r/--rulestore  path to the rule store (default:
                      /etc/data_retention/dataretention_rules.sq3)
      -d/--dryrun     passed through to do_action()
      -h/--help       show usage
    '''
    hosts = None
    action = None
    path = None
    status = None
    dryrun = False
    store_filepath = "/etc/data_retention/dataretention_rules.sq3"

    try:
        # "rulestore=" has to be in the long option list, otherwise
        # getopt rejects --rulestore before the handler below sees it
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "H:a:p:s:r:dh",
            ["hosts=", "action=", "path=", "status=", "rulestore=",
             "dryrun", "help"])
    except getopt.GetoptError as err:
        # NOTE(review): presumably usage() exits rather than returning;
        # the code below relies on 'options' being set -- confirm
        usage("Unknown option specified: " + str(err))

    for (opt, val) in options:
        if opt in ["-H", "--hosts"]:
            hosts = val
        elif opt in ["-a", "--action"]:
            action = val
        elif opt in ["-p", "--path"]:
            path = val
        elif opt in ["-s", "--status"]:
            status = val
        elif opt in ["-r", "--rulestore"]:
            store_filepath = val
        elif opt in ["-d", "--dryrun"]:
            dryrun = True
        elif opt in ["-h", "--help"]:
            usage()
        else:
            usage("Unknown option specified: %s" % opt)

    if remainder:
        usage("Unknown option specified: <%s>" % remainder[0])

    check_args(hosts, action, status)

    if not os.path.exists(store_filepath):
        usage('no such rulestore at %s' % store_filepath)

    cdb = RuleStore(store_filepath)
    cdb.store_db_init(None)

    hosts, htype = clouseau.retention.utils.utils.get_hosts_expr_type(hosts)

    # if we are given one host, check that the host has a table or whine
    if htype == 'glob' and '*' not in hosts:
        if not clouseau.retention.utils.ruleutils.check_host_table_exists(
                cdb, hosts):
            usage('no such host in rule store, %s' % hosts)

    if htype in ('grain', 'glob'):
        # turn the expression into the list of hosts it matches
        client = LocalClientPlus()
        hosts = client.cmd_expandminions(hosts, "test.ping",
                                         expr_form=htype)

    do_action(cdb, action, hosts, status, path, dryrun)
def main():
    '''
    command-line entry point: parse the options, make sure the rule
    store exists and (for a single host) that the host is known to it,
    expand grain or glob host expressions into a list of hosts, then
    hand everything to do_action()

    options:
      -H/--hosts      host expression
      -a/--action     action to perform
      -p/--path       path the action applies to
      -s/--status     status the action applies to
      -r/--rulestore  path to the rule store (default:
                      /etc/data_retention/dataretention_rules.sq3)
      -d/--dryrun     passed through to do_action()
      -h/--help       show usage
    '''
    hosts = None
    action = None
    path = None
    status = None
    dryrun = False
    store_filepath = "/etc/data_retention/dataretention_rules.sq3"

    try:
        # "rulestore=" has to be in the long option list, otherwise
        # getopt rejects --rulestore before the handler below sees it
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "H:a:p:s:r:dh",
            ["hosts=", "action=", "path=", "status=", "rulestore=",
             "dryrun", "help"])
    except getopt.GetoptError as err:
        # NOTE(review): presumably usage() exits rather than returning;
        # the code below relies on 'options' being set -- confirm
        usage("Unknown option specified: " + str(err))

    for (opt, val) in options:
        if opt in ["-H", "--hosts"]:
            hosts = val
        elif opt in ["-a", "--action"]:
            action = val
        elif opt in ["-p", "--path"]:
            path = val
        elif opt in ["-s", "--status"]:
            status = val
        elif opt in ["-r", "--rulestore"]:
            store_filepath = val
        elif opt in ["-d", "--dryrun"]:
            dryrun = True
        elif opt in ["-h", "--help"]:
            usage()
        else:
            usage("Unknown option specified: %s" % opt)

    if remainder:
        usage("Unknown option specified: <%s>" % remainder[0])

    check_args(hosts, action, status)

    if not os.path.exists(store_filepath):
        usage('no such rulestore at %s' % store_filepath)

    cdb = RuleStore(store_filepath)
    cdb.store_db_init(None)

    hosts, htype = clouseau.retention.utils.utils.get_hosts_expr_type(hosts)

    # if we are given one host, check that the host has a table or whine
    if htype == 'glob' and '*' not in hosts:
        if not clouseau.retention.utils.ruleutils.check_host_table_exists(
                cdb, hosts):
            usage('no such host in rule store, %s' % hosts)

    if htype in ('grain', 'glob'):
        # turn the expression into the list of hosts it matches
        client = LocalClientPlus()
        hosts = client.cmd_expandminions(hosts, "test.ping",
                                         expr_form=htype)

    do_action(cdb, action, hosts, status, path, dryrun)
class CommandLine(object):
    '''
    prompt user at the command line for actions to take on a given
    directory or file, show results
    '''

    # todo: down and up should check you really are (descending,
    # ascending path)

    def __init__(self, confdir, store_filepath, timeout, audit_type,
                 ignore_also=None, hosts_expr=None):
        '''
        confdir:        config directory; handed to the config setup,
                        to Ignores and to RemoteUserCfGrabber
        store_filepath: path handed to RuleStore
        timeout:        timeout handed to the remote helpers (directory
                        contents, file examiner, user config grabber)
        audit_type:     type of audit; audit_type + "_locations" is the
                        config entry used to look up base directories
        ignore_also:    extra ignores, converted with
                        convert_ignore_also_to_ignores
        hosts_expr:     stored as is; not used elsewhere in this class
        '''
        self.confdir = confdir
        self.cdb = RuleStore(store_filepath)
        self.cdb.store_db_init(None)
        self.timeout = timeout
        self.audit_type = audit_type
        self.locations = audit_type + "_locations"
        self.hosts_expr = hosts_expr

        self.basedir = None
        clouseau.retention.utils.cliutils.init_readline_hist()
        # this is arbitrary, can tweak it later
        # how many levels down we keep in our list of
        # top-level dirs from which the user can start
        # their interactive session
        self.max_depth_top_level = 3

        self.filtertype = 'all'

        # fixme completely wrong
        self.batchno = 1

        clouseau.retention.utils.config.set_up_conf(self.confdir)

        # duplicate all the ignores except for the uh
        # ones specific to a host. those will be done
        # at host choice time
        # this includes rules, we will do those at host choice time too
        # we want: global, perhost, ignore_also (if there were any)
        self.local_ignored = None
        self.ignores = Ignores(self.confdir)
        self.ignored_from_rulestore = {}
        self.ignored_also = (
            clouseau.retention.utils.ignores.convert_ignore_also_to_ignores(
                ignore_also))

        self.dircontents = CurrentDirContents(self.timeout)
        self.cenv = CurrentEnv()
        self.cmpl = Completion(self.dircontents, self.cenv,
                               self.max_depth_top_level)

    def _update_ignored_from_rulestore(self, host):
        '''
        re-read the ignores for the given host from the rule store and
        cache them in self.ignored_from_rulestore; if the rule store
        returns nothing for the host, the cached entry is left alone
        '''
        results = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in results:
            self.ignored_from_rulestore[host] = results[host]

    def do_one_host(self, host, report):
        '''
        make the given host the current one, work out from its entry
        in the report which directories are worth examining, and keep
        prompting the user for one of them until the user is done

        host:   name of the host to work on
        report: dict of audit output keyed by host name
        '''
        self.set_host(host)
        self._update_ignored_from_rulestore(host)

        if host not in report:
            dirs_problem = None
            dirs_skipped = None
        else:
            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
        self.cenv.set_reported_dirs(dirs_problem, dirs_skipped)
        if self.cenv.problem_dirs is None and self.cenv.skipped_dirs is None:
            print("No report available from this host")
        elif (len(self.cenv.problem_dirs) == 0 and
              len(self.cenv.skipped_dirs) == 0):
            print("No problem dirs and no skipped dirs on this host")
        else:
            # problem dirs are offered only down to max_depth_top_level;
            # skipped dirs already covered by one of those are dropped
            dirs_problem_to_depth = [
                clouseau.retention.utils.cliutils.get_path_prefix(
                    d, self.max_depth_top_level)
                for d in dirs_problem]
            dirs_skipped = [s for s in dirs_skipped
                            if s not in dirs_problem_to_depth]
            relevant_dirs = (sorted(list(set(dirs_problem_to_depth))) +
                             sorted(list(set(dirs_skipped))))
            while True:
                dir_todo = self.cmpl.prompt_for_dir()
                if dir_todo is None:
                    print("Done with this host")
                    break
                elif dir_todo not in relevant_dirs:
                    print("Please choose one of the following directories:")
                    # fixme another arbitrary setting
                    clouseau.retention.utils.cliutils.print_columns(
                        relevant_dirs, 5)
                else:
                    # start the new directory with a clean slate;
                    # do_command() fills these in from the chosen dir
                    self.basedir = None
                    self.cenv.cwdir = None
                    self.do_one_directory(dir_todo)

    def run(self, report):
        '''
        call with full report output (not summary) across hosts,
        this will permit the user to examine directories and files
        of specified hosts and add/update rules for those dirs
        and files
        '''
        self.cenv.set_hosts(report.keys())
        while True:
            host_todo = self.cmpl.prompt_for_host()
            if host_todo is None:
                print("exiting at user request")
                break
            else:
                usercfgrab = RemoteUserCfGrabber(host_todo, self.timeout,
                                                 self.audit_type,
                                                 self.confdir)
                to_convert = usercfgrab.run(True)
                self.local_ignored = (
                    clouseau.retention.utils.ignores.process_local_ignores(
                        to_convert))
                self._update_ignored_from_rulestore(host_todo)
                self.do_one_host(host_todo, report)

    def set_host(self, host):
        '''
        record the given host as the current host
        '''
        self.cenv.host = host

    def do_one_directory(self, path):
        '''
        show the top-level menu for the given directory path and run
        the chosen command, over and over, until a command returns
        None (i.e. the user quits the menu)
        '''
        while True:
            todo = self.get_do_command(path)
            if todo is None:
                break

    def get_do_command(self, path):
        '''
        show the top-level menu once, run whatever the user chose
        against the given path and return the result of that command
        '''
        command = self.show_menu('top')
        return self.do_command(command, 'top', path)

    def get_menu_entry(self, choices, default, text):
        '''
        prompt the user with the given menu text and return the
        stripped reply, or the default if the reply was empty

        choices: list of entries used for tab completion
        default: value returned for an empty reply; also shown in
                 square brackets at the end of the prompt
        text:    menu text shown after the current prompt
        '''
        self.cmpl.set_choices_completion(choices, default)
        self.cenv.set_prompt()
        command = raw_input(self.cenv.prompt + ' ' + text +
                            " [%s]: " % default)
        command = command.strip()
        if command == "":
            command = default
        return command

    def show_menu(self, level):
        '''
        show the menu for the given level ('top', 'status', 'examine'
        or 'rule') and return what the user chose; returns None for
        an unknown level
        '''
        if level == 'top':
            text = ("S(set status)/E(examine directory)/"
                    "Filter directory listings/"
                    "I(ignore)/R(manage rules)/Q(quit menu)")
            command = self.get_menu_entry(['S', 'E', 'I', 'F', 'R', 'Q'],
                                          'S', text)
        elif level == 'status':
            text = Status.get_statuses_prompt(", ") + ", Q(quit status menu)"
            # argument order is (choices, default, text), same as for
            # the other menus
            command = self.get_menu_entry(Status.STATUSES + ['Q'],
                                          Status.text_to_status('good'),
                                          text)
        elif level == 'examine':
            text = ("D(down a level)/U(up a level)/E(show entries)/"
                    "C(show contents of file)/R(show rules)/"
                    "F(filter directory listings/"
                    "M(mark file(s))/Q(quit examine menu)")
            command = self.get_menu_entry(
                ['D', 'U', 'E', 'F', 'C', 'R', 'M', 'Q'], 'E', text)
        elif level == 'rule':
            text = ("S(show all rules of type)/D(show rules covering dir)/"
                    "C(show rules covering dir contents)/"
                    "A(add rule to rules store)/"
                    "R(remove rule from rules store/"
                    "E(export rules from store to file)/"
                    "I(import rules from file to store)/Q(quit rule menu)")
            # 'D' is the default and a valid command, so it belongs
            # in the completion choices too
            command = self.get_menu_entry(
                ['S', 'D', 'C', 'A', 'R', 'E', 'I', 'Q'], 'D', text)
        else:
            command = None
        return command

    def get_file_contents(self, path):
        '''
        fetch the start of the given file from the current host via
        RemoteFileExaminer and return whatever its run() returns
        '''
        # get 20 lines and hope that's enough for the user to evaluate
        # fixme the number of lines should be configurable
        fileexamin = RemoteFileExaminer(path, self.cenv.host, 20,
                                        self.timeout, quiet=True)
        contents = fileexamin.run()
        return contents

    def get_basedir_from_path(self, path):
        '''
        return the configured audit location that is the given path
        or contains it; '/' if there is no such location
        '''
        for location in clouseau.retention.utils.config.conf[self.locations]:
            if path == location or path.startswith(location + os.path.sep):
                return location
        # fixme is this really the right fallback? check it
        return '/'

    def entry_is_not_ignored(self, path, entrytype, do_check):
        '''
        see if the given entry is NOT in the ignored lists and
        return True if so, False otherwise

        we only do this check if the do_check argument is set to
        'check'; otherwise we default to True

        path:      path of the entry
        entrytype: 'file' for files; anything else is treated as
                   a directory
        do_check:  filter type currently in effect
        '''
        if do_check != 'check':
            return True

        basedir = self.get_basedir_from_path(path)
        if self.audit_type == 'logs' and entrytype == 'file':
            path = LocalLogsAuditor.normalize(path)

        # the file checker takes the base directory as an extra
        # argument, the directory checker does not
        if entrytype == 'file':
            checker = clouseau.retention.utils.ignores.file_is_ignored
            dirs = False
        else:
            checker = clouseau.retention.utils.ignores.dir_is_ignored
            dirs = True

        # ignore lists that apply to every host
        for ignored in [self.ignores.global_ignored, self.ignored_also]:
            if dirs:
                result = checker(path, ignored)
            else:
                result = checker(path, basedir, ignored)
            if result:
                return False

        # ignore lists keyed by host
        for ignored in [self.ignores.perhost_ignored,
                        self.ignored_from_rulestore]:
            if self.cenv.host in ignored:
                if dirs:
                    result = checker(path, ignored[self.cenv.host])
                else:
                    result = checker(path, basedir, ignored[self.cenv.host])
                if result:
                    return False
        return True

    def get_entries_from_wildcard(self, file_expr):
        '''
        get entries from cwdir that match the expression

        returns a list of matching entry names; for an expression
        without a wildcard the list has the expression itself if it
        is an entry of cwdir, and is empty otherwise
        '''
        # fixme that dang batchno, what a bad idea it was
        self.dircontents.get(self.cenv.host, self.cenv.cwdir, 1)
        # one wildcard only, them's the breaks
        if '*' in file_expr:
            start, end = file_expr.split('*', 1)
            # the length check keeps the prefix and the suffix from
            # matching overlapping parts of a short name
            return [c for c in self.dircontents.entries_dict
                    if (c.startswith(start) and c.endswith(end) and
                        len(c) >= len(start) + len(end))]
        elif file_expr in self.dircontents.entries_dict:
            return [file_expr]
        else:
            return []

    def do_mark(self):
        '''
        prompt for a file or directory name (one wildcard allowed) in
        the current directory and add a rule with status 'good' for
        the regular files and directories it matches

        always returns True
        '''
        readline.set_completer(self.cmpl.dir_entries_completion)
        file_expr = raw_input("file or dirname expression (empty to quit): ")
        file_expr = file_expr.strip()
        if file_expr == '':
            return True
        if file_expr[-1] == os.path.sep:
            file_expr = file_expr[:-1]
        if '*' in file_expr:
            entries_todo = self.get_entries_from_wildcard(file_expr)
        else:
            entries_todo = [file_expr]
            self.dircontents.get(self.cenv.host, self.cenv.cwdir,
                                 self.batchno)
            if not self.dircontents.entries:
                print('failed to get directory contents for %s'
                      % self.cenv.cwdir)
                print('marking dirs/files regardless')
        for entry in entries_todo:
            if entry not in self.dircontents.entries_dict:
                print('skipping %s, not in current dir listing' % entry)
                print(self.dircontents.entries_dict)
                continue
            filetype = clouseau.retention.utils.ruleutils.entrytype_to_text(
                self.dircontents.entries_dict[entry]['type'])
            if filetype == 'link':
                print('No need to mark %s links are always skipped'
                      % file_expr)
                continue
            elif filetype != 'dir' and filetype != 'file':
                print('Not a dir or regular file, no need to mark, skipping')
                continue
            status = Status.text_to_status('good')
            # NOTE(review): the rule is added for file_expr (which may
            # be the wildcard expression), not for the matched entry --
            # confirm that this is intended
            clouseau.retention.utils.ruleutils.do_add_rule(
                self.cdb, file_expr, filetype, status, self.cenv.host)
        return True

    def do_add_rule(self):
        '''
        prompt for a path (or wildcard expression) and a status and
        add the corresponding rule for the current host to the rule
        store, then refresh the ignores taken from the rule store

        a relative path is taken relative to the current directory;
        a path ending in the path separator is stored as a directory
        rule, anything else as a file rule

        returns True, or None if the user quits at the status prompt
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        default = Status.text_to_status('good')
        self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input(statuses_text + " Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default
            # only the first letter of the reply counts
            if status[0].upper() in Status.STATUSES:
                status = status[0].upper()
                break
            elif status == 'q' or status == 'Q':
                return None
            else:
                print("Unknown status type")
                continue

        # fixme should check that any wildcard is only one and only
        # in the last component... someday

        if path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'dir')
        else:
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'file')

        clouseau.retention.utils.ruleutils.do_add_rule(
            self.cdb, path, filetype, status, self.cenv.host)
        # update the ignores list since we have a new rule
        self._update_ignored_from_rulestore(self.cenv.host)
        return True

    def do_show_rules_with_status(self):
        '''
        prompt for a status (or A for all) and a path prefix, and
        show the rules of the current host that match

        returns True once rules have been shown, None if the user
        quits at the status prompt
        '''
        default = Status.text_to_status('problem')
        self.cmpl.set_choices_completion(['A'] + Status.STATUSES + ['Q'],
                                         default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input("status type A(all), " + statuses_text +
                               ", Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default

            if status == 'q' or status == 'Q':
                return None

            # only the first letter counts; normalizing here makes the
            # check and the dispatch below agree, so that e.g. 'all'
            # is handled like 'A' instead of prompting again
            status = status[0].upper()
            if status not in ['A'] + Status.STATUSES:
                print("Unknown status type")
                continue

            readline.set_completer(None)
            prefix = raw_input("starting with prefix? [/]: ")
            prefix = prefix.strip()
            if prefix == "":
                prefix = "/"
            if status == 'A':
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, prefix=prefix)
            else:
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, status, prefix=prefix)
            return True

    def do_remove_rule(self):
        '''
        prompt for a path (or wildcard expression) and remove the
        rule for it for the current host from the rule store, then
        refresh the ignores taken from the rule store

        a relative path is taken relative to the current directory
        and a trailing path separator is dropped; always returns True
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        elif path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
        clouseau.retention.utils.ruleutils.do_remove_rule(
            self.cdb, path, self.cenv.host)
        # update the ignores list since we removed a rule
        self._update_ignored_from_rulestore(self.cenv.host)
        return True

    def get_rules_path(self):
        '''
        prompt for the full path to a rules file; returns the path,
        or the empty string if the user entered nothing or the path
        was rejected by check_rules_path
        '''
        readline.set_completer(None)
        rules_path = raw_input("full path to rules file (empty to quit): ")
        rules_path = rules_path.strip()
        if rules_path == '':
            return rules_path
        if not clouseau.retention.utils.cliutils.check_rules_path(rules_path):
            print("bad rules file path specified, aborting")
            return ''
        return rules_path

    def do_rule(self, command):
        '''
        run one command from the rule menu; unknown commands show
        the help for the menu

        returns None when the menu should be left, something else
        when it should be shown again
        '''
        if command in ('A', 'a'):
            result = self.do_add_rule()
        elif command in ('S', 's'):
            result = self.do_show_rules_with_status()
        elif command in ('D', 'd'):
            self.dircontents.get(self.cenv.host, self.cenv.cwdir,
                                 self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_path(
                self.cdb, self.cenv.cwdir, self.cenv.host)
            result = True
        elif command in ('C', 'c'):
            self.dircontents.get(self.cenv.host, self.cenv.cwdir,
                                 self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_entries(
                self.cdb, self.cenv.cwdir, self.dircontents.entries_dict,
                self.cenv.host)
            result = True
        elif command in ('R', 'r'):
            result = self.do_remove_rule()
        elif command in ('I', 'i'):
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.import_rules(
                    self.cdb, rules_path, self.cenv.host)
            result = True
        elif command in ('E', 'e'):
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.export_rules(
                    self.cdb, rules_path, self.cenv.host)
            result = True
        elif command in ('Q', 'q'):
            print("quitting this level")
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('rule')
            result = True
        return result

    def do_file_contents(self):
        '''
        prompt for a filename (relative names are taken relative to
        the current directory) and print the start of that file
        '''
        # fixme need a different completer here... meh
        readline.set_completer(None)
        filename = raw_input("filename (empty to quit): ")
        filename = filename.strip()
        if filename == '':
            return
        if filename[0] != os.path.sep:
            filename = os.path.join(self.cenv.cwdir, filename)
        contents = self.get_file_contents(filename)
        if contents is not None:
            print(contents)
        else:
            print("failed to get contents of file")

    def do_filter(self):
        '''
        prompt for the filter to apply to directory listings and
        store it in self.filtertype ('all', 'dir', 'file' or
        'check'); an empty reply selects 'check'

        returns True once a filter is set, None if the user quits
        '''
        default = 'C'
        self.cmpl.set_choices_completion(['A', 'D', 'F', 'C', 'Q'], default)
        while True:
            filtertype = raw_input("filter A(all), D(directories only),"
                                   " F(files only),"
                                   " C(Entries checked (not ignored),"
                                   " Q(quit)) [?]: ")
            filtertype = filtertype.strip()
            if filtertype == "":
                filtertype = default
            if filtertype in ('a', 'A'):
                self.filtertype = 'all'
                return True
            elif filtertype in ('D', 'd'):
                self.filtertype = 'dir'
                return True
            elif filtertype in ('F', 'f'):
                self.filtertype = 'file'
                return True
            elif filtertype in ('C', 'c'):
                self.filtertype = 'check'
                return True
            elif filtertype in ('q', 'Q'):
                return None
            else:
                print("Unknown filter type")
                continue

    def do_dir_descend(self, command):
        '''
        prompt for a directory below the current one and make it
        the current directory

        returns True after changing directory; returns the command
        passed in if the user enters nothing
        '''
        while True:
            # prompt user for dir to descend
            readline.set_completer(self.cmpl.dir_completion)
            self.cenv.set_prompt()
            directory = raw_input(self.cenv.prompt + ' ' +
                                  "directory name (empty to quit): ")
            directory = directory.strip()
            if directory == '':
                return command
            if directory[-1] == os.path.sep:
                directory = directory[:-1]
            if (directory[0] == '/' and
                    not directory.startswith(self.cenv.cwdir + os.path.sep)):
                # absolute path that is not below cwdir: ask again
                print('New directory is not a subdirectory of %s skipping'
                      % self.cenv.cwdir)
            else:
                # os.path.join leaves an absolute path (which by now
                # is known to be below cwdir) as it is
                self.cenv.cwdir = os.path.join(self.cenv.cwdir, directory)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print('Now at %s' % self.cenv.cwdir)
                return True

    def do_examine(self, command):
        '''
        run one command from the examine menu; unknown commands show
        the help for the menu

        returns None when the menu should be left, something else
        when it should be shown again
        '''
        if command in ('D', 'd'):
            return self.do_dir_descend(command)
        elif command in ('U', 'u'):
            # never go above the directory the user started from
            if self.cenv.cwdir != self.basedir:
                self.cenv.cwdir = os.path.dirname(self.cenv.cwdir)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print('Now at %s' % self.cenv.cwdir)
            else:
                print('Already at top %s' % self.cenv.cwdir)
            result = True
        elif command in ('E', 'e'):
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            result = True
        elif command in ('C', 'c'):
            self.do_file_contents()
            result = True
        elif command in ('F', 'f'):
            self.do_filter()
            result = True
        elif command in ('R', 'r'):
            # stay in the rule menu until one of its commands
            # returns None
            continuing = True
            while continuing:
                command = self.show_menu('rule')
                continuing = self.do_command(command, 'rule',
                                             self.cenv.cwdir)
            result = True
        elif command in ('M', 'm'):
            result = self.do_mark()
        elif command in ('Q', 'q', ''):
            print("quitting this level")
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('examine')
            result = True
        return result

    def do_top(self, command, dir_path):
        '''
        run one command from the top-level menu for the directory
        dir_path; unknown commands show the help for the menu

        returns None for Q (quit), the command itself for I (ignore)
        and True otherwise
        '''
        result = True
        if command in ('S', 's'):
            continuing = True
            while continuing:
                command = self.show_menu('status')
                continuing = self.do_command(command, 'status', dir_path)
        elif command in ('E', 'e'):
            # fixme this should let the user page through batches,
            # not use '1' every time
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            continuing = True
            while continuing:
                command = self.show_menu('examine')
                continuing = self.do_command(command, 'examine',
                                             self.cenv.cwdir)
        elif command in ('F', 'f'):
            self.do_filter()
        elif command in ('I', 'i'):
            # do nothing
            result = command
        elif command in ('R', 'r'):
            continuing = True
            while continuing:
                command = self.show_menu('rule')
                continuing = self.do_command(command, 'rule',
                                             self.cenv.cwdir)
        elif command in ('Q', 'q'):
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('top')
        return result

    def do_command(self, command, level, dir_path):
        '''
        run the given command for the given menu level ('top',
        'status', 'examine' or 'rule') on dir_path

        the first call after basedir and cwdir have been reset sets
        both of them to dir_path

        returns None when the menu for that level should be left
        (and for a command of None or an unknown level), something
        else when it should be shown again
        '''
        result = None
        if self.basedir is None:
            self.basedir = dir_path
        if self.cenv.cwdir is None:
            self.cenv.cwdir = dir_path

        if command is None:
            return None

        if level == 'top':
            result = self.do_top(command, dir_path)
        elif level == 'status':
            if command in Status.STATUSES:
                # this option is invoked on a directory so
                # type is dir every time
                clouseau.retention.utils.ruleutils.do_add_rule(
                    self.cdb, dir_path,
                    clouseau.retention.utils.ruleutils.text_to_entrytype(
                        'dir'),
                    command, self.cenv.host)
                return None
            elif command in ('Q', 'q'):
                return None
            else:
                clouseau.retention.utils.cliutils.show_help(level)
                result = True
        elif level == 'examine':
            result = self.do_examine(command)
        elif level == 'rule':
            result = self.do_rule(command)
        return result
class RemoteFilesAuditor(object):
    '''
    audit files across a set of remote hosts,
    in a specified set of directories
    '''

    def __init__(self, hosts_expr, audit_type, confdir=None,
                 prettyprint=False, show_content=False, dirsizes=False,
                 summary_report=False, depth=2, to_check=None,
                 ignore_also=None, timeout=60, maxfiles=None,
                 store_filepath=None, verbose=False):
        '''
        hosts_expr:     list or grain-based or wildcard expr for hosts
                        to be audited
        audit_type:     type of audit e.g. 'logs', 'homes'
        confdir:        directory where the yaml config files are stored
        prettyprint:    nicely format the output display
        show_content:   show the first line or so from problematic files
        dirsizes:       show only directories which have too many files
                        to audit properly, don't report on files at all
        summary_report: do a summary of results instead of detailed
                        this means different things depending on the
                        audit type
        depth:          the auditor will give up if a directory has too
                        many files in it (saves it from dying on
                        someone's 25gb homedir). this option tells it
                        how far down the tree to go from the top dir of
                        the audit, before starting to count.
                        e.g. do we count in /home/ariel or separately in
                        /home/ariel/* or in /home/ariel/*/*, etc.
        to_check:       comma-separated list of dirs (must end in '/')
                        and/or files that will be checked; if this is
                        None then all dirs/files will be checked
        ignore_also:    comma-separated list of dirs (must end in '/')
                        and/or files that will be skipped in addition
                        to the ones in the config, rules, etc.
        timeout:        salt timeout for running remote commands
        maxfiles:       how many files in a directory tree is too many
                        to audit (at which point we warn about that and
                        move on)
        store_filepath: full path to rule store (sqlite3 db)
        verbose:        show informative messages during processing
        '''
        self.hosts_expr = hosts_expr
        self.audit_type = audit_type
        self.confdir = confdir
        self.locations = audit_type + "_locations"
        self.prettyprint = prettyprint
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        self.show_summary = summary_report
        self.depth = depth + 1  # actually count of path separators in dirname
        self.to_check = to_check
        self.ignore_also = ignore_also
        self.timeout = timeout
        self.store_filepath = store_filepath
        self.verbose = verbose

        self.max_files = maxfiles
        self.set_up_max_files(maxfiles)

        self.magic = clouseau.retention.utils.magic.magic_open(
            clouseau.retention.utils.magic.MAGIC_NONE)
        self.magic.load()
        self.summary = None
        self.display_from_dict = FileInfo.display_from_dict
        self.runner = None

        if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost":
            # run locally: no host expansion and no rule store
            self.localaudit = True
            self.expanded_hosts = []
            self.cdb = None
        else:
            self.localaudit = False
            clouseau.retention.utils.config.set_up_conf(confdir)
            client = LocalClientPlus()
            hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
            self.expanded_hosts = client.cmd_expandminions(
                hosts, "test.ping", expr_form=expr_type)
            self.cdb = RuleStore(self.store_filepath)
            self.cdb.store_db_init(self.expanded_hosts)
            self.set_up_and_export_rule_store()

    def get_audit_args(self):
        '''
        return the list of arguments handed to the runner for the
        audit itself; the depth goes out as the caller gave it to us
        (self.depth has one added to it)
        '''
        audit_args = [self.confdir,
                      self.show_sample_content,
                      self.dirsizes,
                      self.depth - 1,
                      self.to_check,
                      self.ignore_also,
                      self.max_files]
        return audit_args

    def set_up_runner(self):
        '''
        create the Runner used for the remote audit and keep it
        in self.runner
        '''
        self.runner = Runner(self.confdir,
                             self.store_filepath,
                             self.hosts_expr,
                             self.expanded_hosts,
                             self.audit_type,
                             self.get_audit_args(),
                             self.show_sample_content,
                             self.to_check,
                             self.timeout,
                             self.verbose)

    def set_up_max_files(self, maxfiles):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are
        too large in this manner, we can set defaults much
        higher, since we don't stat files, open them to guess
        their filetype, etc; processing then goes much quicker

        maxfiles: explicit limit, or None to get the default
                  (1000 if only dir sizes are reported, else 100)
        '''
        if maxfiles is None:
            if self.dirsizes:
                self.max_files = 1000
            else:
                self.max_files = 100
        else:
            self.max_files = maxfiles

    def set_up_and_export_rule_store(self):
        '''
        export the rules of every host in the rule store to yaml
        files in the directory data_retention.d next to the rule
        store (created if missing): <host>_store.yaml with all the
        rules of the host and <host>_store_good.yaml with just the
        rules of status 'good'
        '''
        hosts = self.cdb.store_db_list_all_hosts()
        destdir = os.path.join(os.path.dirname(self.store_filepath),
                               "data_retention.d")
        if not os.path.isdir(destdir):
            # 0o755 is the same mode as the old-style literal 0755
            os.makedirs(destdir, 0o755)
        for host in hosts:
            all_destpath = os.path.join(destdir, host + "_store.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, all_destpath, host)
            good_destpath = os.path.join(destdir, host + "_store_good.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, good_destpath, host,
                Status.text_to_status('good'))

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    @staticmethod
    def get_dirname_from_warning(warning):
        '''
        some audit output lines warn about directory trees
        having too many files to audit; grab the dirname
        out of such a line and return it

        returns None if the line is not such a warning
        '''
        start = "WARNING: directory "
        if warning.startswith(start):
            # WARNING: directory %s has more than %d files
            rindex = warning.rfind(" has more than")
            # rfind gives -1 (which is not false) when there is
            # no match
            if rindex == -1:
                return None
            else:
                return warning[len(start):rindex]

        start = "WARNING: too many files to audit in directory "
        if warning.startswith(start):
            return warning[len(start):]

        return None

    def add_stats(self, item, summary):
        '''
        gather stats on how many files/dirs
        may be problematic; summary is where the results
        are collected, item is the item to include in
        the summary if needed

        item is a dict with the keys 'path', 'binary', 'old'
        ('T' counts as old, '-' as maybe old) and 'owner' (anything
        but 0 counts as non-root); the counts are kept per directory
        and separately for binary and text files

        returns the summary, which is also updated in place
        '''
        dirname = os.path.dirname(item['path'])

        if dirname not in summary:
            summary[dirname] = {
                'binary': {'old': 0, 'maybe_old': 0, 'nonroot': 0},
                'text': {'old': 0, 'maybe_old': 0, 'nonroot': 0}
            }
        if item['binary'] is True:
            group = 'binary'
        else:
            group = 'text'

        if item['old'] == 'T':
            summary[dirname][group]['old'] += 1
        elif item['old'] == '-':
            summary[dirname][group]['maybe_old'] += 1
        if item['owner'] != 0:
            summary[dirname][group]['nonroot'] += 1
        return summary

    def display_host_summary(self):
        '''
        print one line for each directory and group (binary, text)
        in self.summary that has at least one nonzero count
        '''
        if self.summary is not None:
            paths = sorted(self.summary.keys())
            for path in paths:
                for group in self.summary[path]:
                    if (self.summary[path][group]['old'] > 0 or
                            self.summary[path][group]['maybe_old'] > 0 or
                            self.summary[path][group]['nonroot'] > 0):
                        print("in directory %s, (%s), %d old,"
                              " %d maybe old, %d with non root owner"
                              % (path, group,
                                 self.summary[path][group]['old'],
                                 self.summary[path][group]['maybe_old'],
                                 self.summary[path][group]['nonroot']))

    def display_summary(self, result):
        '''
        for each host in result (dict of audit output keyed by host
        name), collect the per-directory stats from the output lines
        and print them; lines that cannot be processed are reported
        and skipped
        '''
        for host in result:
            self.summary = {}
            print("host: %s" % host)

            if result[host]:
                try:
                    lines = result[host].split('\n')
                    for line in lines:
                        # lines handled by display_summary_line need
                        # nothing more from us
                        if display_summary_line(line):
                            continue
                        else:
                            try:
                                item = json.loads(
                                    line, object_hook=JsonHelper.decode_dict)
                                if item['empty'] is not True:
                                    self.add_stats(item, self.summary)
                            except Exception:
                                # best effort: report the bad line
                                # and move on to the next one
                                print("WARNING: failed to json load from"
                                      " host %s this line: %s"
                                      % (host, line))
                    self.display_host_summary()
                except Exception:
                    print("WARNING: failed to process output from host")
            else:
                if self.verbose:
                    print("WARNING: no output from host %s" % host)

    def display_remote_host(self, result):
        '''
        display the audit output of one host: WARNING: and INFO:
        lines are printed as they are, empty lines are skipped and
        all other lines are loaded as json and displayed via
        self.display_from_dict
        '''
        try:
            lines = result.split('\n')
            files = []
            for line in lines:
                if line == "":
                    continue
                elif line.startswith("WARNING:") or line.startswith("INFO:"):
                    print(line)
                else:
                    files.append(
                        json.loads(line, object_hook=JsonHelper.decode_dict))

            if files == []:
                return
            # pad all paths to the width of the longest one
            path_justify = max([len(finfo['path']) for finfo in files]) + 2
            for finfo in files:
                self.display_from_dict(finfo, self.show_sample_content,
                                       path_justify)
        except Exception:
            print("WARNING: failed to load json from host")

    def get_local_auditor(self):
        '''
        return the auditor object used for audits of the local host
        '''
        # NOTE(review): this passes self.depth while get_audit_args()
        # passes self.depth - 1 -- confirm which of the two
        # LocalFilesAuditor expects
        return LocalFilesAuditor(self.audit_type, self.confdir,
                                 self.show_sample_content,
                                 self.dirsizes,
                                 self.depth,
                                 self.to_check,
                                 self.ignore_also,
                                 self.max_files)

    def audit_hosts(self):
        '''
        run the audit and display the results

        for a local audit the results are displayed and nothing is
        returned; for a remote audit the results are displayed (as
        a summary if one was requested), problem and skipped
        directories are recorded in the rule store, and the output
        keyed by host name is returned (None if there was no output
        at all)
        '''
        # do local audit instead
        if self.localaudit:
            localauditor = self.get_local_auditor()
            result = localauditor.do_local_audit()
            self.display_remote_host(result)
            return

        # proceed to regular remote audit
        self.set_up_runner()
        result = self.runner.run_remotely()
        if result is None:
            print("WARNING: failed to get output from audit script"
                  " on any host")
            # nothing to display and nothing to put in the rule store
            return result

        if self.show_summary:
            self.display_summary(result)
        else:
            for host in result:
                print("host: %s" % host)
                if result[host]:
                    self.display_remote_host(result[host])
                else:
                    if self.verbose:
                        print("no output from host %s" % host)
        # add some results to rule store
        self.update_status_rules_from_report(result)
        return result

    def update_status_rules_from_report(self, report):
        '''
        for each host in the report (dict of audit output keyed by
        host name), add a rule of status 'problem' for each directory
        reported as having problems, and a rule of status 'unreviewed'
        for each directory that was skipped, unless the directory is
        a problem directory in this report or already has a problem
        rule in the rule store
        '''
        for host in report:
            try:
                problem_rules = clouseau.retention.utils.ruleutils.get_rules(
                    self.cdb, host, Status.text_to_status('problem'))
            except Exception:
                print('WARNING: problem retrieving problem rules for host %s'
                      % host)
                problem_rules = None
            if problem_rules is not None:
                existing_problems = [rule['path'] for rule in problem_rules]
            else:
                existing_problems = []

            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
            # an empty list stands in for 'no problem dirs reported' so
            # that the membership test for skipped dirs below is safe
            if dirs_problem is not None:
                dirs_problem = list(set(dirs_problem))
            else:
                dirs_problem = []
            for dirname in dirs_problem:
                clouseau.retention.utils.ruleutils.do_add_rule(
                    self.cdb, dirname,
                    clouseau.retention.utils.ruleutils.text_to_entrytype(
                        'dir'),
                    Status.text_to_status('problem'), host)

            if dirs_skipped is not None:
                dirs_skipped = list(set(dirs_skipped))
                for dirname in dirs_skipped:
                    if (dirname in dirs_problem or
                            dirname in existing_problems):
                        # problem report overrides 'too many to audit'
                        continue
                    clouseau.retention.utils.ruleutils.do_add_rule(
                        self.cdb, dirname,
                        clouseau.retention.utils.ruleutils.text_to_entrytype(
                            'dir'),
                        Status.text_to_status('unreviewed'), host)
class CommandLine(object):
    '''
    prompt user at the command line for actions to take on a given
    directory or file, show results

    the session is driven by nested menus ('top', 'status', 'examine',
    'rule'); each menu handler returns a true value to stay in the menu
    and None to leave it
    '''

    # todo: down and up should check you really are (descending,
    # ascending path)

    def __init__(self, confdir, store_filepath, timeout, audit_type,
                 ignore_also=None, hosts_expr=None):
        '''
        confdir:        directory passed to the config setup and to Ignores
        store_filepath: path handed to RuleStore
        timeout:        timeout handed to the remote helpers
        audit_type:     type of audit; also used to build the name of the
                        config entry listing the audited locations
        ignore_also:    additional ignores, converted with
                        convert_ignore_also_to_ignores
        hosts_expr:     hosts expression, stored for later use
        '''
        self.confdir = confdir
        self.cdb = RuleStore(store_filepath)
        self.cdb.store_db_init(None)
        self.timeout = timeout
        self.audit_type = audit_type
        self.locations = audit_type + "_locations"
        self.hosts_expr = hosts_expr
        self.basedir = None

        clouseau.retention.utils.cliutils.init_readline_hist()
        # this is arbitrary, can tweak it later
        # how many levels down we keep in our list of
        # top-level dirs from which the user can start
        # their interactive session
        self.max_depth_top_level = 3

        self.filtertype = 'all'

        # fixme completely wrong
        self.batchno = 1

        clouseau.retention.utils.config.set_up_conf(self.confdir)

        # duplicate all the ignores except for the uh
        # ones specific to a host. those will be done
        # at host choice time
        # this includes rules, we will do those at host choice time too
        # we want: global, perhost, ignore_also (if there were any)
        self.local_ignored = None
        self.ignores = Ignores(self.confdir)
        self.ignored_from_rulestore = {}
        self.ignored_also = (
            clouseau.retention.utils.ignores.convert_ignore_also_to_ignores(
                ignore_also))

        self.dircontents = CurrentDirContents(self.timeout)
        self.cenv = CurrentEnv()
        self.cmpl = Completion(self.dircontents, self.cenv,
                               self.max_depth_top_level)

    def do_one_host(self, host, report):
        '''
        run the interactive session for one host: load the ignores for the
        host from the rule store, work out which directories from the report
        the user may start from, and prompt for a directory until the
        user is done
        '''
        self.set_host(host)
        results = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in results:
            self.ignored_from_rulestore[host] = results[host]

        if host not in report:
            dirs_problem = None
            dirs_skipped = None
        else:
            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
        self.cenv.set_reported_dirs(dirs_problem, dirs_skipped)

        if self.cenv.problem_dirs is None and self.cenv.skipped_dirs is None:
            print("No report available from this host")
            return
        # 'not x' instead of 'len(x) == 0' so that a single missing (None)
        # list is treated as empty rather than raising TypeError
        if not self.cenv.problem_dirs and not self.cenv.skipped_dirs:
            print("No problem dirs and no skipped dirs on this host")
            return

        dirs_problem = dirs_problem or []
        dirs_skipped = dirs_skipped or []
        problem_prefixes = set(
            clouseau.retention.utils.cliutils.get_path_prefix(
                d, self.max_depth_top_level) for d in dirs_problem)
        skipped = set(s for s in dirs_skipped if s not in problem_prefixes)
        relevant_dirs = sorted(problem_prefixes) + sorted(skipped)

        while True:
            dir_todo = self.cmpl.prompt_for_dir()
            if dir_todo is None:
                print("Done with this host")
                break
            elif dir_todo not in relevant_dirs:
                print("Please choose one of the following directories:")
                # fixme another arbitrary setting
                clouseau.retention.utils.cliutils.print_columns(
                    relevant_dirs, 5)
            else:
                # start the directory session from scratch
                self.basedir = None
                self.cenv.cwdir = None
                self.do_one_directory(dir_todo)

    def run(self, report):
        '''
        call with full report output (not summary) across
        hosts, this will permit the user to examine
        directories and files of specified hosts and
        add/update rules for those dirs and files
        '''
        self.cenv.set_hosts(report.keys())
        while True:
            host_todo = self.cmpl.prompt_for_host()
            if host_todo is None:
                print("exiting at user request")
                break
            else:
                usercfgrab = RemoteUserCfGrabber(host_todo, self.timeout,
                                                 self.audit_type, self.confdir)
                to_convert = usercfgrab.run(True)
                self.local_ignored = (
                    clouseau.retention.utils.ignores.process_local_ignores(
                        to_convert))
                results = (
                    clouseau.retention.utils.ignores.get_ignored_from_rulestore(
                        self.cdb, [host_todo]))
                if host_todo in results:
                    self.ignored_from_rulestore[host_todo] = results[host_todo]

                self.do_one_host(host_todo, report)

    def set_host(self, host):
        '''record the host the session is currently working on'''
        self.cenv.host = host

    def do_one_directory(self, path):
        '''
        given a list which contains absolute paths for the
        subdirectories / files of a given directory, (we don't
        go more than one level down, it's likely to be too much),
        ask the user what status to give this directory, and
        show the user information for each contained dir/file if
        desired, as well as info about the directory
        '''
        while True:
            todo = self.get_do_command(path)
            if todo is None:
                break

    def get_do_command(self, path):
        '''show the top level menu once and process the user's choice'''
        command = self.show_menu('top')
        return self.do_command(command, 'top', path)

    def get_menu_entry(self, choices, default, text):
        '''
        prompt the user with the menu text, offering completion on the
        specified choices; return the stripped user input, or the default
        if the user entered nothing
        '''
        self.cmpl.set_choices_completion(choices, default)
        self.cenv.set_prompt()
        command = raw_input(self.cenv.prompt + ' ' + text + " [%s]: " % default)
        command = command.strip()
        if command == "":
            command = default
        return command

    def show_menu(self, level):
        '''
        display the menu for the given level ('top', 'status', 'examine'
        or 'rule') and return the command the user chose; returns None
        for an unknown level
        '''
        if level == 'top':
            text = ("S(set status)/E(examine directory)/"
                    "Filter directory listings/"
                    "I(ignore)/R(manage rules)/Q(quit menu)")
            command = self.get_menu_entry(['S', 'E', 'I', 'F', 'R', 'Q'],
                                          'S', text)
        elif level == 'status':
            text = Status.get_statuses_prompt(", ") + ", Q(quit status menu)"
            # argument order is (choices, default, text); the default
            # status is 'good'
            command = self.get_menu_entry(Status.STATUSES + ['Q'],
                                          Status.text_to_status('good'), text)
        elif level == 'examine':
            text = ("D(down a level)/U(up a level)/E(show entries)/"
                    "C(show contents of file)/R(show rules)/"
                    "F(filter directory listings/"
                    "M(mark file(s))/Q(quit examine menu)")
            command = self.get_menu_entry(
                ['D', 'U', 'E', 'F', 'C', 'R', 'M', 'Q'], 'E', text)
        elif level == 'rule':
            text = ("S(show all rules of type)/D(show rules covering dir)/"
                    "C(show rules covering dir contents)/"
                    "A(add rule to rules store)/"
                    "R(remove rule from rules store/"
                    "E(export rules from store to file)/"
                    "I(import rules from file to store)/Q(quit rule menu)")
            # 'D' is the default so it must be one of the choices too
            command = self.get_menu_entry(
                ['S', 'D', 'C', 'A', 'R', 'E', 'I', 'Q'], 'D', text)
        else:
            command = None
        return command

    def get_file_contents(self, path):
        '''retrieve the first lines of the given file from the current host'''
        # get 20 lines and hope that's enough for the user to evaluate
        # fixme the number of lines should be configurable
        fileexamin = RemoteFileExaminer(path, self.cenv.host, 20,
                                        self.timeout, quiet=True)
        contents = fileexamin.run()
        return contents

    def get_basedir_from_path(self, path):
        '''
        return the configured audit location that is the given path or
        contains it; fall back to '/' if there is none
        '''
        for location in clouseau.retention.utils.config.conf[self.locations]:
            if path == location or path.startswith(location + os.path.sep):
                return location
        # fixme is this really the right fallback? check it
        return '/'

    def entry_is_not_ignored(self, path, entrytype, do_check):
        '''
        see if the given entry is NOT in the ignored lists
        and return True if so, False otherwise

        we only do this check if the do_check argument is set to 'check';
        otherwise we default to True
        '''
        if do_check != 'check':
            return True

        basedir = self.get_basedir_from_path(path)
        if self.audit_type == 'logs' and entrytype == 'file':
            path = LocalLogsAuditor.normalize(path)

        if entrytype == 'file':
            checker = clouseau.retention.utils.ignores.file_is_ignored
            dirs = False
        else:
            checker = clouseau.retention.utils.ignores.dir_is_ignored
            dirs = True

        # ignore lists that apply to all hosts
        for ignored in [self.ignores.global_ignored, self.ignored_also]:
            if dirs:
                result = checker(path, ignored)
            else:
                result = checker(path, basedir, ignored)
            if result:
                return False

        # ignore lists that are indexed by host
        for ignored in [self.ignores.perhost_ignored,
                        self.ignored_from_rulestore]:
            if self.cenv.host in ignored:
                if dirs:
                    result = checker(path, ignored[self.cenv.host])
                else:
                    result = checker(path, basedir, ignored[self.cenv.host])
                if result:
                    return False
        return True

    def get_entries_from_wildcard(self, file_expr):
        '''
        get entries from cwdir that match the
        expression
        '''
        # fixme that dang batchno, what a bad idea it was
        self.dircontents.get(self.cenv.host, self.cenv.cwdir, 1)
        # one wildcard only, them's the breaks
        if '*' in file_expr:
            start, end = file_expr.split('*', 1)
            # the length check keeps start and end from overlapping
            return [c for c in self.dircontents.entries_dict
                    if (c.startswith(start) and
                        c.endswith(end) and
                        len(c) >= len(start) + len(end))]
        elif file_expr in self.dircontents.entries_dict:
            return [file_expr]
        else:
            return []

    def do_mark(self):
        '''
        prompt for a file or directory name (one wildcard permitted) in the
        current directory and add a rule with status 'good' for it;
        links and special files are skipped. Returns True in all cases.
        '''
        readline.set_completer(self.cmpl.dir_entries_completion)
        file_expr = raw_input("file or dirname expression (empty to quit): ")
        file_expr = file_expr.strip()
        if file_expr == '':
            return True
        if file_expr[-1] == os.path.sep:
            file_expr = file_expr[:-1]
        if '*' in file_expr:
            entries_todo = self.get_entries_from_wildcard(file_expr)
        else:
            entries_todo = [file_expr]
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            if not self.dircontents.entries:
                print('failed to get directory contents for %s'
                      % self.cenv.cwdir)
                print('marking dirs/files regardless')
        for entry in entries_todo:
            if entry not in self.dircontents.entries_dict:
                print('skipping %s, not in current dir listing' % entry)
                print(self.dircontents.entries_dict)
                continue
            filetype = clouseau.retention.utils.ruleutils.entrytype_to_text(
                self.dircontents.entries_dict[entry]['type'])
            if filetype == 'link':
                print('No need to mark %s links are always skipped'
                      % file_expr)
                continue
            elif filetype != 'dir' and filetype != 'file':
                print('Not a dir or regular file, no need to mark, skipping')
                continue
            status = Status.text_to_status('good')
            # NOTE(review): the rule is recorded for the expression as typed
            # (file_expr), not for the matching entry - confirm that this
            # is intended for wildcard expressions
            clouseau.retention.utils.ruleutils.do_add_rule(
                self.cdb, file_expr, filetype, status, self.cenv.host)
        return True

    def do_add_rule(self):
        '''
        prompt for a path (relative paths are taken relative to the current
        directory; a trailing separator means a directory) and a status, add
        the rule to the rule store and refresh the ignores derived from the
        store. Returns True, or None if the user quit at the status prompt.
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        default = Status.text_to_status('good')
        self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input(statuses_text + " Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default
            if status[0].upper() in Status.STATUSES:
                status = status[0].upper()
                break
            elif status == 'q' or status == 'Q':
                return None
            else:
                print("Unknown status type")
                continue

        # fixme should check that any wildcard is only one and only
        # in the last component... someday

        if path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'dir')
        else:
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'file')

        clouseau.retention.utils.ruleutils.do_add_rule(
            self.cdb, path, filetype, status, self.cenv.host)
        # update the ignores list since we have a new rule
        results = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [self.cenv.host])
        if self.cenv.host in results:
            self.ignored_from_rulestore[self.cenv.host] = (
                results[self.cenv.host])
        return True

    def do_show_rules_with_status(self):
        '''
        prompt for a status (or A for all) and a path prefix, and show the
        rules of the current host that match. Returns True once rules have
        been shown, None if the user quit.
        '''
        default = Status.text_to_status('problem')
        self.cmpl.set_choices_completion(['A'] + Status.STATUSES + ['Q'],
                                         default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input("status type A(all), " + statuses_text +
                               ", Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default

            if status == 'q' or status == 'Q':
                return None
            elif status[0].upper() not in ['A'] + Status.STATUSES:
                print("Unknown status type")
                continue

            readline.set_completer(None)
            prefix = raw_input("starting with prefix? [/]: ")
            prefix = prefix.strip()
            if prefix == "":
                prefix = "/"
            if status == 'a' or status == 'A':
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, prefix=prefix)
                return True
            elif status[0].upper() in Status.STATUSES:
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, status[0].upper(),
                    prefix=prefix)
                return True

    def do_remove_rule(self):
        '''
        prompt for a path (relative paths are taken relative to the current
        directory), remove the rule for it from the rule store and refresh
        the ignores derived from the store. Returns True in all cases.
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        elif path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
        clouseau.retention.utils.ruleutils.do_remove_rule(
            self.cdb, path, self.cenv.host)
        # update the ignores list since we removed a rule
        results = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [self.cenv.host])
        if self.cenv.host in results:
            self.ignored_from_rulestore[self.cenv.host] = (
                results[self.cenv.host])
        return True

    def get_rules_path(self):
        '''
        prompt for the path to a rules file; return the path, or the empty
        string if the user entered nothing or the path failed the check
        '''
        readline.set_completer(None)
        rules_path = raw_input("full path to rules file (empty to quit): ")
        rules_path = rules_path.strip()
        if rules_path == '':
            return rules_path
        if not clouseau.retention.utils.cliutils.check_rules_path(rules_path):
            print("bad rules file path specified, aborting")
            return ''
        return rules_path

    def do_rule(self, command):
        '''
        process a command from the rule menu; returns None if the
        menu should be left, a true value otherwise
        '''
        if command == 'A' or command == 'a':
            result = self.do_add_rule()
        elif command == 'S' or command == 's':
            result = self.do_show_rules_with_status()
        elif command == 'D' or command == 'd':
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_path(
                self.cdb, self.cenv.cwdir, self.cenv.host)
            result = True
        elif command == 'C' or command == 'c':
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_entries(
                self.cdb, self.cenv.cwdir, self.dircontents.entries_dict,
                self.cenv.host)
            result = True
        elif command == 'R' or command == 'r':
            result = self.do_remove_rule()
        elif command == 'I' or command == 'i':
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.import_rules(
                    self.cdb, rules_path, self.cenv.host)
            result = True
        elif command == 'E' or command == 'e':
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.export_rules(
                    self.cdb, rules_path, self.cenv.host)
            result = True
        elif command == 'Q' or command == 'q':
            print("quitting this level")
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('rule')
            result = True
        return result

    def do_file_contents(self):
        '''
        prompt for a filename (relative names are taken relative to the
        current directory) and display the start of its contents
        '''
        # fixme need a different completer here... meh
        readline.set_completer(None)
        filename = raw_input("filename (empty to quit): ")
        filename = filename.strip()
        if filename == '':
            return
        if filename[0] != os.path.sep:
            filename = os.path.join(self.cenv.cwdir, filename)
        contents = self.get_file_contents(filename)
        if contents is not None:
            print(contents)
        else:
            print("failed to get contents of file")

    def do_filter(self):
        '''
        prompt for the filter to apply to directory listings and store it;
        returns True once a filter has been set, None if the user quit
        '''
        default = 'C'
        self.cmpl.set_choices_completion(['A', 'D', 'F', 'C', 'Q'], default)
        while True:
            filtertype = raw_input("filter A(all), D(directories only),"
                                   " F(files only),"
                                   " C(Entries checked (not ignored),"
                                   " Q(quit)) [?]: ")
            filtertype = filtertype.strip()
            if filtertype == "":
                filtertype = default
            if filtertype == 'a' or filtertype == 'A':
                self.filtertype = 'all'
                return True
            elif filtertype == 'D' or filtertype == 'd':
                self.filtertype = 'dir'
                return True
            elif filtertype == 'F' or filtertype == 'f':
                self.filtertype = 'file'
                return True
            elif filtertype == 'C' or filtertype == 'c':
                self.filtertype = 'check'
                return True
            elif filtertype == 'q' or filtertype == 'Q':
                return None
            else:
                print("Unknown filter type")
                continue

    def do_dir_descend(self, command):
        '''
        prompt for a subdirectory of the current directory and move into it;
        returns True after moving, or the command passed in if the user
        entered nothing
        '''
        while True:
            # prompt user for dir to descend
            readline.set_completer(self.cmpl.dir_completion)
            self.cenv.set_prompt()
            directory = raw_input(self.cenv.prompt + ' ' +
                                  "directory name (empty to quit): ")
            directory = directory.strip()
            if directory == '':
                return command
            # keep a lone separator as is, otherwise the name would become
            # empty and the check of its first character would fail
            if len(directory) > 1 and directory[-1] == os.path.sep:
                directory = directory[:-1]
            if (directory.startswith('/') and
                    not directory.startswith(self.cenv.cwdir + os.path.sep)):
                print('New directory is not a subdirectory of %s skipping'
                      % self.cenv.cwdir)
            else:
                self.cenv.cwdir = os.path.join(self.cenv.cwdir, directory)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print('Now at %s' % self.cenv.cwdir)
                return True

    def do_examine(self, command):
        '''
        process a command from the examine menu; returns None if the
        menu should be left, a true value otherwise
        '''
        if command == 'D' or command == 'd':
            return self.do_dir_descend(command)
        elif command == 'U' or command == 'u':
            # never go above the directory the session started from
            if self.cenv.cwdir != self.basedir:
                self.cenv.cwdir = os.path.dirname(self.cenv.cwdir)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print('Now at %s' % self.cenv.cwdir)
            else:
                print('Already at top %s' % self.cenv.cwdir)
            result = True
        elif command == 'E' or command == 'e':
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            result = True
        elif command == 'C' or command == 'c':
            self.do_file_contents()
            result = True
        elif command == 'F' or command == 'f':
            self.do_filter()
            result = True
        elif command == 'R' or command == 'r':
            continuing = True
            while continuing:
                command = self.show_menu('rule')
                continuing = self.do_command(command, 'rule', self.cenv.cwdir)
            result = True
        elif command == 'M' or command == 'm':
            result = self.do_mark()
        elif command == 'Q' or command == 'q' or command == '':
            print("quitting this level")
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('examine')
            result = True
        return result

    def do_top(self, command, dir_path):
        '''
        process a command from the top level menu for the given directory;
        returns None if the user quit, a true value otherwise
        '''
        result = True
        if command == 'S' or command == 's':
            continuing = True
            while continuing:
                command = self.show_menu('status')
                continuing = self.do_command(command, 'status', dir_path)
        elif command == 'E' or command == 'e':
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            continuing = True
            while continuing:
                # fixme this should let the user page through batches,
                # not use '1' every time
                command = self.show_menu('examine')
                continuing = self.do_command(command, 'examine',
                                             self.cenv.cwdir)
        elif command == 'F' or command == 'f':
            self.do_filter()
        elif command == 'I' or command == 'i':
            # do nothing
            result = command
        elif command == 'R' or command == 'r':
            continuing = True
            while continuing:
                command = self.show_menu('rule')
                continuing = self.do_command(command, 'rule', self.cenv.cwdir)
        elif command == 'Q' or command == 'q':
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('top')
        return result

    def do_command(self, command, level, dir_path):
        '''
        dispatch the command to the handler for the given menu level;
        the first call for a directory also records it as the base
        and current directory. Returns None if the menu should be left,
        a true value otherwise.
        '''
        result = None
        if self.basedir is None:
            self.basedir = dir_path
        if self.cenv.cwdir is None:
            self.cenv.cwdir = dir_path

        if command is None:
            return None

        if level == 'top':
            result = self.do_top(command, dir_path)
        elif level == 'status':
            if command in Status.STATUSES:
                # this option is invoked on a directory so
                # type is dir every time
                clouseau.retention.utils.ruleutils.do_add_rule(
                    self.cdb, dir_path,
                    clouseau.retention.utils.ruleutils.text_to_entrytype(
                        'dir'),
                    command, self.cenv.host)
                return None
            elif command == 'Q' or command == 'q':
                return None
            else:
                clouseau.retention.utils.cliutils.show_help(level)
                result = True
        elif level == 'examine':
            result = self.do_examine(command)
        elif level == 'rule':
            result = self.do_rule(command)
        return result
class RemoteFilesAuditor(object):
    '''
    audit files across a set of remote hosts,
    in a specified set of directories
    '''

    def __init__(self, hosts_expr, audit_type, confdir=None, prettyprint=False,
                 show_content=False, dirsizes=False, summary_report=False,
                 depth=2, to_check=None, ignore_also=None,
                 timeout=60, maxfiles=None, store_filepath=None,
                 verbose=False):
        '''
        hosts_expr:     list or grain-based or wildcard expr for hosts
                        to be audited
        audit_type:     type of audit e.g. 'logs', 'homes'
        confdir:        directory where the yaml config files are stored
        prettyprint:    nicely format the output display
        show_content:   show the first line or so from problematic files
        dirsizes:       show only directories which have too many files to
                        audit properly, don't report on files at all
        summary_report: do a summary of results instead of detailed
                        this means different things depending on the audit
                        type
        depth:          the auditor will give up if a directory has too many
                        files in it (saves it from dying on someone's 25gb
                        homedir). this option tells it how far down the tree
                        to go from the top dir of the audit, before starting
                        to count. e.g. do we count in /home/ariel or
                        separately in /home/ariel/* or in /home/ariel/*/*,
                        etc.
        to_check:       comma-separated list of dirs (must end in '/') and/or
                        files that will be checked; if this is None then
                        all dirs/files will be checked
        ignore_also:    comma-separated list of dirs (must end in '/') and/or
                        files that will be skipped in addition to the ones
                        in the config, rules, etc.
        timeout:        salt timeout for running remote commands
        maxfiles:       how many files in a directory tree is too many to
                        audit (at which point we warn about that and move on)
        store_filepath: full path to rule store (sqlite3 db)
        verbose:        show informative messages during processing
        '''
        self.hosts_expr = hosts_expr
        self.audit_type = audit_type
        self.confdir = confdir
        self.locations = audit_type + "_locations"
        self.prettyprint = prettyprint
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        self.show_summary = summary_report
        self.depth = depth + 1  # actually count of path separators in dirname
        self.to_check = to_check
        self.ignore_also = ignore_also
        self.timeout = timeout
        self.store_filepath = store_filepath
        self.verbose = verbose
        self.max_files = maxfiles
        # fills in the default for max_files if none was specified
        self.set_up_max_files(maxfiles)

        self.magic = clouseau.retention.utils.magic.magic_open(
            clouseau.retention.utils.magic.MAGIC_NONE)
        self.magic.load()
        self.summary = None
        self.display_from_dict = FileInfo.display_from_dict
        self.runner = None

        if self.hosts_expr == "127.0.0.1" or self.hosts_expr == "localhost":
            # run locally
            self.localaudit = True
            self.expanded_hosts = []
            self.cdb = None
        else:
            self.localaudit = False
            clouseau.retention.utils.config.set_up_conf(confdir)
            client = LocalClientPlus()
            hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
            self.expanded_hosts = client.cmd_expandminions(
                hosts, "test.ping", expr_form=expr_type)

            self.cdb = RuleStore(self.store_filepath)
            self.cdb.store_db_init(self.expanded_hosts)
            self.set_up_and_export_rule_store()

    def get_audit_args(self):
        '''return the list of arguments handed to the runner for the audit'''
        # self.depth was incremented at init time; pass on the value
        # the caller originally specified
        audit_args = [self.confdir,
                      self.show_sample_content,
                      self.dirsizes,
                      self.depth - 1,
                      self.to_check,
                      self.ignore_also,
                      self.max_files]
        return audit_args

    def set_up_runner(self):
        '''create the runner used for the remote audit'''
        self.runner = Runner(self.confdir,
                             self.store_filepath,
                             self.hosts_expr,
                             self.expanded_hosts,
                             self.audit_type,
                             self.get_audit_args(),
                             self.show_sample_content,
                             self.to_check,
                             self.timeout,
                             self.verbose)

    def set_up_max_files(self, maxfiles):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are
        too large in this manner, we can set defaults much
        higher, since we don't stat files, open them to guess
        their filetype, etc; processing then goes much quicker
        '''
        if maxfiles is None:
            if self.dirsizes:
                self.max_files = 1000
            else:
                self.max_files = 100
        else:
            self.max_files = maxfiles

    def set_up_and_export_rule_store(self):
        '''
        for every host in the rule store, export all of its rules and,
        separately, its rules with status 'good', to yaml files in the
        data_retention.d directory next to the rule store
        '''
        hosts = self.cdb.store_db_list_all_hosts()
        destdir = os.path.join(os.path.dirname(self.store_filepath),
                               "data_retention.d")
        if not os.path.isdir(destdir):
            os.makedirs(destdir, 0o755)
        for host in hosts:
            all_destpath = os.path.join(destdir, host + "_store.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, all_destpath, host)
            good_destpath = os.path.join(destdir, host + "_store_good.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, good_destpath, host, Status.text_to_status('good'))

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    @staticmethod
    def get_dirname_from_warning(warning):
        '''
        some audit output lines warn about directory trees
        having too many files to audit; grab the dirname
        out of such a line and return it

        returns None if the line is not such a warning
        '''
        start = "WARNING: directory "
        if warning.startswith(start):
            # WARNING: directory %s has more than %d files
            rindex = warning.rfind(" has more than")
            # rfind returns -1 (a true value) if the text is not found
            if rindex == -1:
                return None
            return warning[len(start):rindex]

        start = "WARNING: too many files to audit in directory "
        if warning.startswith(start):
            return warning[len(start):]

        return None

    def add_stats(self, item, summary):
        '''
        gather stats on how many files/dirs
        may be problematic; summary is where the results
        are collected, item is the item to include in
        the summary if needed

        the summary is updated in place and returned
        '''
        dirname = os.path.dirname(item['path'])

        if dirname not in summary:
            summary[dirname] = {
                'binary': {'old': 0, 'maybe_old': 0, 'nonroot': 0},
                'text': {'old': 0, 'maybe_old': 0, 'nonroot': 0}
            }
        if item['binary'] is True:
            group = 'binary'
        else:
            group = 'text'

        if item['old'] == 'T':
            summary[dirname][group]['old'] += 1
        elif item['old'] == '-':
            summary[dirname][group]['maybe_old'] += 1
        if item['owner'] != 0:
            summary[dirname][group]['nonroot'] += 1
        return summary

    def display_host_summary(self):
        '''
        display the collected stats for the current host, one line per
        directory and group that has something to report
        '''
        if self.summary is not None:
            paths = sorted(self.summary.keys())
            for path in paths:
                for group in self.summary[path]:
                    if (self.summary[path][group]['old'] > 0 or
                            self.summary[path][group]['maybe_old'] > 0 or
                            self.summary[path][group]['nonroot'] > 0):
                        print("in directory %s, (%s), %d old,"
                              " %d maybe old, %d with non root owner"
                              % (path, group,
                                 self.summary[path][group]['old'],
                                 self.summary[path][group]['maybe_old'],
                                 self.summary[path][group]['nonroot']))

    def display_summary(self, result):
        '''
        given the audit output indexed by host, display summary stats
        for each host; lines that cannot be processed are reported
        and skipped
        '''
        for host in result:
            self.summary = {}
            print("host: %s" % host)

            if result[host]:
                try:
                    lines = result[host].split('\n')
                    for line in lines:
                        if display_summary_line(line):
                            continue
                        else:
                            try:
                                item = json.loads(
                                    line, object_hook=JsonHelper.decode_dict)
                                if item['empty'] is not True:
                                    self.add_stats(item, self.summary)
                            except Exception:
                                print("WARNING: failed to json load from host"
                                      " %s this line: %s" % (host, line))
                    self.display_host_summary()
                except Exception:
                    print("WARNING: failed to process output from host")
            else:
                if self.verbose:
                    print("WARNING: no output from host %s" % host)

    def display_remote_host(self, result):
        '''
        given the audit output of one host, display warnings and
        informational lines as they are, and the information about each
        file reported, lined up on the longest path
        '''
        try:
            lines = result.split('\n')
            files = []
            for line in lines:
                if line == "":
                    continue
                elif line.startswith("WARNING:") or line.startswith("INFO:"):
                    print(line)
                else:
                    files.append(json.loads(
                        line, object_hook=JsonHelper.decode_dict))

            if files == []:
                return
            path_justify = max([len(finfo['path']) for finfo in files]) + 2
            for finfo in files:
                self.display_from_dict(finfo, self.show_sample_content,
                                       path_justify)
        except Exception:
            print("WARNING: failed to load json from host")

    def get_local_auditor(self):
        '''return the auditor used when the audit is to be run locally'''
        # NOTE(review): this passes the already incremented self.depth,
        # while get_audit_args passes self.depth - 1; confirm which value
        # LocalFilesAuditor expects
        return LocalFilesAuditor(self.audit_type, self.confdir,
                                 self.show_sample_content,
                                 self.dirsizes,
                                 self.depth, self.to_check,
                                 self.ignore_also,
                                 self.max_files)

    def audit_hosts(self):
        '''
        run the audit and display the results

        for a local audit the output of the local auditor is displayed
        and None is returned; otherwise the audit is run remotely, the
        per-host output (or a summary, if requested) is displayed, the
        rule store is updated from the report and the report (as returned
        by the runner, indexed by host) is returned. None is returned if
        no output could be retrieved from any host.
        '''
        # do local audit instead
        if self.localaudit:
            localauditor = self.get_local_auditor()
            result = localauditor.do_local_audit()
            self.display_remote_host(result)
            return

        # proceed to regular remote audit
        self.set_up_runner()
        result = self.runner.run_remotely()
        if result is None:
            print("WARNING: failed to get output from audit script on any host")
            # there is no report, so there is nothing to add to the rule
            # store; bail out instead of failing on None.keys() below
            return result

        if self.show_summary:
            self.display_summary(result)
        else:
            for host in result:
                print("host: %s" % host)
                if result[host]:
                    self.display_remote_host(result[host])
                elif self.verbose:
                    print("no output from host %s" % host)
        # add some results to rule store
        self.update_status_rules_from_report(result)
        return result

    def update_status_rules_from_report(self, report):
        '''
        given the audit report (indexed by host), add rules to the rule store:
        directories reported as having problems get a 'problem' rule,
        directories skipped by the audit get an 'unreviewed' rule unless
        they are already known (from this report or from the rule store)
        as problem directories
        '''
        if not report:
            return

        dir_type = clouseau.retention.utils.ruleutils.text_to_entrytype('dir')
        for host in report.keys():
            try:
                problem_rules = clouseau.retention.utils.ruleutils.get_rules(
                    self.cdb, host, Status.text_to_status('problem'))
            except Exception:
                # best effort: carry on without the existing rules
                print('WARNING: problem retrieving problem rules for host %s'
                      % host)
                problem_rules = None
            if problem_rules is not None:
                existing_problems = set(rule['path'] for rule in problem_rules)
            else:
                existing_problems = set()

            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
            # either list may be missing; treat a missing list as empty so
            # that the membership test for skipped dirs cannot fail
            problem_set = set(dirs_problem) if dirs_problem is not None else set()
            for dirname in problem_set:
                clouseau.retention.utils.ruleutils.do_add_rule(
                    self.cdb, dirname, dir_type,
                    Status.text_to_status('problem'), host)

            if dirs_skipped is not None:
                for dirname in set(dirs_skipped):
                    if dirname in problem_set or dirname in existing_problems:
                        # problem report overrides 'too many to audit'
                        continue
                    clouseau.retention.utils.ruleutils.do_add_rule(
                        self.cdb, dirname, dir_type,
                        Status.text_to_status('unreviewed'), host)