Beispiel #1
0
def main():
    '''
    parse the command line, open the rule store and run the
    requested action against the specified hosts

    options:
      -H/--hosts, -a/--action, -p/--path, -s/--status,
      -r/--rulestore (path to the rule store file),
      -d/--dryrun, -h/--help

    NOTE(review): usage() is presumably expected to exit the program;
    if it returned after a getopt error, 'options' would be unbound
    below -- confirm against its definition
    '''
    hosts = None
    action = None
    path = None
    status = None
    dryrun = False
    store_filepath = "/etc/data_retention/dataretention_rules.sq3"

    try:
        # "rulestore=" must be listed here, otherwise the long form
        # of -r handled in the loop below is rejected by getopt
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "H:a:p:s:r:dh",
            ["hosts=", "action=", "path=", "status=",
             "rulestore=", "dryrun", "help"])

    except getopt.GetoptError as err:
        usage("Unknown option specified: " + str(err))

    for (opt, val) in options:
        if opt in ("-H", "--hosts"):
            hosts = val
        elif opt in ("-a", "--action"):
            action = val
        elif opt in ("-p", "--path"):
            path = val
        elif opt in ("-s", "--status"):
            status = val
        elif opt in ("-r", "--rulestore"):
            store_filepath = val
        elif opt in ("-d", "--dryrun"):
            dryrun = True
        elif opt in ("-h", "--help"):
            usage()
        else:
            usage("Unknown option specified: %s" % opt)

    # anything left over after option parsing is not understood
    if remainder:
        usage("Unknown option specified: <%s>" % remainder[0])

    check_args(hosts, action, status)

    if not os.path.exists(store_filepath):
        usage('no such rulestore at %s' % store_filepath)

    cdb = RuleStore(store_filepath)
    cdb.store_db_init(None)

    hosts, htype = clouseau.retention.utils.utils.get_hosts_expr_type(hosts)
    # if we are given one host, check that the host has a table or whine
    if htype == 'glob' and '*' not in hosts:
        if not clouseau.retention.utils.ruleutils.check_host_table_exists(
                cdb, hosts):
            usage('no such host in rule store, %s' % hosts)
    # grain and glob expressions get expanded to the matching minions
    if htype in ('grain', 'glob'):
        client = LocalClientPlus()
        hosts = client.cmd_expandminions(hosts, "test.ping", expr_form=htype)
    do_action(cdb, action, hosts, status, path, dryrun)
def main():
    '''
    command line entry point: read the options, make sure the rule
    store exists, work out which hosts are meant and hand everything
    to do_action()

    options accepted:
      -H/--hosts       host expression
      -a/--action      action to take
      -p/--path        path the action applies to
      -s/--status      status the action applies to
      -r/--rulestore   path to the rule store file
      -d/--dryrun      passed through to do_action as dryrun=True
      -h/--help        show usage

    NOTE(review): this relies on usage() not returning (presumably it
    exits); otherwise 'options' is unbound after a getopt error --
    confirm against its definition
    '''
    hosts = None
    action = None
    path = None
    status = None
    dryrun = False
    store_filepath = "/etc/data_retention/dataretention_rules.sq3"

    # the long option list has to include "rulestore=" so that
    # --rulestore is accepted just like its short form -r
    long_options = ["hosts=", "action=", "path=", "status=",
                    "rulestore=", "dryrun", "help"]
    try:
        (options, remainder) = getopt.gnu_getopt(
            sys.argv[1:], "H:a:p:s:r:dh", long_options)

    except getopt.GetoptError as err:
        usage("Unknown option specified: " + str(err))

    for (opt, val) in options:
        if opt in ("-H", "--hosts"):
            hosts = val
        elif opt in ("-a", "--action"):
            action = val
        elif opt in ("-p", "--path"):
            path = val
        elif opt in ("-s", "--status"):
            status = val
        elif opt in ("-r", "--rulestore"):
            store_filepath = val
        elif opt in ("-d", "--dryrun"):
            dryrun = True
        elif opt in ("-h", "--help"):
            usage()
        else:
            usage("Unknown option specified: %s" % opt)

    # positional arguments are not supported
    if remainder:
        usage("Unknown option specified: <%s>" % remainder[0])

    check_args(hosts, action, status)

    if not os.path.exists(store_filepath):
        usage('no such rulestore at %s' % store_filepath)

    cdb = RuleStore(store_filepath)
    cdb.store_db_init(None)

    hosts, htype = clouseau.retention.utils.utils.get_hosts_expr_type(hosts)
    # if we are given one host, check that the host has a table or whine
    single_host = (htype == 'glob' and '*' not in hosts)
    if single_host:
        if not clouseau.retention.utils.ruleutils.check_host_table_exists(
                cdb, hosts):
            usage('no such host in rule store, %s' % hosts)
    # grain and glob expressions get expanded to the matching minions
    if htype in ('grain', 'glob'):
        client = LocalClientPlus()
        hosts = client.cmd_expandminions(hosts, "test.ping", expr_form=htype)
    do_action(cdb, action, hosts, status, path, dryrun)
Beispiel #3
0
class CommandLine(object):
    '''
    prompt user at the command line for actions to take on a given
    directory or file, show results

    the session is menu driven: run() asks for a host, do_one_host()
    asks for one of the directories reported for that host, and the
    'top', 'status', 'examine' and 'rule' menus are then offered for
    that directory

    all output uses print(...) with a single argument so that the
    text produced is the same under the print statement and the
    print function
    '''

    # todo: down and up should check you really are (descending,
    # ascending path)

    def __init__(self,
                 confdir,
                 store_filepath,
                 timeout,
                 audit_type,
                 ignore_also=None,
                 hosts_expr=None):
        '''
        confdir:        directory handed to the config setup and to
                        the Ignores loader, and later to the remote
                        user config grabber
        store_filepath: path handed to RuleStore
        timeout:        passed on to the remote helpers (directory
                        contents, file examiner, user config grabber)
        audit_type:     type of audit, e.g. 'logs'; also selects the
                        '<audit_type>_locations' config entry
        ignore_also:    extra ignores, converted with
                        convert_ignore_also_to_ignores
        hosts_expr:     stored as is, not otherwise used in this class
        '''
        self.confdir = confdir

        self.cdb = RuleStore(store_filepath)
        self.cdb.store_db_init(None)

        self.timeout = timeout
        self.audit_type = audit_type
        self.locations = audit_type + "_locations"
        self.hosts_expr = hosts_expr

        # directory the user started from; 'up' never goes above it
        self.basedir = None

        clouseau.retention.utils.cliutils.init_readline_hist()
        # this is arbitrary, can tweak it later
        # how many levels down we keep in our list of
        # top-level dirs from which the user can start
        # their interactive session
        self.max_depth_top_level = 3

        self.filtertype = 'all'

        # fixme completely wrong
        self.batchno = 1

        clouseau.retention.utils.config.set_up_conf(self.confdir)

        # duplicate all the ignores except for the uh
        # ones specific to a host. those will be done
        # at host choice time
        # this includes rules, we will do those at host choice time too
        # we want: global, perhost, ignore_also (if there were any)

        self.local_ignored = None
        self.ignores = Ignores(self.confdir)
        # host name -> ignores derived from that host's rules
        self.ignored_from_rulestore = {}
        self.ignored_also = clouseau.retention.utils.ignores.convert_ignore_also_to_ignores(
            ignore_also)

        self.dircontents = CurrentDirContents(self.timeout)
        self.cenv = CurrentEnv()
        self.cmpl = Completion(self.dircontents, self.cenv,
                               self.max_depth_top_level)

    def _refresh_ignored_from_rulestore(self, host):
        '''
        re-read the ignores derived from the rule store for the given
        host and remember them; if the lookup returns nothing for the
        host, whatever was remembered before is left in place
        '''
        results = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in results:
            self.ignored_from_rulestore[host] = results[host]

    def _menu_loop(self, level, dir_path=None):
        '''
        show the menu for the given level and run the chosen command,
        over and over until a command returns a false value

        dir_path is passed to do_command; when it is None the current
        working directory, as it is at each round, is passed instead
        '''
        while True:
            command = self.show_menu(level)
            target = self.cenv.cwdir if dir_path is None else dir_path
            if not self.do_command(command, level, target):
                return

    def do_one_host(self, host, report):
        '''
        let the user pick directories from the report for the given
        host and work on them one after another, until the directory
        prompt returns None

        host:   name of the host to work on
        report: dict of host name -> report for the host, in the form
                get_dirs_toexamine accepts
        '''
        self.set_host(host)
        self._refresh_ignored_from_rulestore(host)

        if host not in report:
            dirs_problem = None
            dirs_skipped = None
        else:
            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
        self.cenv.set_reported_dirs(dirs_problem, dirs_skipped)

        if self.cenv.problem_dirs is None and self.cenv.skipped_dirs is None:
            print("No report available from this host")
            return
        if (len(self.cenv.problem_dirs) == 0 and
                len(self.cenv.skipped_dirs) == 0):
            print("No problem dirs and no skipped dirs on this host")
            return

        # problem dirs are offered cut down to max_depth_top_level
        # components; skipped dirs already covered that way are dropped
        dirs_problem_to_depth = [
            clouseau.retention.utils.cliutils.get_path_prefix(
                d, self.max_depth_top_level) for d in dirs_problem
        ]
        dirs_skipped = [
            s for s in dirs_skipped if s not in dirs_problem_to_depth
        ]
        relevant_dirs = (sorted(set(dirs_problem_to_depth)) +
                         sorted(set(dirs_skipped)))
        while True:
            dir_todo = self.cmpl.prompt_for_dir()
            if dir_todo is None:
                print("Done with this host")
                break
            if dir_todo not in relevant_dirs:
                print("Please choose one of the following directories:")
                # fixme another arbitrary setting
                clouseau.retention.utils.cliutils.print_columns(
                    relevant_dirs, 5)
                continue
            # start afresh for every directory chosen
            self.basedir = None
            self.cenv.cwdir = None
            self.do_one_directory(dir_todo)

    def run(self, report):
        '''
        call with full report output (not summary) across
        hosts, this will permit the user to examine
        directories and files of specified hosts and
        add/update rules for those dirs and files
        '''
        self.cenv.set_hosts(report.keys())
        while True:
            host_todo = self.cmpl.prompt_for_host()
            if host_todo is None:
                print("exiting at user request")
                break

            usercfgrab = RemoteUserCfGrabber(host_todo, self.timeout,
                                             self.audit_type, self.confdir)
            to_convert = usercfgrab.run(True)
            self.local_ignored = clouseau.retention.utils.ignores.process_local_ignores(
                to_convert)

            # do_one_host fetches the rule store ignores for the host
            # itself, so there is no need to fetch them here as well
            self.do_one_host(host_todo, report)

    def set_host(self, host):
        '''record the host the user is currently working on'''
        self.cenv.host = host

    def do_one_directory(self, path):
        '''
        offer the top level menu for the given directory again and
        again, until one of the commands returns None (the user quit)
        '''
        while self.get_do_command(path) is not None:
            pass

    def get_do_command(self, path):
        '''
        show the top level menu once and run the command chosen;
        returns whatever the command returned
        '''
        command = self.show_menu('top')
        return self.do_command(command, 'top', path)

    def get_menu_entry(self, choices, default, text):
        '''
        prompt the user with the given text and return the stripped
        answer, or the default if the answer was empty

        choices: entries set up for completion
        default: shown in brackets in the prompt, returned on empty input
        text:    menu text shown after the current prompt
        '''
        self.cmpl.set_choices_completion(choices, default)
        self.cenv.set_prompt()
        prompt = self.cenv.prompt + ' ' + text + " [%s]: " % default
        answer = raw_input(prompt).strip()
        if answer == "":
            return default
        return answer

    def show_menu(self, level):
        '''
        show the menu for the given level ('top', 'status', 'examine'
        or 'rule') and return the command the user entered; returns
        None for any other level
        '''
        if level == 'top':
            text = ("S(set status)/E(examine directory)/"
                    "F(filter directory listings)/"
                    "I(ignore)/R(manage rules)/Q(quit menu)")
            return self.get_menu_entry(['S', 'E', 'I', 'F', 'R', 'Q'], 'S',
                                       text)
        if level == 'status':
            text = Status.get_statuses_prompt(", ") + ", Q(quit status menu)"
            # argument order is (choices, default, text): the default
            # is the 'good' status, the text is the list of statuses
            return self.get_menu_entry(Status.STATUSES + ['Q'],
                                       Status.text_to_status('good'), text)
        if level == 'examine':
            text = ("D(down a level)/U(up a level)/E(show entries)/"
                    "C(show contents of file)/R(show rules)/"
                    "F(filter directory listings/"
                    "M(mark file(s))/Q(quit examine menu)")
            return self.get_menu_entry(
                ['D', 'U', 'E', 'F', 'C', 'R', 'M', 'Q'], 'E', text)
        if level == 'rule':
            text = ("S(show all rules of type)/D(show rules covering dir)/"
                    "C(show rules covering dir contents)/"
                    "A(add rule to rules store)/"
                    "R(remove rule from rules store/"
                    "E(export rules from store to file)/"
                    "I(import rules from file to store)/Q(quit rule menu)")
            # the default 'D' has to be among the choices too
            return self.get_menu_entry(
                ['S', 'D', 'C', 'A', 'R', 'E', 'I', 'Q'], 'D', text)
        return None

    def get_file_contents(self, path):
        '''
        return the result of running a RemoteFileExaminer on the
        given path on the current host
        '''
        # get 20 lines and hope that's enough for the user to evaluate
        # fixme the number of lines should be configurable
        fileexamin = RemoteFileExaminer(path,
                                        self.cenv.host,
                                        20,
                                        self.timeout,
                                        quiet=True)
        return fileexamin.run()

    def get_basedir_from_path(self, path):
        '''
        return the first configured audit location that either is
        the given path or contains it; '/' if there is none
        '''
        for location in clouseau.retention.utils.config.conf[self.locations]:
            if path == location or path.startswith(location + os.path.sep):
                return location
        # fixme is this really the right fallback? check it
        return '/'

    def entry_is_not_ignored(self, path, entrytype, do_check):
        '''
        see if the given entry is NOT in the ignored lists and return
        True if so, False otherwise
        we only do this check if the do_check argument is set to 'check';
        otherwise we default to True

        path:      absolute path of the entry
        entrytype: 'file' for files; anything else is treated as a
                   directory
        do_check:  'check' to really look at the ignore lists

        both files and directories are checked against the global
        ignores, the ignore_also list, and the per host and rule store
        ignores of the current host
        '''
        if do_check != 'check':
            return True

        basedir = self.get_basedir_from_path(path)
        is_file = (entrytype == 'file')
        if self.audit_type == 'logs' and is_file:
            path = LocalLogsAuditor.normalize(path)

        if is_file:
            def is_ignored(ignored):
                '''files are checked relative to their base directory'''
                return clouseau.retention.utils.ignores.file_is_ignored(
                    path, basedir, ignored)
        else:
            def is_ignored(ignored):
                '''directories are checked by path alone'''
                return clouseau.retention.utils.ignores.dir_is_ignored(
                    path, ignored)

        # lists that apply to every host
        for ignored in [self.ignores.global_ignored, self.ignored_also]:
            if is_ignored(ignored):
                return False

        # lists keyed by host name, only the current host matters
        for ignored in [
                self.ignores.perhost_ignored, self.ignored_from_rulestore
        ]:
            if self.cenv.host in ignored:
                if is_ignored(ignored[self.cenv.host]):
                    return False

        return True

    def get_entries_from_wildcard(self, file_expr):
        '''
        get entries from cwdir that match the
        expression

        at most one '*' is honoured; an expression without one
        matches only an entry of exactly that name
        returns a list of entry names, possibly empty
        '''
        # fixme that dang batchno, what a bad idea it was
        self.dircontents.get(self.cenv.host, self.cenv.cwdir, 1)
        entries = self.dircontents.entries_dict
        # one wildcard only, them's the breaks
        if '*' in file_expr:
            start, end = file_expr.split('*', 1)
            # the length check keeps start and end from overlapping
            # in short names
            shortest = len(start) + len(end)
            return [
                name for name in entries
                if (name.startswith(start) and name.endswith(end)
                    and len(name) >= shortest)
            ]
        if file_expr in entries:
            return [file_expr]
        return []

    def do_mark(self):
        '''
        ask for a file or directory name (one wildcard allowed) in
        the current directory and add a rule with status 'good' for
        each matching entry that is a regular file or a directory

        always returns True

        NOTE(review): the rule is added for the expression as typed
        (file_expr), once per matching entry, not for the entry
        itself -- confirm that this is what is wanted for wildcards
        '''
        readline.set_completer(self.cmpl.dir_entries_completion)
        file_expr = raw_input("file or dirname expression (empty to quit): ")
        file_expr = file_expr.strip()
        if file_expr == '':
            return True
        if file_expr[-1] == os.path.sep:
            file_expr = file_expr[:-1]
        if '*' in file_expr:
            entries_todo = self.get_entries_from_wildcard(file_expr)
        else:
            entries_todo = [file_expr]
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            if not self.dircontents.entries:
                # NOTE(review): in spite of the message, entries
                # missing from the listing are skipped below
                print('failed to get directory contents for %s'
                      % (self.cenv.cwdir,))
                print('marking dirs/files regardless')
        for entry in entries_todo:
            if entry not in self.dircontents.entries_dict:
                print('skipping %s, not in current dir listing' % entry)
                print(self.dircontents.entries_dict)
                continue
            filetype = clouseau.retention.utils.ruleutils.entrytype_to_text(
                self.dircontents.entries_dict[entry]['type'])
            if filetype == 'link':
                print('No need to mark %s links are always skipped'
                      % (file_expr,))
                continue
            if filetype != 'dir' and filetype != 'file':
                print('Not a dir or regular file, no need to mark, skipping')
                continue
            status = Status.text_to_status('good')
            clouseau.retention.utils.ruleutils.do_add_rule(
                self.cdb, file_expr, filetype, status, self.cenv.host)
        return True

    def do_add_rule(self):
        '''
        ask for a path (or wildcard expression) and a status, and add
        the rule to the rule store for the current host; a relative
        path is taken relative to the current directory, and a path
        ending in the path separator is stored as a directory,
        anything else as a file

        returns True, or None if the user quit at the status prompt
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        default = Status.text_to_status('good')
        self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input(statuses_text + " Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default
            # only the first letter of the answer counts
            if status[0].upper() in Status.STATUSES:
                status = status[0].upper()
                break
            if status in ('q', 'Q'):
                return None
            print("Unknown status type")

        # fixme should check that any wildcard is only one and only
        # in the last component... someday

        if path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'dir')
        else:
            filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(
                'file')

        clouseau.retention.utils.ruleutils.do_add_rule(self.cdb, path,
                                                       filetype, status,
                                                       self.cenv.host)
        # update the ignores list since we have a new rule
        self._refresh_ignored_from_rulestore(self.cenv.host)
        return True

    def do_show_rules_with_status(self):
        '''
        ask for a status (or A for all) and a path prefix, and show
        the rules of the current host that match

        returns True once rules have been shown, None if the user quit
        '''
        default = Status.text_to_status('problem')
        self.cmpl.set_choices_completion(['A'] + Status.STATUSES + ['Q'],
                                         default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input("status type A(all), " + statuses_text +
                               ", Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default

            if status in ('q', 'Q'):
                return None
            if status[0].upper() not in ['A'] + Status.STATUSES:
                print("Unknown status type")
                continue

            readline.set_completer(None)
            prefix = raw_input("starting with prefix? [/]: ")
            prefix = prefix.strip()
            if prefix == "":
                prefix = "/"
            if status in ('a', 'A'):
                clouseau.retention.utils.ruleutils.show_rules(self.cdb,
                                                              self.cenv.host,
                                                              prefix=prefix)
                return True
            if status[0].upper() in Status.STATUSES:
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, status[0].upper(), prefix=prefix)
                return True
            # reached for a longer answer starting with 'a' or 'A'
            # which is not a status: ask again

    def do_remove_rule(self):
        '''
        ask for a path (or wildcard expression) and remove the rule
        for it from the rule store for the current host; a relative
        path is taken relative to the current directory

        always returns True
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input("path or wildcard expr in rule (empty to quit): ")
        path = path.strip()
        if path == '':
            return True
        if path[0] != os.path.sep:
            path = os.path.join(self.cenv.cwdir, path)
        if path[-1] == os.path.sep:
            path = path[:-1]
        clouseau.retention.utils.ruleutils.do_remove_rule(
            self.cdb, path, self.cenv.host)
        # update the ignores list since we removed a rule
        self._refresh_ignored_from_rulestore(self.cenv.host)
        return True

    def get_rules_path(self):
        '''
        ask for the full path of a rules file; returns the path, or
        the empty string if the user entered nothing or the path did
        not pass check_rules_path
        '''
        readline.set_completer(None)
        rules_path = raw_input("full path to rules file (empty to quit): ")
        rules_path = rules_path.strip()
        if rules_path == '':
            return rules_path
        if not clouseau.retention.utils.cliutils.check_rules_path(rules_path):
            print("bad rules file path specified, aborting")
            return ''
        return rules_path

    def do_rule(self, command):
        '''
        run one command of the rule menu

        returns None if the user quit (Q), otherwise whatever the
        command returned, which is True for most of them; an unknown
        command shows the help for the menu
        '''
        key = command.upper()
        if key == 'A':
            return self.do_add_rule()
        if key == 'S':
            return self.do_show_rules_with_status()
        if key == 'D':
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_path(
                self.cdb, self.cenv.cwdir, self.cenv.host)
            return True
        if key == 'C':
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            clouseau.retention.utils.ruleutils.get_rules_for_entries(
                self.cdb, self.cenv.cwdir, self.dircontents.entries_dict,
                self.cenv.host)
            return True
        if key == 'R':
            return self.do_remove_rule()
        if key == 'I':
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.import_rules(
                    self.cdb, rules_path, self.cenv.host)
            return True
        if key == 'E':
            rules_path = self.get_rules_path()
            if rules_path != '':
                clouseau.retention.utils.ruleutils.export_rules(
                    self.cdb, rules_path, self.cenv.host)
            return True
        if key == 'Q':
            print("quitting this level")
            return None
        clouseau.retention.utils.cliutils.show_help('rule')
        return True

    def do_file_contents(self):
        '''
        ask for a file name, relative to the current directory unless
        it is absolute, and display what get_file_contents returns
        for it
        '''
        # fixme need a different completer here... meh
        readline.set_completer(None)
        filename = raw_input("filename (empty to quit): ")
        filename = filename.strip()
        if filename == '':
            return
        if filename[0] != os.path.sep:
            filename = os.path.join(self.cenv.cwdir, filename)
        contents = self.get_file_contents(filename)
        if contents is None:
            print("failed to get contents of file")
        else:
            print(contents)

    def do_filter(self):
        '''
        ask which entries the directory listings should show and
        store the choice in self.filtertype ('all', 'dir', 'file'
        or 'check'); empty input selects the default

        returns True if a filter was set, None if the user quit
        '''
        default = 'C'
        self.cmpl.set_choices_completion(['A', 'D', 'F', 'C', 'Q'], default)
        filtertypes = {'A': 'all', 'D': 'dir', 'F': 'file', 'C': 'check'}
        while True:
            answer = raw_input("filter A(all), D(directories only),"
                               " F(files only),"
                               " C(Entries checked (not ignored),"
                               " Q(quit)) [%s]: " % default)
            answer = answer.strip()
            if answer == "":
                answer = default
            key = answer.upper()
            if key in filtertypes:
                self.filtertype = filtertypes[key]
                return True
            if key == 'Q':
                return None
            print("Unknown filter type")

    def do_dir_descend(self, command):
        '''
        ask for a directory below the current one and make it the
        current directory; an absolute path is accepted only if it
        lies below the current directory, otherwise the user is
        asked again

        returns True after changing directory, or the command that
        was passed in if the user entered nothing
        '''
        while True:
            # prompt user for dir to descend
            readline.set_completer(self.cmpl.dir_completion)
            self.cenv.set_prompt()
            directory = raw_input(self.cenv.prompt + ' ' +
                                  "directory name (empty to quit): ")
            directory = directory.strip()
            if directory == '':
                return command
            # look at the first character before a trailing separator
            # is removed: an answer of just '/' is empty afterwards
            is_absolute = directory.startswith('/')
            if directory[-1] == os.path.sep:
                directory = directory[:-1]
            if (is_absolute and
                    not directory.startswith(self.cenv.cwdir + os.path.sep)):
                print('New directory is not a subdirectory of %s skipping'
                      % (self.cenv.cwdir,))
                continue
            self.cenv.cwdir = os.path.join(self.cenv.cwdir, directory)
            self.dircontents.clear()
            self.cenv.set_prompt()
            print('Now at %s' % (self.cenv.cwdir,))
            return True

    def do_examine(self, command):
        '''
        run one command of the examine menu

        returns None if the user quit (Q or empty command), otherwise
        a true value; an unknown command shows the help for the menu
        '''
        key = command.upper()
        if key == 'D':
            return self.do_dir_descend(command)
        if key == 'U':
            # never go up past the directory the user started from
            if self.cenv.cwdir != self.basedir:
                self.cenv.cwdir = os.path.dirname(self.cenv.cwdir)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print('Now at %s' % (self.cenv.cwdir,))
            else:
                print('Already at top %s' % (self.cenv.cwdir,))
            return True
        if key == 'E':
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            return True
        if key == 'C':
            self.do_file_contents()
            return True
        if key == 'F':
            self.do_filter()
            return True
        if key == 'R':
            self._menu_loop('rule')
            return True
        if key == 'M':
            return self.do_mark()
        if key == 'Q' or key == '':
            print("quitting this level")
            return None
        clouseau.retention.utils.cliutils.show_help('examine')
        return True

    def do_top(self, command, dir_path):
        '''
        run one command of the top level menu for the directory
        dir_path

        returns None if the user quit (Q), the command itself for
        I (which does nothing else), True otherwise; an unknown
        command shows the help for the menu
        '''
        key = command.upper()
        if key == 'S':
            self._menu_loop('status', dir_path)
        elif key == 'E':
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            # fixme this should let the user page through batches,
            # not use '1' every time
            self._menu_loop('examine')
        elif key == 'F':
            self.do_filter()
        elif key == 'I':
            # do nothing
            return command
        elif key == 'R':
            self._menu_loop('rule')
        elif key == 'Q':
            return None
        else:
            clouseau.retention.utils.cliutils.show_help('top')
        return True

    def do_command(self, command, level, dir_path):
        '''
        run the command from the menu of the given level

        command:  what the user entered; None does nothing
        level:    'top', 'status', 'examine' or 'rule'
        dir_path: directory being worked on; it becomes the base
                  directory and the current directory if those are
                  not set yet

        returns None if the menu is to be left, a true value if it
        should be shown again; None for an unknown level
        '''
        if self.basedir is None:
            self.basedir = dir_path
        if self.cenv.cwdir is None:
            self.cenv.cwdir = dir_path

        if command is None:
            return None

        if level == 'top':
            return self.do_top(command, dir_path)
        if level == 'examine':
            return self.do_examine(command)
        if level == 'rule':
            return self.do_rule(command)
        if level != 'status':
            return None

        if command in Status.STATUSES:
            # this option is invoked on a directory so
            # type is dir every time
            clouseau.retention.utils.ruleutils.do_add_rule(
                self.cdb, dir_path,
                clouseau.retention.utils.ruleutils.text_to_entrytype(
                    'dir'), command, self.cenv.host)
            return None
        if command in ('Q', 'q'):
            return None
        clouseau.retention.utils.cliutils.show_help(level)
        return True
class RemoteFilesAuditor(object):
    '''
    audit files across a set of remote hosts,
    in a specified set of directories
    '''
    def __init__(self,
                 hosts_expr,
                 audit_type,
                 confdir=None,
                 prettyprint=False,
                 show_content=False,
                 dirsizes=False,
                 summary_report=False,
                 depth=2,
                 to_check=None,
                 ignore_also=None,
                 timeout=60,
                 maxfiles=None,
                 store_filepath=None,
                 verbose=False):
        '''
        hosts_expr:   list or grain-based or wildcard expr for hosts
                      to be audited
        audit_type:   type of audit e.g. 'logs', 'homes'
        confdir:      directory where the yaml config files are stored
        prettyprint:  nicely format the output display
        show_content: show the first line or so from problematic files
        dirsizes:     show only directories which have too many files to
                      audit properly, don't report on files at all
        summary_report: do a summary of results instead of detailed
                        this means different things depending on the audit
                        type
        depth:        the auditor will give up if a directory has too many
                      files in it (saves it from dying on someone's 25gb
                      homedir). this option tells it how far down the tree
                      to go from the top dir of the audit, before starting
                      to count.
                      e.g. do we count in /home/ariel or separately in
                      /home/ariel/* or in /home/ariel/*/*, etc.
        to_check:     comma-separated list of dirs (must end in '/') and/or
                      files that will be checked; if this is None then
                      all dirs/files will be checked
        ignore_also:  comma-separated list of dirs (must end in '/') and/or
                      files that will be skipped in addition to the ones
                      in the config, rules, etc.
        timeout:      salt timeout for running remote commands
        maxfiles:     how many files in a directory tree is too many to audit
                      (at which point we warn about that and move on)
        store_filepath: full path to rule store (sqlite3 db)
        verbose:      show informative messages during processing
        '''

        self.hosts_expr = hosts_expr
        self.audit_type = audit_type
        self.confdir = confdir
        self.locations = audit_type + "_locations"
        self.prettyprint = prettyprint
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        self.show_summary = summary_report
        self.depth = depth + 1  # actually count of path separators in dirname
        self.to_check = to_check

        self.ignore_also = ignore_also
        self.timeout = timeout
        self.store_filepath = store_filepath
        self.verbose = verbose

        # sets self.max_files, picking a default when maxfiles is None;
        # relies on self.dirsizes having been set above
        self.set_up_max_files(maxfiles)

        self.magic = clouseau.retention.utils.magic.magic_open(
            clouseau.retention.utils.magic.MAGIC_NONE)
        self.magic.load()
        self.summary = None
        self.display_from_dict = FileInfo.display_from_dict
        self.runner = None

        if self.hosts_expr in ("127.0.0.1", "localhost"):
            # run locally
            self.localaudit = True
            self.expanded_hosts = []
            self.cdb = None
        else:
            self.localaudit = False
            clouseau.retention.utils.config.set_up_conf(confdir)
            client = LocalClientPlus()
            hosts, expr_type = Runner.get_hosts_expr_type(self.hosts_expr)
            self.expanded_hosts = client.cmd_expandminions(hosts,
                                                           "test.ping",
                                                           expr_form=expr_type)

            self.cdb = RuleStore(self.store_filepath)
            self.cdb.store_db_init(self.expanded_hosts)
            self.set_up_and_export_rule_store()

    def get_audit_args(self):
        '''
        return the list of arguments handed to the runner for the
        audit; depth is passed as the caller originally gave it
        (self.depth holds that value plus one)
        '''
        return [
            self.confdir, self.show_sample_content, self.dirsizes,
            self.depth - 1, self.to_check, self.ignore_also, self.max_files
        ]

    def set_up_runner(self):
        '''
        create the Runner used for the remote audit and keep it
        in self.runner
        '''
        self.runner = Runner(self.confdir, self.store_filepath,
                             self.hosts_expr,
                             self.expanded_hosts, self.audit_type,
                             self.get_audit_args(), self.show_sample_content,
                             self.to_check, self.timeout, self.verbose)

    def set_up_max_files(self, maxfiles):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are
        too large in this manner, we can set defaults much
        higher, since we don't stat files, open them to guess
        their filetype, etc; processing then goes much quicker
        '''
        if maxfiles is not None:
            self.max_files = maxfiles
        elif self.dirsizes:
            self.max_files = 1000
        else:
            self.max_files = 100

    def set_up_and_export_rule_store(self):
        '''
        export the rules of every host in the rule store to yaml files
        in the directory data_retention.d next to the rule store: one
        file with all rules and one with only the rules of status 'good'
        per host; the directory is created if it does not exist
        '''
        hosts = self.cdb.store_db_list_all_hosts()
        destdir = os.path.join(os.path.dirname(self.store_filepath),
                               "data_retention.d")
        if not os.path.isdir(destdir):
            # 0o755 is accepted by python 2.6+ and python 3,
            # unlike the old-style 0755 literal
            os.makedirs(destdir, 0o755)
        for host in hosts:
            all_destpath = os.path.join(destdir, host + "_store.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, all_destpath, host)
            good_destpath = os.path.join(destdir, host + "_store_good.yaml")
            clouseau.retention.utils.ruleutils.export_rules(
                self.cdb, good_destpath, host, Status.text_to_status('good'))

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    @staticmethod
    def get_dirname_from_warning(warning):
        '''
        some audit output lines warn about directory trees
        having too many files to audit; grab the dirname
        out of such a line and return it

        returns None if the line is not one of the two known
        warning formats or if it is malformed
        '''
        start = "WARNING: directory "
        if warning.startswith(start):
            # WARNING: directory %s has more than %d files
            rindex = warning.rfind(" has more than")
            # rfind signals 'not found' with -1, which is truthy,
            # so it must be compared explicitly
            if rindex == -1:
                return None
            return warning[len(start):rindex]

        start = "WARNING: too many files to audit in directory "
        if warning.startswith(start):
            return warning[len(start):]

        return None

    def add_stats(self, item, summary):
        '''
        gather stats on how many files/dirs
        may be problematic; summary is where the results
        are collected, item is the item to include in
        the summary if needed

        item is a dict with the keys 'path', 'binary', 'old'
        and 'owner'; the updated summary is returned
        '''
        dirname = os.path.dirname(item['path'])

        if dirname not in summary:
            summary[dirname] = dict(
                (group, {'old': 0, 'maybe_old': 0, 'nonroot': 0})
                for group in ('binary', 'text'))

        group = 'binary' if item['binary'] is True else 'text'
        counts = summary[dirname][group]

        if item['old'] == 'T':
            counts['old'] += 1
        elif item['old'] == '-':
            counts['maybe_old'] += 1
        if item['owner'] != 0:
            counts['nonroot'] += 1
        return summary

    def display_host_summary(self):
        '''
        print one line per directory and group (binary/text) of
        self.summary that has at least one nonzero count
        '''
        if self.summary is None:
            return
        for path in sorted(self.summary.keys()):
            for group in self.summary[path]:
                counts = self.summary[path][group]
                if (counts['old'] > 0 or counts['maybe_old'] > 0
                        or counts['nonroot'] > 0):
                    print(
                        "in directory %s, (%s), %d old,"
                        " %d maybe old, %d with non root owner" %
                        (path, group, counts['old'],
                         counts['maybe_old'], counts['nonroot']))

    def display_summary(self, result):
        '''
        given a dict of audit output per host, collect the stats
        for each host and display them; lines that cannot be
        processed are reported and skipped
        '''
        for host in result:
            self.summary = {}
            print("host: %s" % (host,))

            if not result[host]:
                if self.verbose:
                    print("WARNING: no output from host %s" % (host,))
                continue

            try:
                for line in result[host].split('\n'):
                    if display_summary_line(line):
                        continue
                    try:
                        item = json.loads(
                            line, object_hook=JsonHelper.decode_dict)
                        if item['empty'] is not True:
                            self.add_stats(item, self.summary)
                    except Exception:
                        # best effort: report the bad line and go on
                        print("WARNING: failed to json load from host"
                              " %s this line: %s" % (host, line))
                self.display_host_summary()
            except Exception:
                print("WARNING: failed to process output from host")

    def display_remote_host(self, result):
        '''
        display the audit output of one host: WARNING and INFO
        lines are printed as they are, every other nonempty line
        is loaded as json and shown via self.display_from_dict
        '''
        try:
            files = []
            for line in result.split('\n'):
                if line == "":
                    continue
                if line.startswith("WARNING:") or line.startswith("INFO:"):
                    print(line)
                else:
                    files.append(
                        json.loads(line, object_hook=JsonHelper.decode_dict))

            if not files:
                return
            path_justify = max(len(finfo['path']) for finfo in files) + 2
            for finfo in files:
                self.display_from_dict(finfo, self.show_sample_content,
                                       path_justify)
        except Exception:
            print("WARNING: failed to load json from host")

    def get_local_auditor(self):
        '''
        return the auditor used when the audit is to be run
        on the local host
        '''
        # NOTE(review): this hands over self.depth (the caller's depth
        # plus one) while get_audit_args passes self.depth - 1;
        # confirm which one LocalFilesAuditor expects
        return LocalFilesAuditor(self.audit_type, self.confdir,
                                 self.show_sample_content, self.dirsizes,
                                 self.depth, self.to_check, self.ignore_also,
                                 self.max_files)

    def audit_hosts(self):
        '''
        run the audit, display the results and record problem or
        skipped directories in the rule store

        returns the dict of output per host for a remote audit,
        None for a local audit or if the remote run produced nothing
        '''
        # do local audit instead
        if self.localaudit:
            localauditor = self.get_local_auditor()
            result = localauditor.do_local_audit()
            self.display_remote_host(result)
            return

        # proceed to regular remote audit
        self.set_up_runner()
        result = self.runner.run_remotely()
        if result is None:
            print("WARNING: failed to get output from audit script"
                  " on any host")
            # nothing to display and nothing to add to the rule store
            return None

        if self.show_summary:
            self.display_summary(result)
        else:
            for host in result:
                print("host: %s" % (host,))
                if result[host]:
                    self.display_remote_host(result[host])
                elif self.verbose:
                    print("no output from host %s" % (host,))
        # add some results to rule store
        self.update_status_rules_from_report(result)
        return result

    def update_status_rules_from_report(self, report):
        '''
        for each host in the report, add a rule of status 'problem'
        for every directory reported as having problems, and a rule
        of status 'unreviewed' for every directory that was skipped
        unless that directory is already known as a problem

        report: dict of audit output per host; None or empty
                means there is nothing to do
        '''
        if not report:
            return

        ruleutils = clouseau.retention.utils.ruleutils
        for host in report:
            try:
                problem_rules = ruleutils.get_rules(
                    self.cdb, host, Status.text_to_status('problem'))
            except Exception:
                print('WARNING: problem retrieving problem rules'
                      ' for host %s' % (host,))
                problem_rules = None
            if problem_rules is not None:
                existing_problems = [rule['path'] for rule in problem_rules]
            else:
                existing_problems = []

            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
            # either list may be missing; treat that as 'none found'
            # so the membership test below cannot fail
            new_problems = set(dirs_problem or [])
            skipped = set(dirs_skipped or [])

            for dirname in new_problems:
                ruleutils.do_add_rule(
                    self.cdb, dirname, ruleutils.text_to_entrytype('dir'),
                    Status.text_to_status('problem'), host)

            for dirname in skipped:
                if dirname in new_problems or dirname in existing_problems:
                    # problem report overrides 'too many to audit'
                    continue
                ruleutils.do_add_rule(
                    self.cdb, dirname, ruleutils.text_to_entrytype('dir'),
                    Status.text_to_status('unreviewed'), host)
Beispiel #5
0
class CommandLine(object):
    '''
    prompt user at the command line for actions to take on a given
    directory or file, show results
    '''
    # todo: down and up should check you really are (descending,
    # ascending path)

    def __init__(self, confdir, store_filepath, timeout, audit_type,
                 ignore_also=None, hosts_expr=None):
        '''
        open the rule store, read the config and the ignore lists,
        and set up the helper objects for the interactive session

        confdir:        directory passed to the config and ignores setup
        store_filepath: path of the rule store to open
        timeout:        timeout handed to the directory content fetcher
        audit_type:     type of audit; also determines the name of the
                        locations setting (<audit_type>_locations)
        ignore_also:    extra ignores, converted and kept in
                        self.ignored_also
        hosts_expr:     stored as is in self.hosts_expr
        '''
        # plain settings, no side effects
        self.confdir = confdir
        self.timeout = timeout
        self.audit_type = audit_type
        self.locations = audit_type + "_locations"
        self.hosts_expr = hosts_expr
        self.basedir = None
        self.filtertype = 'all'
        self.local_ignored = None
        self.ignored_from_rulestore = {}

        # this is arbitrary, can tweak it later
        # how many levels down we keep in our list of
        # top-level dirs from which the user can start
        # their interactive session
        self.max_depth_top_level = 3

        # fixme completely wrong
        self.batchno = 1

        rulestore = RuleStore(store_filepath)
        rulestore.store_db_init(None)
        self.cdb = rulestore

        clouseau.retention.utils.cliutils.init_readline_hist()
        clouseau.retention.utils.config.set_up_conf(self.confdir)

        # duplicate all the ignores except for the
        # ones specific to a host. those will be done
        # at host choice time
        # this includes rules, we will do those at host choice time too
        # we want: global, perhost, ignore_also (if there were any)
        self.ignores = Ignores(self.confdir)
        to_ignores = (
            clouseau.retention.utils.ignores.convert_ignore_also_to_ignores)
        self.ignored_also = to_ignores(ignore_also)

        self.dircontents = CurrentDirContents(self.timeout)
        self.cenv = CurrentEnv()
        self.cmpl = Completion(self.dircontents, self.cenv,
                               self.max_depth_top_level)

    def do_one_host(self, host, report):
        '''
        interactive session for one host: refresh the ignores that
        come from the rule store, work out which directories the
        report flagged for this host, and let the user pick
        directories to work on until they are done

        host:   the host to work on
        report: dict of audit output per host
        '''
        self.set_host(host)
        from_store = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in from_store:
            self.ignored_from_rulestore[host] = from_store[host]

        if host in report:
            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
        else:
            dirs_problem, dirs_skipped = None, None
        self.cenv.set_reported_dirs(dirs_problem, dirs_skipped)

        if self.cenv.problem_dirs is None and self.cenv.skipped_dirs is None:
            print("No report available from this host")
            return
        if len(self.cenv.problem_dirs) == 0 and len(self.cenv.skipped_dirs) == 0:
            print("No problem dirs and no skipped dirs on this host")
            return

        # problem dirs are cut down to the top few levels; skipped
        # dirs already covered by one of those are dropped
        get_prefix = clouseau.retention.utils.cliutils.get_path_prefix
        problem_tops = set(get_prefix(dirname, self.max_depth_top_level)
                           for dirname in dirs_problem)
        skipped_left = set(dirname for dirname in dirs_skipped
                           if dirname not in problem_tops)
        relevant_dirs = sorted(problem_tops) + sorted(skipped_left)

        while True:
            dir_todo = self.cmpl.prompt_for_dir()
            if dir_todo is None:
                print("Done with this host")
                break
            if dir_todo in relevant_dirs:
                # start afresh for each directory chosen
                self.basedir = None
                self.cenv.cwdir = None
                self.do_one_directory(dir_todo)
                continue
            print("Please choose one of the following directories:")
            # fixme another arbitrary setting
            clouseau.retention.utils.cliutils.print_columns(relevant_dirs, 5)

    def run(self, report):
        '''
        call with full report output (not summary) across
        hosts, this will permit the user to examine
        directories and files of specified hosts and
        add/update rules for those dirs and files

        the user is prompted for a host over and over; for each
        one the host's own ignores and the ignores from the rule
        store are collected before the session for that host starts
        '''
        self.cenv.set_hosts(report.keys())
        ignores = clouseau.retention.utils.ignores
        while True:
            host_todo = self.cmpl.prompt_for_host()
            if host_todo is None:
                print("exiting at user request")
                return

            grabber = RemoteUserCfGrabber(host_todo, self.timeout,
                                          self.audit_type, self.confdir)
            self.local_ignored = ignores.process_local_ignores(
                grabber.run(True))

            from_store = ignores.get_ignored_from_rulestore(
                self.cdb, [host_todo])
            if host_todo in from_store:
                self.ignored_from_rulestore[host_todo] = from_store[host_todo]

            self.do_one_host(host_todo, report)

    def set_host(self, host):
        '''
        record the host we are currently working on
        in the current environment
        '''
        self.cenv.host = host

    def do_one_directory(self, path):
        '''
        show the top level menu for the given directory and carry
        out the command the user chooses, over and over, until
        a command returns None
        '''
        while self.get_do_command(path) is not None:
            pass

    def get_do_command(self, path):
        '''
        ask the user for a command from the top level menu,
        run it on the given path and return its result
        '''
        return self.do_command(self.show_menu('top'), 'top', path)

    def get_menu_entry(self, choices, default, text):
        '''
        prompt the user with the menu text and return the entry
        typed, stripped of surrounding whitespace; empty input
        gives the default

        choices: entries offered for completion
        default: entry returned on empty input, shown in brackets
        text:    menu text shown after the environment's prompt
        '''
        self.cmpl.set_choices_completion(choices, default)
        self.cenv.set_prompt()
        prompt = ' '.join([self.cenv.prompt, text]) + " [%s]: " % default
        answer = raw_input(prompt).strip()
        return answer if answer != "" else default

    def show_menu(self, level):
        '''
        show the menu for the given level and return the entry
        the user chose

        level: one of 'top', 'status', 'examine', 'rule'

        returns the command as returned by get_menu_entry,
        or None if the level is unknown
        '''
        if level == 'top':
            text = ("S(set status)/E(examine directory)/"
                    "Filter directory listings/"
                    "I(ignore)/R(manage rules)/Q(quit menu)")
            return self.get_menu_entry(
                ['S', 'E', 'I', 'F', 'R', 'Q'], 'S', text)

        if level == 'status':
            text = Status.get_statuses_prompt(", ") + ", Q(quit status menu)"
            # get_menu_entry wants (choices, default, text) in that order;
            # the default here is the 'good' status
            return self.get_menu_entry(Status.STATUSES + ['Q'],
                                       Status.text_to_status('good'), text)

        if level == 'examine':
            text = ("D(down a level)/U(up a level)/E(show entries)/"
                    "C(show contents of file)/R(show rules)/"
                    "F(filter directory listings/"
                    "M(mark file(s))/Q(quit examine menu)")
            return self.get_menu_entry(
                ['D', 'U', 'E', 'F', 'C', 'R', 'M', 'Q'], 'E', text)

        if level == 'rule':
            text = ("S(show all rules of type)/D(show rules covering dir)/"
                    "C(show rules covering dir contents)/"
                    "A(add rule to rules store)/"
                    "R(remove rule from rules store/"
                    "E(export rules from store to file)/"
                    "I(import rules from file to store)/Q(quit rule menu)")
            # 'D' is the default and is in the menu text, so it
            # belongs in the choices too
            return self.get_menu_entry(
                ['S', 'D', 'C', 'A', 'R', 'E', 'I', 'Q'], 'D', text)

        return None

    def get_file_contents(self, path):
        '''
        run a RemoteFileExaminer for the given path on the
        current host and return what it produces
        '''
        # get 20 lines and hope that's enough for the user to evaluate
        # fixme the number of lines should be configurable
        return RemoteFileExaminer(path, self.cenv.host, 20, self.timeout,
                                  quiet=True).run()

    def get_basedir_from_path(self, path):
        '''
        return the first configured location for this audit type
        that is the path itself or a directory above it;
        '/' if there is none
        '''
        locations = clouseau.retention.utils.config.conf[self.locations]
        candidates = (location for location in locations
                      if path == location
                      or path.startswith(location + os.path.sep))
        # fixme is this really the right fallback? check it
        return next(candidates, '/')

    def entry_is_not_ignored(self, path, entrytype, do_check):
        '''
        see if the given entry is NOT in the ignored lists and return
        True if so, False otherwise
        we only do this check if the do_check argument is set to 'check';
        otherwise we default to True

        path:      path of the file or directory
        entrytype: 'file' for files; anything else is treated as
                   a directory
        do_check:  'check' to really look at the ignore lists

        the lists consulted are the global ignores, the extra ignores
        given at startup, and for the current host the per host
        ignores and the ignores from the rule store
        '''
        if do_check != 'check':
            return True

        basedir = self.get_basedir_from_path(path)
        is_file = entrytype == 'file'
        if is_file and self.audit_type == 'logs':
            path = LocalLogsAuditor.normalize(path)

        # the two checkers take different arguments: the file one
        # needs the base directory as well
        if is_file:
            def is_ignored(ignored):
                return clouseau.retention.utils.ignores.file_is_ignored(
                    path, basedir, ignored)
        else:
            def is_ignored(ignored):
                return clouseau.retention.utils.ignores.dir_is_ignored(
                    path, ignored)

        # these checks apply to files and directories alike
        for ignored in (self.ignores.global_ignored, self.ignored_also):
            if is_ignored(ignored):
                return False

        host = self.cenv.host
        for ignored in (self.ignores.perhost_ignored,
                        self.ignored_from_rulestore):
            if host in ignored and is_ignored(ignored[host]):
                return False

        return True

    def get_entries_from_wildcard(self, file_expr):
        '''
        fetch the contents of the current working dir and return
        the list of entries that match the expression

        only the first '*' in the expression is treated as a
        wildcard; without one the expression must be an entry itself
        '''
        # fixme that dang batchno, what a bad idea it was
        self.dircontents.get(self.cenv.host, self.cenv.cwdir, 1)
        known = self.dircontents.entries_dict

        if '*' not in file_expr:
            return [file_expr] if file_expr in known else []

        # one wildcard only, them's the breaks
        head, tail = file_expr.split('*', 1)
        # head and tail must not overlap in the entry name
        min_length = len(head) + len(tail)
        matches = []
        for name in known:
            if (len(name) >= min_length and name.startswith(head)
                    and name.endswith(tail)):
                matches.append(name)
        return matches

    def do_mark(self):
        '''
        ask the user for a file or directory expression and add a
        rule of status 'good' for the entries of the current dir
        listing that it covers; links and anything that is not a
        directory or a regular file are skipped

        always returns True
        '''
        readline.set_completer(self.cmpl.dir_entries_completion)
        file_expr = raw_input(
            "file or dirname expression (empty to quit): ").strip()
        if file_expr == '':
            return True
        if file_expr[-1] == os.path.sep:
            file_expr = file_expr[:-1]

        if '*' in file_expr:
            entries_todo = self.get_entries_from_wildcard(file_expr)
        else:
            entries_todo = [file_expr]
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            if not self.dircontents.entries:
                print('failed to get directory contents for %s'
                      % (self.cenv.cwdir,))
                print('marking dirs/files regardless')

        ruleutils = clouseau.retention.utils.ruleutils
        for entry in entries_todo:
            if entry not in self.dircontents.entries_dict:
                print('skipping %s, not in current dir listing' % entry)
                print(self.dircontents.entries_dict)
                continue

            filetype = ruleutils.entrytype_to_text(
                self.dircontents.entries_dict[entry]['type'])
            if filetype == 'link':
                print('No need to mark %s links are always skipped'
                      % (file_expr,))
                continue
            if filetype not in ('dir', 'file'):
                print('Not a dir or regular file, no need to mark, skipping')
                continue

            status = Status.text_to_status('good')
            # NOTE(review): the rule is added for file_expr, not for the
            # entry, so a wildcard expression is stored as is once per
            # matching entry; confirm that this is intended
            ruleutils.do_add_rule(
                self.cdb, file_expr, filetype, status, self.cenv.host)
        return True

    def do_add_rule(self):
        '''
        ask the user for a path and a status, add the rule to the
        rule store for the current host and refresh the ignores
        that come from the rule store

        a path that does not start with the path separator is taken
        relative to the current working dir; a path ending in the
        separator is stored as a directory, anything else as a file

        returns True if a rule was added or the path was left empty,
        None if the user quit at the status prompt
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input(
            "path or wildcard expr in rule (empty to quit): ").strip()
        if path == '':
            return True

        default = Status.text_to_status('good')
        self.cmpl.set_choices_completion(Status.STATUSES + ['Q'], default)
        while True:
            prompt = (Status.get_statuses_prompt(", ") +
                      " Q(quit)) [%s]: " % default)
            status = raw_input(prompt).strip()
            if status == "":
                status = default
            # only the first letter counts
            initial = status[0].upper()
            if initial in Status.STATUSES:
                status = initial
                break
            if status in ('q', 'Q'):
                return None
            print("Unknown status type")

        # fixme should check that any wildcard is only one and only
        # in the last component... someday

        if not path.startswith(os.path.sep):
            path = os.path.join(self.cenv.cwdir, path)
        if path.endswith(os.path.sep):
            path = path[:-1]
            kind = 'dir'
        else:
            kind = 'file'
        filetype = clouseau.retention.utils.ruleutils.text_to_entrytype(kind)

        clouseau.retention.utils.ruleutils.do_add_rule(
            self.cdb, path, filetype, status, self.cenv.host)

        # update the ignores list since we have a new rule
        host = self.cenv.host
        from_store = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in from_store:
            self.ignored_from_rulestore[host] = from_store[host]
        return True

    def do_show_rules_with_status(self):
        '''
        ask the user for a status (or A for all) and a path prefix,
        and show the rules of the current host with that status
        whose path starts with the prefix

        only the first letter of the status entered counts, in
        either case; empty input gives the 'problem' status

        returns True once rules have been shown, None if the
        user quit
        '''
        default = Status.text_to_status('problem')
        valid = ['A'] + Status.STATUSES
        self.cmpl.set_choices_completion(valid + ['Q'], default)
        while True:
            statuses_text = Status.get_statuses_prompt(", ")
            status = raw_input("status type A(all), " + statuses_text +
                               ", Q(quit)) [%s]: " % default)
            status = status.strip()
            if status == "":
                status = default

            if status == 'q' or status == 'Q':
                return None

            # normalize once, so that the check here and the choice
            # of what to show below cannot disagree
            status = status[0].upper()
            if status not in valid:
                print("Unknown status type")
                continue

            readline.set_completer(None)
            prefix = raw_input("starting with prefix? [/]: ").strip()
            if prefix == "":
                prefix = "/"

            if status == 'A':
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, prefix=prefix)
            else:
                clouseau.retention.utils.ruleutils.show_rules(
                    self.cdb, self.cenv.host, status, prefix=prefix)
            return True

    def do_remove_rule(self):
        '''
        ask the user for a path, remove the rule for it from the
        rule store for the current host and refresh the ignores
        that come from the rule store

        a path that does not start with the path separator is taken
        relative to the current working dir; one trailing separator
        is dropped

        always returns True
        '''
        # fixme need different completer here I think, that
        # completes relative to self.cwdir
        readline.set_completer(None)
        path = raw_input(
            "path or wildcard expr in rule (empty to quit): ").strip()
        if path == '':
            return True

        if not path.startswith(os.path.sep):
            path = os.path.join(self.cenv.cwdir, path)
        if path.endswith(os.path.sep):
            path = path[:-1]
        clouseau.retention.utils.ruleutils.do_remove_rule(
            self.cdb, path, self.cenv.host)

        # update the ignores list since we removed a rule
        host = self.cenv.host
        from_store = clouseau.retention.utils.ignores.get_ignored_from_rulestore(
            self.cdb, [host])
        if host in from_store:
            self.ignored_from_rulestore[host] = from_store[host]
        return True

    def get_rules_path(self):
        '''
        ask the user for the path to a rules file and return it;
        returns the empty string if nothing was entered or if
        the path does not pass the check
        '''
        readline.set_completer(None)
        rules_path = raw_input(
            "full path to rules file (empty to quit): ").strip()
        if rules_path == '':
            return rules_path
        if clouseau.retention.utils.cliutils.check_rules_path(rules_path):
            return rules_path
        print("bad rules file path specified, aborting")
        return ''

    def do_rule(self, command):
        '''
        carry out a command from the rule menu

        A add a rule, S show rules with some status, D show rules
        for the current dir, C show rules for the entries of the
        current dir, R remove a rule, I import rules from a file,
        E export rules to a file, Q quit; anything else shows the help

        returns None on quit, the result of the handler for A, S
        and R, and True otherwise
        '''
        ruleutils = clouseau.retention.utils.ruleutils

        if command in ('A', 'a'):
            return self.do_add_rule()
        if command in ('S', 's'):
            return self.do_show_rules_with_status()
        if command in ('R', 'r'):
            return self.do_remove_rule()
        if command in ('Q', 'q'):
            print("quitting this level")
            return None

        # everything below keeps the user in the rule menu
        if command in ('D', 'd'):
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            ruleutils.get_rules_for_path(self.cdb, self.cenv.cwdir,
                                         self.cenv.host)
        elif command in ('C', 'c'):
            self.dircontents.get(self.cenv.host, self.cenv.cwdir, self.batchno)
            ruleutils.get_rules_for_entries(
                self.cdb, self.cenv.cwdir,
                self.dircontents.entries_dict,
                self.cenv.host)
        elif command in ('I', 'i'):
            rules_path = self.get_rules_path()
            if rules_path != '':
                ruleutils.import_rules(self.cdb, rules_path, self.cenv.host)
        elif command in ('E', 'e'):
            rules_path = self.get_rules_path()
            if rules_path != '':
                ruleutils.export_rules(self.cdb, rules_path, self.cenv.host)
        else:
            clouseau.retention.utils.cliutils.show_help('rule')
        return True

    def do_file_contents(self):
        # fixme need a different completer here... meh
        readline.set_completer(None)
        filename = raw_input("filename (empty to quit): ")
        filename = filename.strip()
        if filename == '':
            return
        if filename[0] != os.path.sep:
            filename = os.path.join(self.cenv.cwdir, filename)
        contents = self.get_file_contents(filename)
        if contents is not None:
            print contents
        else:
            print "failed to get contents of file"

    def do_filter(self):
        default = 'C'
        self.cmpl.set_choices_completion(['A', 'D', 'F', 'C', 'Q'], default)
        while True:
            filtertype = raw_input("filter A(all), D(directories only),"
                                   " F(files only),"
                                   " C(Entries checked (not ignored),"
                                   " Q(quit)) [?]: ")
            filtertype = filtertype.strip()
            if filtertype == "":
                filtertype = default
            if filtertype == 'a' or filtertype == 'A':
                self.filtertype = 'all'
                return True
            elif filtertype == 'D' or filtertype == 'd':
                self.filtertype = 'dir'
                return True
            elif filtertype == 'F' or filtertype == 'f':
                self.filtertype = 'file'
                return True
            elif filtertype == 'C' or filtertype == 'c':
                self.filtertype = 'check'
                return True
            elif filtertype == 'q' or filtertype == 'Q':
                return None
            else:
                print "Unknown filter type"
                continue

    def do_dir_descend(self, command):
        while True:
            # prompt user for dir to descend
            readline.set_completer(self.cmpl.dir_completion)
            self.cenv.set_prompt()
            directory = raw_input(self.cenv.prompt + ' ' + "directory name (empty to quit): ")
            directory = directory.strip()
            if directory == '':
                return command
            if directory[-1] == os.path.sep:
                directory = directory[:-1]
            if (directory[0] == '/' and
                    not directory.startswith(self.cenv.cwdir + os.path.sep)):
                print 'New directory is not a subdirectory of',
                print self.cenv.cwdir, "skipping"
            else:
                self.cenv.cwdir = os.path.join(self.cenv.cwdir,
                                               directory)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print 'Now at', self.cenv.cwdir
                return True

    def do_examine(self, command):
        if command == 'D' or command == 'd':
            return self.do_dir_descend(command)
        elif command == 'U' or command == 'u':
            if self.cenv.cwdir != self.basedir:
                self.cenv.cwdir = os.path.dirname(self.cenv.cwdir)
                self.dircontents.clear()
                self.cenv.set_prompt()
                print 'Now at', self.cenv.cwdir
            else:
                print 'Already at top', self.cenv.cwdir
            result = True
        elif command == 'E' or command == 'e':
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1,
                                  self.filtertype, self.entry_is_not_ignored)
            result = True
        elif command == 'C' or command == 'c':
            self.do_file_contents()
            result = True
        elif command == 'F' or command == 'f':
            self.do_filter()
            result = True
        elif command == 'R' or command == 'r':
            continuing = True
            while continuing:
                command = self.show_menu('rule')
                continuing = self.do_command(command, 'rule', self.cenv.cwdir)
            result = True
        elif command == 'M' or command == 'm':
            result = self.do_mark()
        elif command == 'Q' or command == 'q' or command == '':
            print "quitting this level"
            result = None
        else:
            clouseau.retention.utils.cliutils.show_help('examine')
            result = True
        return result

    def do_top(self, command, dir_path):
        '''
        carry out one command chosen from the top level menu

        S: work through the status menu for dir_path
        E: show the entries of the current directory, then work
           through the examine menu
        F: choose the filter for entries shown
        I: nothing is done here, the command itself is returned
        R: work through the rule menu
        Q: quit
        anything else displays the help for the top level menu

        returns None for Q, the command itself for I and
        True in all other cases
        '''
        if command in ('S', 's'):
            # stay in the submenu for as long as its commands
            # report that there is more to do
            while self.do_command(self.show_menu('status'), 'status', dir_path):
                pass
            return True

        if command in ('E', 'e'):
            # fixme this should let the user page through batches,
            # not use '1' every time
            self.dircontents.show(self.cenv.host, self.cenv.cwdir, 1, self.filtertype,
                                  self.entry_is_not_ignored)
            # the working directory can change while examining, so
            # it is looked up again for every command
            while self.do_command(self.show_menu('examine'), 'examine',
                                  self.cenv.cwdir):
                pass
            return True

        if command in ('F', 'f'):
            self.do_filter()
            return True

        if command in ('I', 'i'):
            # do nothing
            return command

        if command in ('R', 'r'):
            while self.do_command(self.show_menu('rule'), 'rule',
                                  self.cenv.cwdir):
                pass
            return True

        if command in ('Q', 'q'):
            return None

        clouseau.retention.utils.cliutils.show_help('top')
        return True

    def do_command(self, command, level, dir_path):
        '''
        hand a menu command to the handler for the menu level it
        was chosen from

        command:  the user's choice; None means there is nothing to do
        level:    one of 'top', 'status', 'examine', 'rule'
        dir_path: directory the command applies to; it also becomes
                  the base directory and the current working
                  directory if those have not been set yet

        returns whatever the handler for the level returns; at the
        'status' level, None once a status has been recorded or the
        user quits, True after showing help for unrecognized input.
        an unknown level returns None
        '''
        if self.basedir is None:
            self.basedir = dir_path
        if self.cenv.cwdir is None:
            self.cenv.cwdir = dir_path

        if command is None:
            return None

        if level == 'top':
            return self.do_top(command, dir_path)
        if level == 'examine':
            return self.do_examine(command)
        if level == 'rule':
            return self.do_rule(command)
        if level != 'status':
            return None

        if command in Status.STATUSES:
            # this option is invoked on a directory so
            # type is dir every time
            clouseau.retention.utils.ruleutils.do_add_rule(
                self.cdb, dir_path,
                clouseau.retention.utils.ruleutils.text_to_entrytype('dir'),
                command, self.cenv.host)
            return None
        if command in ('Q', 'q'):
            return None
        clouseau.retention.utils.cliutils.show_help(level)
        return True
class RemoteFilesAuditor(object):
    '''
    audit files across a set of remote hosts,
    in a specified set of directories
    '''
    def __init__(self, hosts_expr, audit_type,
                 confdir=None,
                 prettyprint=False,
                 show_content=False, dirsizes=False, summary_report=False,
                 depth=2, to_check=None, ignore_also=None,
                 timeout=60, maxfiles=None,
                 store_filepath=None,
                 verbose=False):
        '''
        hosts_expr:   list or grain-based or wildcard expr for hosts
                      to be audited; "127.0.0.1" or "localhost"
                      requests an audit of the local host
        audit_type:   type of audit e.g. 'logs', 'homes'
        confdir:      directory where the yaml config files are stored
        prettyprint:  nicely format the output display
        show_content: show the first line or so from problematic files
        dirsizes:     show only directories which have too many files to
                      audit properly, don't report on files at all
        summary_report: do a summary of results instead of detailed
                        this means different things depending on the audit
                        type
        depth:        the auditor gives up on a directory that has too
                      many files in it (this saves it from dying on
                      someone's 25gb homedir).
                      this option tells it how far down the tree to go from
                      the top dir of the audit, before starting to count.
                      e.g. do we count in /home/ariel or separately in
                      /home/ariel/* or in /home/ariel/*/*, etc.
        to_check:     comma-separated list of dirs (must end in '/') and/or
                      files that will be checked; if this is None then
                      all dirs/files will be checked
        ignore_also:  comma-separated list of dirs (must end in '/') and/or
                      files that will be skipped in addition to the ones
                      in the config, rules, etc.
        timeout:      salt timeout for running remote commands
        maxfiles:     how many files in a directory tree is too many to audit
                      (at which point we warn about that and move on);
                      if None, a default is chosen, see set_up_max_files
        store_filepath: full path to rule store (sqlite3 db)
        verbose:      show informative messages during processing
        '''

        # what to audit and where the configuration lives
        self.hosts_expr = hosts_expr
        self.audit_type = audit_type
        self.confdir = confdir
        self.locations = audit_type + "_locations"

        # how results are to be reported
        self.prettyprint = prettyprint
        self.show_sample_content = show_content
        self.dirsizes = dirsizes
        self.show_summary = summary_report

        # limits on what gets looked at
        self.depth = depth + 1  # actually count of path separators in dirname
        self.to_check = to_check
        self.ignore_also = ignore_also
        self.timeout = timeout

        self.store_filepath = store_filepath
        self.verbose = verbose

        # set_up_max_files consults self.dirsizes when picking a default
        self.max_files = maxfiles
        self.set_up_max_files(maxfiles)

        magic_lib = clouseau.retention.utils.magic
        self.magic = magic_lib.magic_open(magic_lib.MAGIC_NONE)
        self.magic.load()

        self.summary = None
        self.display_from_dict = FileInfo.display_from_dict
        self.runner = None

        self.localaudit = self.hosts_expr in ("127.0.0.1", "localhost")
        if self.localaudit:
            # run locally: no minions to expand and no rule store
            self.expanded_hosts = []
            self.cdb = None
            return

        clouseau.retention.utils.config.set_up_conf(confdir)
        salt_client = LocalClientPlus()
        target, target_type = Runner.get_hosts_expr_type(self.hosts_expr)
        self.expanded_hosts = salt_client.cmd_expandminions(
            target, "test.ping", expr_form=target_type)

        self.cdb = RuleStore(self.store_filepath)
        self.cdb.store_db_init(self.expanded_hosts)
        self.set_up_and_export_rule_store()

    def get_audit_args(self):
        audit_args = [self.confdir,
                      self.show_sample_content,
                      self.dirsizes,
                      self.depth - 1,
                      self.to_check,
                      self.ignore_also,
                      self.max_files]
        return audit_args

    def set_up_runner(self):
        '''
        create the Runner for this audit from the auditor's
        settings and keep it in self.runner
        '''
        runner_settings = (
            self.confdir,
            self.store_filepath,
            self.hosts_expr,
            self.expanded_hosts,
            self.audit_type,
            self.get_audit_args(),
            self.show_sample_content,
            self.to_check,
            self.timeout,
            self.verbose,
        )
        self.runner = Runner(*runner_settings)

    def set_up_max_files(self, maxfiles):
        '''
        more than this many files in a subdir we won't process,
        we'll just try to name top offenders

        if we've been asked only to report dir trees that are
        too large in this manner, we can set defaults mich
        higher, since we don't stat files, open them to guess
        their filetype, etc; processing then goes much quicker
        '''

        if maxfiles is None:
            if self.dirsizes:
                self.max_files = 1000
            else:
                self.max_files = 100
        else:
            self.max_files = maxfiles

    def set_up_and_export_rule_store(self):
        '''
        export the rules of every host listed in the rule store to
        yaml files in the directory "data_retention.d" next to the
        rule store, creating that directory if need be

        each host gets two files: <host>_store.yaml from an export
        with no status given, and <host>_store_good.yaml from an
        export restricted to the status 'good'
        '''
        known_hosts = self.cdb.store_db_list_all_hosts()
        store_dir = os.path.dirname(self.store_filepath)
        destdir = os.path.join(store_dir, "data_retention.d")
        if not os.path.isdir(destdir):
            os.makedirs(destdir, 0o755)
        for host in known_hosts:
            ruleutils = clouseau.retention.utils.ruleutils
            ruleutils.export_rules(
                self.cdb, os.path.join(destdir, host + "_store.yaml"), host)
            ruleutils.export_rules(
                self.cdb, os.path.join(destdir, host + "_store_good.yaml"),
                host, Status.text_to_status('good'))

    def normalize(self, fname):
        '''
        subclasses may want to do something different, see
        LogsAuditor for an example
        '''
        return fname

    @staticmethod
    def get_dirname_from_warning(warning):
        '''
        some audit output lines warn about directory trees
        having too many files to audit; grab the dirname
        out of such a line and return it
        '''
        start = "WARNING: directory "
        if warning.startswith(start):
            # WARNING: directory %s has more than %d files
            rindex = warning.rfind(" has more than")
            if not rindex:
                return None
            else:
                return warning[len(start):rindex]

        start = "WARNING: too many files to audit in directory "
        if warning.startswith(start):
            return warning[len(start):]

        return None

    def add_stats(self, item, summary):
        '''
        gather stats on how many files/dirs
        may be problematic; summary is where the results
        are collected, item is the item to include in
        the summary if needed
        '''
        dirname = os.path.dirname(item['path'])

        if dirname not in summary:
            summary[dirname] = {
                'binary': {'old': 0, 'maybe_old': 0, 'nonroot': 0},
                'text': {'old': 0, 'maybe_old': 0, 'nonroot': 0}
            }
        if item['binary'] is True:
            group = 'binary'
        else:
            group = 'text'

        if item['old'] == 'T':
            summary[dirname][group]['old'] += 1
        elif item['old'] == '-':
            summary[dirname][group]['maybe_old'] += 1
        if item['owner'] != 0:
            summary[dirname][group]['nonroot'] += 1
        return summary

    def display_host_summary(self):
        if self.summary is not None:
            paths = sorted(self.summary.keys())
            for path in paths:
                for group in self.summary[path]:
                    if (self.summary[path][group]['old'] > 0 or
                            self.summary[path][group]['maybe_old'] > 0 or
                            self.summary[path][group]['nonroot'] > 0):
                        print ("in directory %s, (%s), %d old,"
                               " %d maybe old, %d with non root owner"
                               % (path, group, self.summary[path][group]['old'],
                                  self.summary[path][group]['maybe_old'],
                                  self.summary[path][group]['nonroot']))

    def display_summary(self, result):
        for host in result:
            self.summary = {}
            print "host:", host

            if result[host]:
                try:
                    lines = result[host].split('\n')
                    for line in lines:
                        if display_summary_line(line):
                            continue
                        else:
                            try:
                                item = json.loads(
                                    line, object_hook=JsonHelper.decode_dict)
                                if item['empty'] is not True:
                                    self.add_stats(item, self.summary)
                            except:
                                print "WARNING: failed to json load from host",
                                print host, "this line:", line
                    self.display_host_summary()
                except:
                    print "WARNING: failed to process output from host"
            else:
                if self.verbose:
                    print "WARNING: no output from host", host

    def display_remote_host(self, result):
        try:
            lines = result.split('\n')
            files = []
            for line in lines:
                if line == "":
                    continue
                elif line.startswith("WARNING:") or line.startswith("INFO:"):
                    print line
                else:
                    files.append(json.loads(line, object_hook=JsonHelper.decode_dict))

            if files == []:
                return
            path_justify = max([len(finfo['path']) for finfo in files]) + 2
            for finfo in files:
                self.display_from_dict(finfo, self.show_sample_content, path_justify)
        except:
            print "WARNING: failed to load json from host"

    def get_local_auditor(self):
        '''
        return a LocalFilesAuditor set up with this auditor's
        settings, for running the audit on the local host
        '''
        # NOTE(review): self.depth is handed over as is here, while
        # get_audit_args() passes self.depth - 1 -- confirm which of
        # the two LocalFilesAuditor expects
        settings = (
            self.audit_type,
            self.confdir,
            self.show_sample_content,
            self.dirsizes,
            self.depth,
            self.to_check,
            self.ignore_also,
            self.max_files,
        )
        return LocalFilesAuditor(*settings)

    def audit_hosts(self):
        # do local audit instead
        if self.localaudit:
            localauditor = self.get_local_auditor()
            result = localauditor.do_local_audit()
            self.display_remote_host(result)
            return

        # proceed to regular remote audit
        self.set_up_runner()
        result = self.runner.run_remotely()
        if result is None:
            print "WARNING: failed to get output from audit script on any host"
        elif self.show_summary:
            self.display_summary(result)
        else:
            for host in result:
                print "host:", host
                if result[host]:
                    self.display_remote_host(result[host])
                else:
                    if self.verbose:
                        print "no output from host", host
        # add some results to rule store
        self.update_status_rules_from_report(result)
        return result

    def update_status_rules_from_report(self, report):
        hostlist = report.keys()
        for host in hostlist:
            try:
                problem_rules = clouseau.retention.utils.ruleutils.get_rules(
                    self.cdb, host, Status.text_to_status('problem'))
            except:
                print 'WARNING: problem retrieving problem rules for host', host
                problem_rules = None
            if problem_rules is not None:
                existing_problems = [rule['path'] for rule in problem_rules]
            else:
                existing_problems = []

            dirs_problem, dirs_skipped = get_dirs_toexamine(report[host])
            if dirs_problem is not None:
                dirs_problem = list(set(dirs_problem))
                for dirname in dirs_problem:
                    clouseau.retention.utils.ruleutils.do_add_rule(
                        self.cdb, dirname,
                        clouseau.retention.utils.ruleutils.text_to_entrytype('dir'),
                        Status.text_to_status('problem'), host)

            if dirs_skipped is not None:
                dirs_skipped = list(set(dirs_skipped))
                for dirname in dirs_skipped:
                    if dirname in dirs_problem or dirname in existing_problems:
                        # problem report overrides 'too many to audit'
                        continue
                    clouseau.retention.utils.ruleutils.do_add_rule(
                        self.cdb, dirname,
                        clouseau.retention.utils.ruleutils.text_to_entrytype('dir'),
                        Status.text_to_status('unreviewed'), host)