Example #1
File: control.py Project: zhezhe168/telex
def status(nodes):

    util.output("%-10s %-10s %-10s %-13s %-6s %-6s %-20s " %
                ("Name", "Type", "Host", "Status", "Pid", "Peers", "Started"))

    all = isRunning(nodes)
    running = []

    cmds1 = []
    cmds2 = []
    for (node, isrunning) in all:
        if isrunning:
            running += [node]
            cmds1 += [(node, "cat-file", ["%s/.startup" % node.cwd()])]
            cmds2 += [(node, "cat-file", ["%s/.status" % node.cwd()])]

    startups = execute.runHelperParallel(cmds1)
    statuses = execute.runHelperParallel(cmds2)

    startups = dict([(n.tag, success and util.fmttime(output[0]) or "???")
                     for (n, success, output) in startups])
    statuses = dict([(n.tag, success and output[0].split()[0].lower() or "???")
                     for (n, success, output) in statuses])

    peers = {}
    nodes = [n for n in running if statuses[n.tag] == "running"]
    for (node, success, args) in _queryPeerStatus(nodes):
        if success:
            peers[node.tag] = []
            for f in args[0].split():
                (key, val) = f.split("=")
                if key == "peer" and val != "":
                    peers[node.tag] += [val]
        else:
            peers[node.tag] = None

    for (node, isrunning) in all:

        util.output("%-10s " % node.tag, nl=False)
        util.output("%-10s %-10s " % (node.type, node.host), nl=False)

        if isrunning:
            util.output("%-13s " % statuses[node.tag], nl=False)

        elif node.hasCrashed():
            util.output("%-13s " % "crashed", nl=False)
        else:
            util.output("%-13s " % "stopped", nl=False)

        if isrunning:
            util.output("%-6s " % node.getPID(), nl=False)

            if node.tag in peers and peers[node.tag] != None:
                util.output("%-6d " % len(peers[node.tag]), nl=False)
            else:
                util.output("%-6s " % "???", nl=False)

            util.output("%-8s  " % startups[node.tag], nl=False)

        util.output()
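Note (not part of the original sources): every example in this listing relies on the same contract for execute.runHelperParallel(): it takes a list of (node, helper-name, argument-list) tuples and yields one (node, success, output-lines) tuple per command. The stand-in below is only a sketch of that assumed shape, handy for reading the remaining examples; it echoes each request instead of running anything.

def runHelperParallel_stub(cmds):
    # cmds: list of (node, helper_name, args) tuples, as built in the examples above.
    # Returns one (node, success, output) tuple per command; output is a list of
    # lines. This stub just echoes the request rather than executing it remotely.
    results = []
    for (node, helper, args) in cmds:
        results.append((node, True, ["%s %s" % (helper, " ".join(args))]))
    return results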
Example #2
def status(nodes):

    util.output("%-10s %-10s %-10s %-13s %-6s %-6s %-20s " % ("Name",  "Type", "Host", "Status", "Pid", "Peers", "Started"))

    all = isRunning(nodes)
    running = []

    cmds1 = []
    cmds2 = []
    for (node, isrunning) in all:
        if isrunning:
            running += [node]
            cmds1 += [(node, "cat-file", ["%s/.startup" % node.cwd()])]
            cmds2 += [(node, "cat-file", ["%s/.status" % node.cwd()])]

    startups = execute.runHelperParallel(cmds1)
    statuses = execute.runHelperParallel(cmds2)

    startups = dict([(n.name, success and util.fmttime(output[0]) or "???") for (n, success, output) in startups])
    statuses = dict([(n.name, success and output[0].split()[0].lower() or "???") for (n, success, output) in statuses])

    peers = {}
    nodes = [n for n in running if statuses[n.name] == "running"]
    for (node, success, args) in _queryPeerStatus(nodes):
        if success:
            peers[node.name] = []
            for f in args[0].split():
                keyval = f.split("=")
                if len(keyval) > 1:
                    (key, val) = keyval
                    if key == "peer" and val != "":
                        peers[node.name] += [val]
        else:
            peers[node.name] = None

    for (node, isrunning) in all:

        util.output("%-10s " % node.name, nl=False)
        util.output("%-10s %-10s " % (node.type, node.host), nl=False)

        if isrunning:
            util.output("%-13s " % statuses[node.name], nl=False)

        elif node.hasCrashed():
            util.output("%-13s " % "crashed", nl=False)
        else:
            util.output("%-13s " % "stopped", nl=False)

        if isrunning:
            util.output("%-6s " % node.getPID(), nl=False)

            if node.name in peers and peers[node.name] != None:
                util.output("%-6d " % len(peers[node.name]), nl=False)
            else:
                util.output("%-6s " % "???", nl=False)

            util.output("%-8s  " % startups[node.name], nl=False)

        util.output()
Example #3
def isRunning(nodes, setcrashed=True):

    results = []
    cmds = []

    for node in nodes:
        pid = node.getPID()
        if not pid:
            results += [(node, False)]
            continue

        cmds += [(node, "check-pid", [str(pid)])]

    for (node, success, output) in execute.runHelperParallel(cmds):

        # If we cannot connect to the host at all, we filter it out because
        # the process might actually still be running but we can't tell.
        if output == None:
            if config.Config.cron == "0":
                util.warn("cannot connect to %s" % node.name)
            continue

        results += [(node, success)]

        if not success:
            if setcrashed:
                # Grmpf. It crashed.
                node.clearPID()
                node.setCrashed()

    return results
Example #4
def isRunning(nodes, setcrashed=True):

    results = []
    cmds = []

    for node in nodes:
        pid = node.getPID()
        if not pid:
            results += [(node, False)]
            continue

        cmds += [(node, "check-pid", [str(pid)])]

    for (node, success, output) in execute.runHelperParallel(cmds):

        # If we cannot connect to the host at all, we filter it out because
        # the process might actually still be running but we can't tell.
        if output == None:
            if config.Config.cron == "0":
                util.warn("cannot connect to %s" % node.name)
            continue

        results += [(node, success)]

        if not success:
            if setcrashed:
                # Grmpf. It crashed.
                node.clearPID()
                node.setCrashed()

    return results
Example #5
File: control.py Project: zhezhe168/telex
def getDf(nodes):

    dirs = ("logdir", "bindir", "helperdir", "cfgdir", "spooldir", "policydir",
            "libdir", "tmpdir", "staticdir", "scriptsdir")

    df = {}
    for node in nodes:
        df[node.tag] = {}

    for dir in dirs:
        path = config.Config.config[dir]

        cmds = []
        for node in nodes:
            cmds += [(node, "df", [path])]

        results = execute.runHelperParallel(cmds)

        for (node, success, output) in results:
            if success:
                fields = output[0].split()

                # Ignore NFS mounted volumes.
                if fields[0].find(":") < 0:
                    df[node.tag][fields[0]] = fields

    result = {}
    for node in df:
        result[node] = df[node].values()

    return result
Example #6
File: control.py Project: zhezhe168/telex
def getCapstatsOutput(nodes, interval):

    if not config.Config.capstats:
        if config.Config.cron == "0":
            util.warn("do not have capstats binary available")
        return []

    results = []
    cmds = []

    hosts = {}
    for node in nodes:
        try:
            hosts[(node.addr, node.interface)] = node
        except AttributeError:
            continue

    for (addr, interface) in hosts.keys():
        node = hosts[addr, interface]

        capstats = [
            config.Config.capstats, "-i", interface, "-I",
            str(interval), "-n", "1"
        ]

        # Unfinished feature: only consider a particular MAC. Works here for capstats
        # but Bro config is not adapted currently so we disable it for now.
        #        try:
        #            capstats += ["-f", "\\'", "ether dst %s" % node.ether, "\\'"]
        #        except AttributeError:
        #            pass

        cmds += [(node, "run-cmd", capstats)]

    outputs = execute.runHelperParallel(cmds)

    for (node, success, output) in outputs:

        if not success:
            results += [(node, "%s: cannot execute capstats" % node.tag, {})]
            continue

        fields = output[0].split()
        vals = {}

        try:
            for field in fields[1:]:
                (key, val) = field.split("=")
                vals[key] = float(val)

            results += [(node, None, vals)]

        except ValueError:
            results += [(node, "%s: unexpected capstats output: %s" %
                         (node.tag, output[0]), {})]

    return results
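A short usage sketch (an assumption, not project code): consuming the (node, error-message, values-dict) tuples returned above and printing whatever key=value pairs capstats reported for each node.

for (node, err, vals) in getCapstatsOutput(nodes, interval=10):
    if err:
        util.warn(err)  # e.g. "worker-1: cannot execute capstats"
        continue
    for key in sorted(vals):
        util.output("%s %s=%.2f" % (node.tag, key, vals[key]))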
Example #7
def getDf(nodes):
    hadError = False
    dirs = (
        "logdir",
        "bindir",
        "helperdir",
        "cfgdir",
        "spooldir",
        "policydir",
        "libdir",
        "tmpdir",
        "staticdir",
        "scriptsdir",
    )

    df = {}
    for node in nodes:
        df[node.name] = {}

    for dir in dirs:
        path = config.Config.config[dir]

        cmds = []
        for node in nodes:
            if dir == "logdir" and node.type != "manager":
                # Don't need this on the workers/proxies.
                continue

            cmds += [(node, "df", [path])]

        results = execute.runHelperParallel(cmds)

        for (node, success, output) in results:
            if success:
                if output:
                    fields = output[0].split()

                    # Ignore NFS mounted volumes.
                    if fields[0].find(":") < 0:
                        df[node.name][fields[0]] = fields
                else:
                    util.output("error checking disk space on node '%s': no df output" % node)
                    hadError = True
            else:
                if output:
                    msg = output[0]
                else:
                    msg = "unknown failure"
                util.output("error checking disk space on node '%s': %s" % (node, msg))
                hadError = True

    result = {}
    for node in df:
        result[node] = df[node].values()

    return (hadError, result)
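A minimal consumption sketch (not from the project) for this variant's (hadError, result) pair; each entry is one line of df helper output split on whitespace, so nothing beyond that layout is assumed here.

hadError, usage = getDf(nodes)
for name in usage:
    for fields in usage[name]:
        util.output("%s: %s" % (name, " ".join(fields)))
if hadError:
    util.warn("one or more disk-space checks failed")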
Example #8
File: control.py Project: zhezhe168/telex
def executeCmd(nodes, cmd):

    for special in "|'\"":
        cmd = cmd.replace(special, "\\" + special)

    cmds = [(n, "run-cmd", [cmd]) for n in nodes]

    for (node, success, output) in execute.runHelperParallel(cmds):
        util.output("[%s] %s\n> %s" %
                    (node.host,
                     (success and " " or "error"), "\n> ".join(output)))
Example #9
def attachGdb(nodes):
    running = isRunning(nodes)

    cmds = []
    for (node, isrunning) in running:
        if isrunning:
            cmds += [(node, "gdb-attach", ["gdb-%s" % node.name, config.Config.bro, str(node.getPID())])]

    results = execute.runHelperParallel(cmds)
    for (node, success, output) in results:
        if success:
            util.output("gdb attached on %s" % node.name)
        else:
            util.output("cannot attach gdb on %s: %s" % node.name, output)
Example #10
File: control.py Project: cubic1271/broctl
def getDf(nodes):
    dirs = ("logdir", "bindir", "helperdir", "cfgdir", "spooldir", "policydir", "libdir", "tmpdir", "staticdir", "scriptsdir")

    df = {}
    for node in nodes:
        df["%s/%s" % (node.name, node.host)] = {}

    for dir in dirs:
        path = config.Config.config[dir]

        cmds = []
        for node in nodes:
            if dir == "logdir" and node.type != "manager":
                # Don't need this on the workers/proxies.
                continue

            cmds += [(node, "df", [path])]

        results = execute.runHelperParallel(cmds)

        for (node, success, output) in results:
            nodehost = "%s/%s" % (node.name, node.host)
            if success:
                if output:
                    fields = output[0].split()

                    # Ignore NFS mounted volumes.
                    if fields[0].find(":") < 0:
                        total = float(fields[1])
                        used = float(fields[2])
                        avail = float(fields[3])
                        perc = used * 100.0 / (used + avail)
                        df[nodehost][fields[0]] = [fields[0], total, used,
                                                   avail, perc]
                else:
                    df[nodehost]["FAIL"] = ["FAIL", "no output from df helper"]
            else:
                if output:
                    msg = output[0]
                else:
                    msg = "unknown failure"
                df[nodehost]["FAIL"] = ["FAIL", msg]

    result = []
    for node in nodes:
        nodehost = "%s/%s" % (node.name, node.host)
        result.append((nodehost, df[nodehost].values()))

    return result
Example #11
File: control.py Project: zhezhe168/telex
def _makeCrashReports(nodes):
    cmds = []
    for node in nodes:
        cmds += [(node, "run-cmd", [
            os.path.join(config.Config.scriptsdir, "post-terminate"),
            node.cwd(), "crash"
        ])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.tag)
        else:
            util.sendMail("Crash report from %s" % node.tag, "\n".join(output))

        node.clearCrashed()
Example #12
def _makeCrashReports(nodes):

    for n in nodes:
        plugin.Registry.broProcessDied(n)

    cmds = []
    for node in nodes:
        cmds += [(node, "run-cmd",  [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd(),  "crash"])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
        else:
            util.sendMail("Crash report from %s" % node.name, "\n".join(output))

        node.clearCrashed()
Example #13
File: control.py Project: zhezhe168/telex
def attachGdb(nodes):
    running = isRunning(nodes)

    cmds = []
    for (node, isrunning) in running:
        if isrunning:
            cmds += [(node, "gdb-attach",
                      ["gdb-%s" % node.tag, config.Config.bro,
                       node.getPID()])]

    results = execute.runHelperParallel(cmds)
    for (node, success, output) in results:
        if success:
            util.output("gdb attached on %s" % node.tag)
        else:
            util.output("cannot attach gdb on %s: %s" % node.tag, output)
Example #14
def _makeCrashReports(nodes):

    for n in nodes:
        plugin.Registry.broProcessDied(n)

    msg = "If you want to help us debug this problem, then please forward\nthis mail to [email protected]\n"
    cmds = []
    for node in nodes:
        cmds += [(node, "run-cmd", [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd(), "crash"])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
        else:
            util.sendMail("Crash report from %s" % node.name, msg + "\n".join(output))

        node.clearCrashed()
Example #15
def getDf(nodes):

    dirs = ("logdir", "bindir", "helperdir", "cfgdir", "spooldir", "policydir",
            "libdir", "tmpdir", "staticdir", "scriptsdir")

    df = {}
    for node in nodes:
        df[node.name] = {}

    for dir in dirs:
        path = config.Config.config[dir]

        cmds = []
        for node in nodes:
            if dir == "logdir" and node.type != "manager":
                # Don't need this on the workers/proxies.
                continue

            cmds += [(node, "df", [path])]

        results = execute.runHelperParallel(cmds)

        for (node, success, output) in results:
            if success:
                if len(output) > 0:
                    fields = output[0].split()

                    # Ignore NFS mounted volumes.
                    if fields[0].find(":") < 0:
                        df[node.name][fields[0]] = fields
                else:
                    util.warn("Invalid df output for node '%s'." % node)

    result = {}
    for node in df:
        result[node] = df[node].values()

    return result
Example #16
def getDf(nodes):

    dirs = ("logdir", "bindir", "helperdir", "cfgdir", "spooldir", "policydir", "libdir", "tmpdir", "staticdir", "scriptsdir")

    df = {}
    for node in nodes:
        df[node.name] = {}

    for dir in dirs:
        path = config.Config.config[dir]

        cmds = []
        for node in nodes:
            if dir == "logdir" and node.type != "manager":
                # Don't need this on the workers/proxies.
                continue

            cmds += [(node, "df", [path])]

        results = execute.runHelperParallel(cmds)

        for (node, success, output) in results:
            if success:
                if len(output) > 0:
                    fields = output[0].split()

                    # Ignore NFS mounted volumes.
                    if fields[0].find(":") < 0:
                        df[node.name][fields[0]] = fields
                else:
                    util.warn("Invalid df output for node '%s'." % node)


    result = {}
    for node in df:
        result[node] = df[node].values()

    return result
Example #17
def getCapstatsOutput(nodes, interval):

    if not config.Config.capstatspath:
        if config.Config.cron == "0":
            util.warn("do not have capstats binary available")
        return []

    results = []
    cmds = []

    hosts = {}
    for node in nodes:
        try:
            hosts[(node.addr, node.interface)] = node
        except AttributeError:
            continue

    for (addr, interface) in hosts.keys():
        node = hosts[addr, interface]

        capstats = [config.Config.capstatspath, "-i", interface, "-I", str(interval), "-n", "1"]

# Unfinished feature: only consider a particular MAC. Works here for capstats
# but Bro config is not adapted currently so we disable it for now.
#        try:
#            capstats += ["-f", "\\'", "ether dst %s" % node.ether, "\\'"]
#        except AttributeError:
#            pass

        cmds += [(node, "run-cmd", capstats)]

    outputs = execute.runHelperParallel(cmds)

    totals = {}

    for (node, success, output) in outputs:

        if not success:
            results += [(node, "%s: cannot execute capstats" % node.name, {})]
            continue

        fields = output[0].split()
        vals = { }

        try:
            for field in fields[1:]:
                (key, val) = field.split("=")
                val = float(val)
                vals[key] = val

                try:
                    totals[key] += val
                except KeyError:
                    totals[key] = val

            results += [(node, None, vals)]

        except ValueError:
            results += [(node, "%s: unexpected capstats output: %s" % (node.name, output[0]), {})]

    # Add pseudo-node for totals
    if len(nodes) > 1:
        results += [(node_mod.Node("$total"), None, totals)]

    return results
Example #18
def waitForBros(nodes, status, timeout, ensurerunning):

    # If ensurerunning is true, process must still be running.
    if ensurerunning:
        running = isRunning(nodes)
    else:
        running = [(node, True) for node in nodes]

    results = []

    # Determine set of nodes still to check.
    todo = {}
    for (node, isrunning) in running:
        if isrunning:
            todo[node.name] = node
        else:
            results += [(node, False)]

    more_than_one = (len(todo) > 1)

    points = False
    while True:
        # Determine whether process is still running. We need to do this
        # before we get the state to avoid a race condition.
        running = isRunning(todo.values(), setcrashed=False)

        # Check nodes' .status file
        cmds = []
        for node in todo.values():
            cmds += [(node, "cat-file", ["%s/.status" % node.cwd()])]

        for (node, success, output) in execute.runHelperParallel(cmds):
            if success:
                try:
                    (stat, loc) = output[0].split()
                    if status in stat:
                        # Status reached. Cool.
                        del todo[node.name]
                        results += [(node, True)]
                except IndexError:
                    # Something's wrong. We give up on that node.
                    del todo[node.name]
                    results += [(node, False)]

        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, a dead node's status will not change anymore.
                del todo[node.name]
                results += [(node, False)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.
        time.sleep(1)

        # Timeout reached?
        timeout -= 1
        if timeout <= 0:
            break

        if more_than_one:
            util.output("%d " % len(todo), nl=False)
        else:
            util.output(".", nl=False)

        points = True

    for node in todo.values():
        # These did time-out.
        results += [(node, False)]

    if points:
        if more_than_one:
            util.output("%d " % len(todo))
        else:
            util.output("")

    return results
Example #19
def waitForBros(nodes, status, timeout, ensurerunning):

    # If ensurerunning is true, process must still be running.
    if ensurerunning:
        running = isRunning(nodes)
    else:
        running = [(node, True) for node in nodes]

    results = []

    # Determine set of nodes still to check.
    todo = {}
    for (node, isrunning) in running:
        if isrunning:
            todo[node.name] = node
        else:
            results += [(node, False)]

    more_than_one = (len(todo) > 1)

    points = False
    while True:
        # Determine whether process is still running. We need to do this
        # before we get the state to avoid a race condition.
        running = isRunning(todo.values(), setcrashed=False)

        # Check nodes' .status file
        cmds = []
        for node in todo.values():
            cmds += [(node, "cat-file", ["%s/.status" % node.cwd()])]

        for (node, success, output) in execute.runHelperParallel(cmds):
            if success:
                try:
                    (stat, loc) = output[0].split()
                    if status in stat:
                        # Status reached. Cool.
                        del todo[node.name]
                        results += [(node, True)]
                except IndexError:
                    # Something's wrong. We give up on that node.
                    del todo[node.name]
                    results += [(node, False)]

        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, a dead node's status will not change anymore.
                del todo[node.name]
                results += [(node, False)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.
        time.sleep(1)

        # Timeout reached?
        timeout -= 1
        if timeout <= 0:
            break

        if more_than_one:
            util.output("%d " % len(todo), nl=False)
        else:
            util.output(".", nl=False)

        points = True

    for node in todo.values():
        # These did time-out.
        results += [(node, False)]

    if points:
        if more_than_one:
            util.output("%d " % len(todo))
        else:
            util.output("")

    return results
Example #20
File: control.py Project: cubic1271/broctl
def getTopOutput(nodes):

    results = []
    cmds = []

    running = isRunning(nodes)

    # Get all the PIDs first.

    pids = {}
    parents = {}

    for (node, isrunning) in running:
        if isrunning:
            pid = node.getPID()
            pids[node.name] = [pid]
            parents[node.name] = str(pid)

            cmds += [(node, "get-childs", [str(pid)])]
        else:
            results += [(node, "not running", [{}])]
            continue

    if not cmds:
        return results

    for (node, success, output) in execute.runHelperParallel(cmds):

        if not success:
            results += [(node, "cannot get child pids", [{}])]
            continue

        pids[node.name] += [int(line) for line in output]

    cmds = []
    hosts = {}

    # Now run top once per host.
    for node in nodes:   # Do the loop again to keep the order.
        if node.name not in pids:
            continue

        if node.host in hosts:
            continue

        hosts[node.host] = 1

        cmds += [(node, "top", [])]

    if not cmds:
        return results

    res = {}
    for (node, success, output) in execute.runHelperParallel(cmds):
        res[node.host] = (success, output)

    # Gather results for all the nodes that are running
    for node in nodes:
        if node.name not in pids:
            continue

        success, output = res[node.host]

        if not success or not output:
            results += [(node, "cannot get top output", [{}])]
            continue

        procs = [line.split() for line in output if int(line.split()[0]) in pids[node.name]]

        if not procs:
            # It's possible that the process is no longer there.
            results += [(node, "not running", [{}])]
            continue

        vals = []

        try:
            for p in procs:
                d = {}
                d["pid"] = int(p[0])
                d["proc"] = (p[0] == parents[node.name] and "parent" or "child")
                d["vsize"] = long(float(p[1])) #May be something like 2.17684e+9
                d["rss"] = long(float(p[2]))
                d["cpu"] = p[3]
                d["cmd"] = " ".join(p[4:])
                vals += [d]
        except ValueError, err:
            results += [(node, "unexpected top output: %s" % err, [{}])]
            continue

        results += [(node, None, vals)]

    return results
Example #21
File: control.py Project: cubic1271/broctl
def getCapstatsOutput(nodes, interval):

    results = []

    hosts = {}
    for node in nodes:
        if not node.interface:
            continue

        try:
            hosts[(node.addr, node.interface)] = node
        except AttributeError:
            continue

    cmds = []

    for (addr, interface) in hosts.keys():
        node = hosts[addr, interface]

        # If interface name contains semicolons (to aggregate traffic from
        # multiple devices with PF_RING, the interface name can be in a
        # semicolon-delimited format, such as "p2p1;p2p2"), then we must
        # quote it to prevent shell from interpreting semicolon as command
        # separator (another layer of quotes is needed because the eval
        # command is used).
        capstats = [config.Config.capstatspath, "-I", str(interval), "-n", "1", "-i", "'\"%s\"'" % interface]

        cmds += [(node, "run-cmd", capstats)]

    outputs = execute.runHelperParallel(cmds)

    totals = {}

    for (node, success, output) in outputs:

        if not success:
            if output:
                results += [(node, "%s: capstats failed (%s)" % (node.name, output[0]), {})]
            else:
                results += [(node, "%s: cannot execute capstats" % node.name, {})]
            continue

        if not output:
            results += [(node, "%s: no capstats output" % node.name, {})]
            continue

        fields = output[0].split()[1:]

        if not fields:
            results += [(node, "%s: unexpected capstats output: %s" % (node.name, output[0]), {})]
            continue

        vals = {}

        try:
            for field in fields:
                (key, val) = field.split("=")
                val = float(val)
                vals[key] = val

                if key in totals:
                    totals[key] += val
                else:
                    totals[key] = val

            results += [(node, None, vals)]

        except ValueError:
            results += [(node, "%s: unexpected capstats output: %s" % (node.name, output[0]), {})]

    # Add pseudo-node for totals
    if len(nodes) > 1:
        results += [(node_mod.Node("$total"), None, totals)]

    return results
Example #22
def getTopOutput(nodes):

    results = []
    cmds = []

    running = isRunning(nodes)

    # Get all the PIDs first.

    pids = {}
    parents = {}

    for (node, isrunning) in running:
        if isrunning:
            pid = node.getPID()
            pids[node.name] = [pid]
            parents[node.name] = str(pid)

            cmds += [(node, "get-childs", [str(pid)])]
        else:
            results += [(node, "not running", [{}])]
            continue

    if not cmds:
        return results

    for (node, success, output) in execute.runHelperParallel(cmds):

        if not success:
            results += [(node, "cannot get child pids", [{}])]
            continue

        pids[node.name] += [int(line) for line in output]

    cmds = []

    # Now run top.
    for node in nodes: # Do the loop again to keep the order.
        if not node.name in pids:
            continue

        cmds += [(node, "top", [])]

    if not cmds:
        return results

    for (node, success, output) in execute.runHelperParallel(cmds):

        if not success:
            results += [(node, "cannot get top output", [{}])]
            continue

        procs = [line.split() for line in output if int(line.split()[0]) in pids[node.name]]

        if not procs:
            # It can happen that in the meantime the process is no longer there.
            results += [(node, "not running", [{}])]
            continue

        vals = []

        for p in procs:
            d = {}
            d["pid"] = int(p[0])
            d["proc"] = (p[0] == parents[node.name] and "parent" or "child")
            d["vsize"] = long(float(p[1])) # May be something like 2.17684e+09
            d["rss"] = long(float(p[2]))
            d["cpu"] = p[3]
            d["cmd"] = " ".join(p[4:])
            vals += [d]

        results += [(node, None, vals)]

    return results
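A minimal sketch (an assumption, not project code) of walking the getTopOutput() result, where each successful entry carries a list of per-process dicts with the pid/proc/vsize/rss/cpu/cmd keys built above.

for (node, err, procs) in getTopOutput(nodes):
    if err:
        util.output("%s: %s" % (node.name, err))
        continue
    for p in procs:
        util.output("%s %s pid=%d vsize=%d rss=%d cpu=%s"
                    % (node.name, p["proc"], p["pid"], p["vsize"], p["rss"], p["cpu"]))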
Example #23
def _stopNodes(nodes):

    results = []
    running = []

    # Check for crashed nodes.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running += [node]
            util.output("stopping %s ..." % node.name)
        else:
            results += [(node, True)]

            if node.hasCrashed():
                _makeCrashReports([node])
                util.output("%s not running (was crashed)" % node.name)
            else:
                util.output("%s not running" % node.name)

    # Helper function to stop nodes with given signal.
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]

        return execute.runHelperParallel(cmds)
        #events = []
        #for node in nodes:
        #    events += [(node, "Control::shutdown_request", [], "Control::shutdown_response")]
        #return execute.sendEventsParallel(events)

    # Stop nodes.
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED", int(config.Config.stoptimeout), False):
        if not success:
            # Check whether it crashed during shutdown ...
            result = isRunning([node])
            for (node, isrunning) in result:
                if isrunning:
                    util.output("%s did not terminate ... killing ..." % node.name)
                    kill += [node]
                else:
                    # crashed flag is set by isRunning().
                    util.output("%s crashed during shutdown" % node.name)

    if len(kill):
        # Kill those which did not terminate gracefully.
        stop(kill, 9)
        # Give them a bit of time to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:

        running = isRunning(todo.values(), setcrashed=False)

        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated += [node]
                results += [(node, True)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.

        if timeout <= 0:
            break

        time.sleep(1)
        timeout -= 1

    results += [(node, False) for node in todo]

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [node for node in terminated if not node.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds += [(node, "run-cmd",  [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd()])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
Example #24
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]

        return execute.runHelperParallel(cmds)
Example #25
def _startNodes(nodes):
    results = []

    filtered = []
    # Ignore nodes which are still running.
    for (node, isrunning) in isRunning(nodes):
        if not isrunning:
            filtered += [node]
            util.output("starting %s ..." % node.name)
        else:
            util.output("%s still running" % node.name)

    nodes = filtered

    # Generate crash report for any crashed nodes.
    crashed = [node for node in nodes if node.hasCrashed()]
    _makeCrashReports(crashed)

    # Make working directories.
    dirs = [(node, node.cwd()) for node in nodes]
    nodes = []
    for (node, success) in execute.mkdirs(dirs):
        if success:
            nodes += [node]
        else:
            util.output("cannot create working directory for %s" % node.name)
            results += [(node, False)]

    # Start Bro process.
    cmds = []
    envs = []
    for node in nodes:
        cmds += [(node, "start", [node.cwd()] + _makeBroParams(node, True))]
        envs += [_makeEnvParam(node)]

    nodes = []
    for (node, success, output) in execute.runHelperParallel(cmds, envs=envs):
        if success:
            nodes += [node]
            node.setPID(int(output[0]))
        else:
            util.output("cannot start %s" % node.name)
            results += [(node, False)]

    # Check whether processes did indeed start up.
    hanging = []
    running = []

    for (node, success) in waitForBros(nodes, "RUNNING", 3, True):
        if success:
            running += [node]
        else:
            hanging += [node]

    # It can happen that Bro hangs in DNS lookups at startup
    # which can take a while. At this point we already know
    # that the process has been started (waitForBro ensures that).
    # If by now there is not a TERMINATED status, we assume that it
    # is doing fine and will move on to RUNNING once DNS is done.
    for (node, success) in waitForBros(hanging, "TERMINATED", 0, False):
        if success:
            util.output("%s terminated immediately after starting; check output with \"diag\"" % node.name)
            node.clearPID()
            results += [(node, False)]
        else:
            util.output("(%s still initializing)" % node.name)
            running += [node]

    for node in running:
        cron.logAction(node, "started")
        results += [(node, True)]

    return results
Example #26
def _startNodes(nodes):
    results = []

    filtered = []
    # Ignore nodes which are still running.
    for (node, isrunning) in isRunning(nodes):
        if not isrunning:
            filtered += [node]
            util.output("starting %s ..." % node.name)
        else:
            util.output("%s still running" % node.name)

    nodes = filtered

    # Generate crash report for any crashed nodes.
    crashed = [node for node in nodes if node.hasCrashed()]
    _makeCrashReports(crashed)

    # Make working directories.
    dirs = [(node, node.cwd()) for node in nodes]
    nodes = []
    for (node, success) in execute.mkdirs(dirs):
        if success:
            nodes += [node]
        else:
            util.output("cannot create working directory for %s" % node.name)
            results += [(node, False)]

    # Start Bro process.
    cmds = []
    envs = []
    for node in nodes:
        cmds += [(node, "start", [node.cwd()] + _makeBroParams(node, True))]
        envs += [_makeEnvParam(node)]

    nodes = []
    for (node, success, output) in execute.runHelperParallel(cmds, envs=envs):
        if success:
            nodes += [node]
            node.setPID(int(output[0]))
        else:
            util.output("cannot start %s" % node.name)
            results += [(node, False)]

    # Check whether processes did indeed start up.
    hanging = []
    running = []

    for (node, success) in waitForBros(nodes, "RUNNING", 3, True):
        if success:
            running += [node]
        else:
            hanging += [node]

    # It can happen that Bro hangs in DNS lookups at startup
    # which can take a while. At this point we already know
    # that the process has been started (waitForBro ensures that).
    # If by now there is not a TERMINATED status, we assume that it
    # is doing fine and will move on to RUNNING once DNS is done.
    for (node, success) in waitForBros(hanging, "TERMINATED", 0, False):
        if success:
            util.output(
                "%s terminated immediately after starting; check output with \"diag\""
                % node.name)
            node.clearPID()
            results += [(node, False)]
        else:
            util.output("(%s still initializing)" % node.name)
            running += [node]

    for node in running:
        cron.logAction(node, "started")
        results += [(node, True)]

    return results
Example #27
def _stopNodes(nodes):

    results = []
    running = []

    # Check for crashed nodes.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running += [node]
            util.output("stopping %s ..." % node.name)
        else:
            results += [(node, True)]

            if node.hasCrashed():
                _makeCrashReports([node])
                util.output("%s not running (was crashed)" % node.name)
            else:
                util.output("%s not running" % node.name)

    # Helper function to stop nodes with given signal.
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]

        return execute.runHelperParallel(cmds)
        #events = []
        #for node in nodes:
        #    events += [(node, "Control::shutdown_request", [], "Control::shutdown_response")]
        #return execute.sendEventsParallel(events)

    # Stop nodes.
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED",
                                       int(config.Config.stoptimeout), False):
        if not success:
            # Check whether it crashed during shutdown ...
            result = isRunning([node])
            for (node, isrunning) in result:
                if isrunning:
                    util.output("%s did not terminate ... killing ..." %
                                node.name)
                    kill += [node]
                else:
                    # crashed flag is set by isRunning().
                    util.output("%s crashed during shutdown" % node.name)

    if len(kill):
        # Kill those which did not terminate gracefully.
        stop(kill, 9)
        # Give them a bit of time to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:

        running = isRunning(todo.values(), setcrashed=False)

        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated += [node]
                results += [(node, True)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.

        if timeout <= 0:
            break

        time.sleep(1)
        timeout -= 1

    results += [(node, False) for node in todo]

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [node for node in terminated if not node.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds += [(node, "run-cmd", [
            os.path.join(config.Config.scriptsdir, "post-terminate"),
            node.cwd()
        ])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
Example #28
def getCapstatsOutput(nodes, interval):

    if not config.Config.capstatspath:
        if config.Config.cron == "0":
            util.warn("do not have capstats binary available")
        return []

    results = []
    cmds = []

    hosts = {}
    for node in nodes:
        if not node.interface:
            continue

        try:
            hosts[(node.addr, node.interface)] = node
        except AttributeError:
            continue

    for (addr, interface) in hosts.keys():
        node = hosts[addr, interface]

        # If interface name contains semicolons (to aggregate traffic from
        # multiple devices with PF_RING, the interface name can be in a
        # semicolon-delimited format, such as "p2p1;p2p2"), then we must
        # quote it to prevent shell from interpreting semicolon as command
        # separator (another layer of quotes is needed because the eval
        # command is used).
        capstats = [config.Config.capstatspath, "-I", str(interval), "-n", "1", "-i", "'\"%s\"'" % interface]

        # Unfinished feature: only consider a particular MAC. Works here for capstats
        # but Bro config is not adapted currently so we disable it for now.
        #        try:
        #            capstats += ["-f", "\\'", "ether dst %s" % node.ether, "\\'"]
        #        except AttributeError:
        #            pass

        cmds += [(node, "run-cmd", capstats)]

    outputs = execute.runHelperParallel(cmds)

    totals = {}

    for (node, success, output) in outputs:

        if not success:
            if output:
                results += [(node, "%s: capstats failed (%s)" % (node.name, output[0]), {})]
            else:
                results += [(node, "%s: cannot execute capstats" % node.name, {})]
            continue

        if not output:
            results += [(node, "%s: no capstats output" % node.name, {})]
            continue

        fields = output[0].split()[1:]

        if not fields:
            results += [(node, "%s: unexpected capstats output: %s" % (node.name, output[0]), {})]
            continue

        vals = {}

        try:
            for field in fields:
                (key, val) = field.split("=")
                val = float(val)
                vals[key] = val

                try:
                    totals[key] += val
                except KeyError:
                    totals[key] = val

            results += [(node, None, vals)]

        except ValueError:
            results += [(node, "%s: unexpected capstats output: %s" % (node.name, output[0]), {})]

    # Add pseudo-node for totals
    if len(nodes) > 1:
        results += [(node_mod.Node("$total"), None, totals)]

    return results
Example #29
def status(nodes):
    typewidth = 7
    hostwidth = 16
    if config.Config.standalone == "1":
        # In standalone mode, the "type" column needs more width
        typewidth = 10
        hostwidth = 13

    util.output(
        "%-12s %-*s %-*s %-9s %-6s %-6s %s"
        % ("Name", typewidth, "Type", hostwidth, "Host", "Status", "Pid", "Peers", "Started")
    )

    all = isRunning(nodes)
    running = []

    cmds1 = []
    cmds2 = []
    for (node, isrunning) in all:
        if isrunning:
            running += [node]
            cmds1 += [(node, "cat-file", ["%s/.startup" % node.cwd()])]
            cmds2 += [(node, "cat-file", ["%s/.status" % node.cwd()])]

    startups = execute.runHelperParallel(cmds1)
    statuses = execute.runHelperParallel(cmds2)

    startups = dict([(n.name, success and util.fmttime(output[0]) or "???") for (n, success, output) in startups])
    statuses = dict([(n.name, success and output[0].split()[0].lower() or "???") for (n, success, output) in statuses])

    peers = {}
    nodes = [n for n in running if statuses[n.name] == "running"]
    for (node, success, args) in _queryPeerStatus(nodes):
        if success:
            peers[node.name] = []
            for f in args[0].split():
                keyval = f.split("=")
                if len(keyval) > 1:
                    (key, val) = keyval
                    if key == "peer" and val != "":
                        peers[node.name] += [val]
        else:
            peers[node.name] = None

    for (node, isrunning) in all:

        util.output("%-12s " % node.name, nl=False)
        util.output("%-*s %-*s " % (typewidth, node.type, hostwidth, node.host), nl=False)

        if isrunning:
            util.output("%-9s " % statuses[node.name], nl=False)

        elif node.hasCrashed():
            util.output("%-9s " % "crashed", nl=False)
        else:
            util.output("%-9s " % "stopped", nl=False)

        if isrunning:
            util.output("%-6s " % node.getPID(), nl=False)

            if node.name in peers and peers[node.name] != None:
                util.output("%-6d " % len(peers[node.name]), nl=False)
            else:
                util.output("%-6s " % "???", nl=False)

            util.output("%s" % startups[node.name], nl=False)

        util.output()

    # Return True if all nodes are running
    return len(nodes) == len(all)
Example #30
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]

        return execute.runHelperParallel(cmds)
Example #31
def getTopOutput(nodes):

    results = []
    cmds = []

    running = isRunning(nodes)

    # Get all the PIDs first.

    pids = {}
    parents = {}

    for (node, isrunning) in running:
        if isrunning:
            pid = node.getPID()
            pids[node.name] = [pid]
            parents[node.name] = str(pid)

            cmds += [(node, "get-childs", [str(pid)])]
        else:
            results += [(node, "not running", [{}])]
            continue

    if not cmds:
        return results

    for (node, success, output) in execute.runHelperParallel(cmds):

        if not success:
            results += [(node, "cannot get child pids", [{}])]
            continue

        pids[node.name] += [int(line) for line in output]

    cmds = []

    # Now run top.
    for node in nodes:  # Do the loop again to keep the order.
        if not node.name in pids:
            continue

        cmds += [(node, "top", [])]

    if not cmds:
        return results

    for (node, success, output) in execute.runHelperParallel(cmds):

        if not success:
            results += [(node, "cannot get top output", [{}])]
            continue

        procs = [
            line.split() for line in output
            if int(line.split()[0]) in pids[node.name]
        ]

        if not procs:
            # It can happen that in the meantime the process is no longer there.
            results += [(node, "not running", [{}])]
            continue

        vals = []

        for p in procs:
            d = {}
            d["pid"] = int(p[0])
            d["proc"] = (p[0] == parents[node.name] and "parent" or "child")
            d["vsize"] = long(float(p[1]))  # May be something like 2.17684e+09
            d["rss"] = long(float(p[2]))
            d["cpu"] = p[3]
            d["cmd"] = " ".join(p[4:])
            vals += [d]

        results += [(node, None, vals)]

    return results