Beispiel #1
0
def _stopNodes(nodes):
    """Stop the given nodes and report the outcome per node.

    Nodes that are running get signal 15 through the "stop" helper. Those
    that waitForBros() does not report as TERMINATED and that isRunning()
    still sees are sent signal 9. Afterwards the function polls isRunning()
    once per second (ten retries) until the processes are gone, and finally
    runs the "post-terminate" script for nodes that went away without the
    crashed flag set.

    nodes: the node objects to stop.

    Returns a list of (node, success) tuples. Nodes that were not running
    to begin with count as success; success is False for nodes that were
    still running when polling gave up.
    """
    results = []
    running = []

    # Split the nodes into those that need stopping and those already down.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running.append(node)
            util.output("stopping %s ..." % node.name)
            continue

        # Nothing to stop, which counts as success.
        results.append((node, True))

        if node.hasCrashed():
            _makeCrashReports([node])
            util.output("%s not running (was crashed)" % node.name)
        else:
            util.output("%s not running" % node.name)

    # Helper function to stop nodes with given signal.
    def stop(targets, signal):
        cmds = []
        for target in targets:
            cmds.append((target, "stop", [str(target.getPID()), str(signal)]))

        return execute.runHelperParallel(cmds)

    # Stop nodes.
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED", int(config.Config.stoptimeout), False):
        if success:
            continue

        # Check whether it crashed during shutdown ...
        # Separate loop names so the outer loop variable is not rebound.
        for (checked, alive) in isRunning([node]):
            if alive:
                util.output("%s did not terminate ... killing ..." % checked.name)
                kill.append(checked)
            else:
                # Original note: the crashed flag is set by isRunning().
                util.output("%s crashed during shutdown" % checked.name)

    if kill:
        # Kill those which did not terminate gracefully.
        stop(kill, 9)
        # Give them a bit to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:
        # Pass a snapshot of the values: entries are deleted from todo while
        # the result is being consumed.
        status = isRunning(list(todo.values()), setcrashed=False)

        for (node, isrunning) in status:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated.append(node)
                results.append((node, True))

        if not todo:
            # All done.
            break

        if timeout <= 0:
            break

        # Wait a bit before we start over.
        time.sleep(1)
        timeout -= 1

    # Whatever is left never went away. todo is keyed by node name, so the
    # node objects have to come from its values to match the other entries.
    for leftover in todo.values():
        results.append((leftover, False))

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [gone for gone in terminated if not gone.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds.append((node, "run-cmd", [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd()]))

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
Beispiel #2
0
def _stopNodes(nodes):
    """Stop the given nodes and report the outcome per node.

    Running nodes are sent signal 15 via the "stop" helper. Nodes that
    waitForBros() does not report as TERMINATED and that isRunning() still
    sees are then sent signal 9. After that, isRunning() is polled once per
    second (ten retries) until all processes have disappeared. Nodes that
    went away without the crashed flag set get the "post-terminate" script
    run for them.

    nodes: the node objects to stop.

    Returns a list of (node, success) tuples. A node that was not running
    at entry is reported as success; a node still running when polling
    gives up is reported with success set to False.
    """
    results = []
    running = []

    # Check for crashed nodes and collect the ones that need stopping.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running.append(node)
            util.output("stopping %s ..." % node.name)
            continue

        # Already down: nothing to do, so this is a success.
        results.append((node, True))

        if node.hasCrashed():
            _makeCrashReports([node])
            util.output("%s not running (was crashed)" % node.name)
        else:
            util.output("%s not running" % node.name)

    # Helper function to stop nodes with given signal.
    def stop(targets, signal):
        cmds = []
        for target in targets:
            cmds.append(
                (target, "stop", [str(target.getPID()), str(signal)]))

        return execute.runHelperParallel(cmds)

    # Stop nodes.
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED",
                                       int(config.Config.stoptimeout), False):
        if success:
            continue

        # Check whether it crashed during shutdown ...
        # Distinct loop names keep the outer loop variable intact.
        for (checked, alive) in isRunning([node]):
            if alive:
                util.output("%s did not terminate ... killing ..." %
                            checked.name)
                kill.append(checked)
            else:
                # Original note: the crashed flag is set by isRunning().
                util.output("%s crashed during shutdown" % checked.name)

    if kill:
        # Kill those which did not terminate gracefully.
        stop(kill, 9)
        # Give them a bit to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:
        # Hand over a snapshot of the values, since todo shrinks while the
        # result is processed.
        status = isRunning(list(todo.values()), setcrashed=False)

        for (node, isrunning) in status:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated.append(node)
                results.append((node, True))

        if not todo:
            # All done.
            break

        if timeout <= 0:
            break

        # Wait a bit before we start over.
        time.sleep(1)
        timeout -= 1

    # Anything still in todo never disappeared. The dict is keyed by node
    # name, so take the node objects from its values; iterating the dict
    # itself would put name strings into the results.
    for leftover in todo.values():
        results.append((leftover, False))

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [gone for gone in terminated if not gone.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds.append((node, "run-cmd", [
            os.path.join(config.Config.scriptsdir, "post-terminate"),
            node.cwd()
        ]))

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
Beispiel #3
0
def _startNodes(nodes):
    """Start the given nodes and report the outcome per node.

    Nodes that isRunning() reports as running are skipped (a message is
    printed, no result entry is added). For the rest, crash reports are
    generated where the crashed flag is set, working directories are
    created, the "start" helper is run, and waitForBros() is used to check
    that the processes came up.

    nodes: the node objects to start.

    Returns a list of (node, success) tuples.
    """
    outcome = []

    # Ignore nodes which are still running.
    candidates = []
    for (candidate, alive) in isRunning(nodes):
        if alive:
            util.output("%s still running" % candidate.name)
            continue
        candidates.append(candidate)
        util.output("starting %s ..." % candidate.name)

    # Generate crash report for any crashed nodes.
    _makeCrashReports([c for c in candidates if c.hasCrashed()])

    # Make working directories; only nodes with a directory proceed.
    workdirs = [(c, c.cwd()) for c in candidates]
    ready = []
    for (entry, ok) in execute.mkdirs(workdirs):
        if ok:
            ready.append(entry)
            continue
        util.output("cannot create working directory for %s" % entry.name)
        outcome.append((entry, False))

    # Build the start command and environment for each node, in lockstep.
    launch_cmds = []
    launch_envs = []
    for entry in ready:
        launch_cmds.append((entry, "start", [entry.cwd()] + _makeBroParams(entry, True)))
        launch_envs.append(_makeEnvParam(entry))

    # Start Bro process; the helper's first output line is taken as the PID.
    started = []
    for (entry, ok, output) in execute.runHelperParallel(launch_cmds, envs=launch_envs):
        if not ok:
            util.output("cannot start %s" % entry.name)
            outcome.append((entry, False))
            continue
        started.append(entry)
        entry.setPID(int(output[0]))

    # Check whether processes did indeed start up.
    hanging = []
    running = []
    for (entry, ok) in waitForBros(started, "RUNNING", 3, True):
        if ok:
            running.append(entry)
        else:
            hanging.append(entry)

    # Per the original note: Bro can hang in DNS lookups at startup, which
    # may take a while. A node that has not reached RUNNING but also shows
    # no TERMINATED status is assumed to be fine and still initializing.
    for (entry, gone) in waitForBros(hanging, "TERMINATED", 0, False):
        if not gone:
            util.output("(%s still initializing)" % entry.name)
            running.append(entry)
            continue
        util.output("%s terminated immediately after starting; check output with \"diag\"" % entry.name)
        entry.clearPID()
        outcome.append((entry, False))

    for entry in running:
        cron.logAction(entry, "started")
        outcome.append((entry, True))

    return outcome
Beispiel #4
0
def _startNodes(nodes):
    """Start the given nodes and return a list of (node, success) tuples.

    Nodes already running according to isRunning() are only mentioned in
    the output and get no result entry. All other nodes go through crash
    report generation (if flagged as crashed), working directory creation,
    the "start" helper, and a waitForBros() check that they came up.

    nodes: the node objects to start.
    """
    report = []

    # Ignore nodes which are still running.
    pending = []
    for (item, up) in isRunning(nodes):
        if up:
            util.output("%s still running" % item.name)
        else:
            pending.append(item)
            util.output("starting %s ..." % item.name)

    # Generate crash report for any crashed nodes.
    with_crash = [item for item in pending if item.hasCrashed()]
    _makeCrashReports(with_crash)

    # Make working directories.
    have_dir = []
    for (item, created) in execute.mkdirs([(p, p.cwd()) for p in pending]):
        if not created:
            util.output("cannot create working directory for %s" % item.name)
            report.append((item, False))
        else:
            have_dir.append(item)

    # Assemble one start command and one environment entry per node.
    cmds = []
    envs = []
    for item in have_dir:
        args = [item.cwd()] + _makeBroParams(item, True)
        cmds.append((item, "start", args))
        envs.append(_makeEnvParam(item))

    # Start Bro process and record the PID from the helper's first output.
    launched = []
    for (item, ok, output) in execute.runHelperParallel(cmds, envs=envs):
        if ok:
            launched.append(item)
            item.setPID(int(output[0]))
        else:
            util.output("cannot start %s" % item.name)
            report.append((item, False))

    # Check whether processes did indeed start up.
    hanging = []
    running = []
    for (item, ok) in waitForBros(launched, "RUNNING", 3, True):
        bucket = running if ok else hanging
        bucket.append(item)

    # Original rationale: Bro may hang in DNS lookups at startup. If a node
    # that did not reach RUNNING shows no TERMINATED status either, it is
    # assumed to be doing fine and is treated as running.
    for (item, terminated) in waitForBros(hanging, "TERMINATED", 0, False):
        if terminated:
            message = "%s terminated immediately after starting; check output with \"diag\""
            util.output(message % item.name)
            item.clearPID()
            report.append((item, False))
        else:
            util.output("(%s still initializing)" % item.name)
            running.append(item)

    for item in running:
        cron.logAction(item, "started")
        report.append((item, True))

    return report