def _stopNodes(nodes):
    """Stop the given Bro nodes.

    For each node that is actually running, sends SIGTERM, waits up to
    ``config.Config.stoptimeout`` seconds for a clean TERMINATED state,
    escalates to SIGKILL for stragglers, and finally runs the
    ``post-terminate`` script for nodes that shut down without crashing.
    Nodes that were already stopped are reported as successes up front.

    Returns a list of ``(node, success)`` tuples, one per input node.
    """
    results = []
    running = []

    # Split nodes into those actually running and those already down;
    # generate crash reports for any node that died unexpectedly.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running += [node]
            util.output("stopping %s ..." % node.name)
        else:
            # Already stopped counts as success.
            results += [(node, True)]

            if node.hasCrashed():
                _makeCrashReports([node])
                util.output("%s not running (was crashed)" % node.name)
            else:
                util.output("%s not running" % node.name)

    # Helper function: send the given signal to each node's process,
    # in parallel via the "stop" helper script.
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]
        return execute.runHelperParallel(cmds)

    # Ask nodes to shut down gracefully (signal 15 == SIGTERM).
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED", int(config.Config.stoptimeout), False):
        if not success:
            # Check whether it crashed during shutdown ...
            result = isRunning([node])
            for (node, isrunning) in result:
                if isrunning:
                    util.output("%s did not terminate ... killing ..." % node.name)
                    kill += [node]
                else:
                    # crashed flag is set by isRunning().
                    util.output("%s crashed during shutdown" % node.name)

    if len(kill):
        # Kill those which did not terminate gracefully (signal 9 == SIGKILL).
        stop(kill, 9)
        # Give them a bit to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:
        running = isRunning(todo.values(), setcrashed=False)
        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated += [node]
                results += [(node, True)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.
        if timeout <= 0:
            break

        time.sleep(1)
        timeout -= 1

    # Fix: iterate the remaining node *objects* (dict values), not the dict's
    # keys (node names), so results consistently holds (node, success) tuples.
    results += [(node, False) for node in todo.values()]

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [node for node in terminated if not node.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds += [(node, "run-cmd", [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd()])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
def _stopNodes(nodes):
    """Stop the given Bro nodes.

    For each node that is actually running, sends SIGTERM, waits up to
    ``config.Config.stoptimeout`` seconds for a clean TERMINATED state,
    escalates to SIGKILL for stragglers, and finally runs the
    ``post-terminate`` script for nodes that shut down without crashing.
    Nodes that were already stopped are reported as successes up front.

    Returns a list of ``(node, success)`` tuples, one per input node.
    """
    results = []
    running = []

    # Split nodes into those actually running and those already down;
    # generate crash reports for any node that died unexpectedly.
    for (node, isrunning) in isRunning(nodes):
        if isrunning:
            running += [node]
            util.output("stopping %s ..." % node.name)
        else:
            # Already stopped counts as success.
            results += [(node, True)]

            if node.hasCrashed():
                _makeCrashReports([node])
                util.output("%s not running (was crashed)" % node.name)
            else:
                util.output("%s not running" % node.name)

    # Helper function: send the given signal to each node's process,
    # in parallel via the "stop" helper script.
    def stop(nodes, signal):
        cmds = []
        for node in nodes:
            cmds += [(node, "stop", [str(node.getPID()), str(signal)])]
        return execute.runHelperParallel(cmds)

    # Ask nodes to shut down gracefully (signal 15 == SIGTERM).
    for (node, success, output) in stop(running, 15):
        if not success:
            util.output("failed to send stop signal to %s" % node.name)

    if running:
        time.sleep(1)

    # Check whether they terminated.
    terminated = []
    kill = []
    for (node, success) in waitForBros(running, "TERMINATED", int(config.Config.stoptimeout), False):
        if not success:
            # Check whether it crashed during shutdown ...
            result = isRunning([node])
            for (node, isrunning) in result:
                if isrunning:
                    util.output("%s did not terminate ... killing ..." % node.name)
                    kill += [node]
                else:
                    # crashed flag is set by isRunning().
                    util.output("%s crashed during shutdown" % node.name)

    if len(kill):
        # Kill those which did not terminate gracefully (signal 9 == SIGKILL).
        stop(kill, 9)
        # Give them a bit to disappear.
        time.sleep(5)

    # Check which are still running. We check all nodes to be on the safe side
    # and give them a bit more time to finally disappear.
    timeout = 10

    todo = {}
    for node in running:
        todo[node.name] = node

    while True:
        running = isRunning(todo.values(), setcrashed=False)
        for (node, isrunning) in running:
            if node.name in todo and not isrunning:
                # Alright, it's gone.
                del todo[node.name]
                terminated += [node]
                results += [(node, True)]

        if len(todo) == 0:
            # All done.
            break

        # Wait a bit before we start over.
        if timeout <= 0:
            break

        time.sleep(1)
        timeout -= 1

    # Fix: iterate the remaining node *objects* (dict values), not the dict's
    # keys (node names), so results consistently holds (node, success) tuples.
    results += [(node, False) for node in todo.values()]

    # Do post-terminate cleanup for those which terminated gracefully.
    cleanup = [node for node in terminated if not node.hasCrashed()]

    cmds = []
    for node in cleanup:
        cmds += [(node, "run-cmd", [os.path.join(config.Config.scriptsdir, "post-terminate"), node.cwd()])]

    for (node, success, output) in execute.runHelperParallel(cmds):
        if not success:
            util.output("cannot run post-terminate for %s" % node.name)
            cron.logAction(node, "stopped (failed)")
        else:
            cron.logAction(node, "stopped")

        node.clearPID()
        node.clearCrashed()

    return results
def _startNodes(nodes):
    """Start the given Bro nodes.

    Skips nodes that are already running, generates crash reports for
    previously crashed nodes, creates each node's working directory,
    launches the Bro process, and verifies that it reaches the RUNNING
    state. Nodes that appear to hang at startup (e.g., in DNS lookups)
    are given the benefit of the doubt as long as they have not
    terminated.

    Returns a list of ``(node, success)`` tuples, one per node acted upon.
    """
    results = []

    filtered = []
    # Ignore nodes which are still running.
    for (node, isrunning) in isRunning(nodes):
        if not isrunning:
            filtered += [node]
            util.output("starting %s ..." % node.name)
        else:
            util.output("%s still running" % node.name)

    nodes = filtered

    # Generate crash report for any crashed nodes.
    crashed = [node for node in nodes if node.hasCrashed()]
    _makeCrashReports(crashed)

    # Make working directories.
    dirs = [(node, node.cwd()) for node in nodes]
    nodes = []
    for (node, success) in execute.mkdirs(dirs):
        if success:
            nodes += [node]
        else:
            util.output("cannot create working directory for %s" % node.name)
            results += [(node, False)]

    # Start Bro process.
    cmds = []
    envs = []
    for node in nodes:
        cmds += [(node, "start", [node.cwd()] + _makeBroParams(node, True))]
        envs += [_makeEnvParam(node)]

    nodes = []
    for (node, success, output) in execute.runHelperParallel(cmds, envs=envs):
        if success:
            nodes += [node]
            # First line of helper output is the new process's PID.
            node.setPID(int(output[0]))
        else:
            util.output("cannot start %s" % node.name)
            results += [(node, False)]

    # Check whether processes did indeed start up.
    hanging = []
    running = []

    for (node, success) in waitForBros(nodes, "RUNNING", 3, True):
        if success:
            running += [node]
        else:
            hanging += [node]

    # It can happen that Bro hangs in DNS lookups at startup
    # which can take a while. At this point we already know
    # that the process has been started (waitForBro ensures that).
    # If by now there is not a TERMINATED status, we assume that it
    # is doing fine and will move on to RUNNING once DNS is done.
    for (node, success) in waitForBros(hanging, "TERMINATED", 0, False):
        if success:
            util.output("%s terminated immediately after starting; check output with \"diag\"" % node.name)
            node.clearPID()
            results += [(node, False)]
        else:
            util.output("(%s still initializing)" % node.name)
            running += [node]

    for node in running:
        cron.logAction(node, "started")
        results += [(node, True)]

    return results
def _startNodes(nodes):
    """Start the given Bro nodes.

    Skips nodes that are already running, generates crash reports for
    previously crashed nodes, creates each node's working directory,
    launches the Bro process, and verifies that it reaches the RUNNING
    state. Nodes that appear to hang at startup (e.g., in DNS lookups)
    are given the benefit of the doubt as long as they have not
    terminated.

    Returns a list of ``(node, success)`` tuples, one per node acted upon.
    """
    results = []

    filtered = []
    # Ignore nodes which are still running.
    for (node, isrunning) in isRunning(nodes):
        if not isrunning:
            filtered += [node]
            util.output("starting %s ..." % node.name)
        else:
            util.output("%s still running" % node.name)

    nodes = filtered

    # Generate crash report for any crashed nodes.
    crashed = [node for node in nodes if node.hasCrashed()]
    _makeCrashReports(crashed)

    # Make working directories.
    dirs = [(node, node.cwd()) for node in nodes]
    nodes = []
    for (node, success) in execute.mkdirs(dirs):
        if success:
            nodes += [node]
        else:
            util.output("cannot create working directory for %s" % node.name)
            results += [(node, False)]

    # Start Bro process.
    cmds = []
    envs = []
    for node in nodes:
        cmds += [(node, "start", [node.cwd()] + _makeBroParams(node, True))]
        envs += [_makeEnvParam(node)]

    nodes = []
    for (node, success, output) in execute.runHelperParallel(cmds, envs=envs):
        if success:
            nodes += [node]
            # First line of helper output is the new process's PID.
            node.setPID(int(output[0]))
        else:
            util.output("cannot start %s" % node.name)
            results += [(node, False)]

    # Check whether processes did indeed start up.
    hanging = []
    running = []

    for (node, success) in waitForBros(nodes, "RUNNING", 3, True):
        if success:
            running += [node]
        else:
            hanging += [node]

    # It can happen that Bro hangs in DNS lookups at startup
    # which can take a while. At this point we already know
    # that the process has been started (waitForBro ensures that).
    # If by now there is not a TERMINATED status, we assume that it
    # is doing fine and will move on to RUNNING once DNS is done.
    for (node, success) in waitForBros(hanging, "TERMINATED", 0, False):
        if success:
            util.output("%s terminated immediately after starting; check output with \"diag\"" % node.name)
            node.clearPID()
            results += [(node, False)]
        else:
            util.output("(%s still initializing)" % node.name)
            running += [node]

    for node in running:
        cron.logAction(node, "started")
        results += [(node, True)]

    return results