def testFDTDKillProcess2():
    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    # need long-running, non blocking process
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id", command, blocking=False, caller=daemon,
                        logger=logger, killTimeout=0)
    try:
        executor.execute()
        daemon.killProcess("some_id", logger)
    finally:
        # definitely release port and kill the process
        daemon.shutdown()
        daemon.pyroDaemon.closedown()
    try:
        p = Process(executor.proc.pid)
        m = ("FAIL: Process PID:%s should have been "
             "killed." % executor.proc.pid)
        logger.debug(m)
        py.test.fail(m)
    except NoSuchProcess as ex:
        logger.debug("OK: Process PID:%s doesn't exist now." %
                     executor.proc.pid)
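# Note (editorial sketch, not from the project): the killCommand entry in the
# config above uses Python's %-style named interpolation; the daemon is
# expected to substitute the PID of the process to kill before running the
# wrapper script. A minimal, self-contained illustration of that mechanism:
def _illustrateKillCommandInterpolation():
    killCommand = "../wrapper_kill.sh %(pid)s"   # as in the [general] section
    pid = 12345                                  # hypothetical PID
    # -> "../wrapper_kill.sh 12345"
    return killCommand % {"pid": pid}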
def testFDTDWaitingTimeoutWhenCleanup():
    """
    Test issues a long-running job (dd copy) in the background
    (non-blocking) and, when killing the job, the kill timeout is set
    higher than the moment the ALARM signal is issued.
    It is verified that the ALARM signal was raised but, as implemented,
    it is in fact ignored. This is more obvious when killTimeout is set
    much higher.
    Implementation of #33 - CleanupProcessesAction - attribute to
    ignore any wait-to-finish timeouts
    """
    class Handler(object):
        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets the flag to check whether some reaction was
            # successfully invoked
            self.flag = True

    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)
    # need a long-running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # set a long timeout (will be interrupted sooner by the alarm - while
    # waiting on the kill timeout)
    e = Executor("some_id", command, caller=fdtd, blocking=False,
                 killTimeout=2, logger=logger)
    try:
        e.execute()
        # the command is running now
        # try killing the command
        # since waitTimeout = True, kill will be waiting
        cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=True)
        handler = Handler(False, testName)
        signal.signal(signal.SIGALRM, handler.signalHandler)
        assert handler.flag == False
        print("test %s is waiting here ..." % testName)
        signal.alarm(1)  # raise alarm in timeout seconds
        cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
        signal.alarm(0)  # disable alarm
        # but the alarm was called during this waiting (test flag value)
        assert handler.flag
    finally:
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
def testFDTDAndExecutorContainerHandling():
    c = """
[general]
port = 6700
debug = DEBUG
portRangeFDTServer = 54321,54323
killCommand = "kill -9 %(pid)s"
killCommandSudo = "kill -9 %(pid)s"
"""
    f = getTempFile(c)
    inputOptions = "-d DEBUG -p 6700 --config=%s" % f.name
    conf = ConfigFDTD(inputOptions.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    assert len(daemon._executors) == 0
    # needs to be a blocking command: a simple ls finishes too quickly and
    # with the default non-blocking mode it would be considered failed;
    # just need an existing process and executor container handling with it
    executor = Executor("some_id", "ls /tmp", logger=logger, blocking=True)
    executor.execute()
    daemon.addExecutor(executor)
    # subsequent adding of the same executor (the same id) shall raise
    # an exception
    py.test.raises(FDTDException, daemon.addExecutor, executor)
    daemon.removeExecutor(executor)
    assert len(daemon._executors) == 0
    # need a long-running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # caller is not explicitly specified, so all executors container
    # manipulation is done from this test
    executor = Executor("some_id", command, blocking=False, killTimeout=0,
                        logger=logger)
    executor.execute()
    daemon.addExecutor(executor)
    assert len(daemon._executors) == 1
    # check getting the executor reference from the container
    ex = daemon.getExecutor("some_id_nonsense")  # doesn't exist
    assert ex is None
    ex = daemon.getExecutor("some_id")
    assert ex == executor
    # this should in fact discover that the process is still running
    # and not remove it
    daemon.removeExecutor(executor)
    assert len(daemon._executors) == 1
    # now kill the process and try to remove it afterwards - tests a
    # different branch of removeExecutor
    daemon.killProcess(executor.id, logger)
    assert len(daemon._executors) == 0  # should have been removed
    daemon.shutdown()
    daemon.pyroDaemon.closedown()
def testExecutorNonBlockingWaitingForLogOutput():
    script = """
c=2
while [ $c -gt 0 ]
do
    echo "some undesired logging output"
    sleep 1
    let "c -= 1"
done
echo "expected logging output"
sleep 1
"""
    f = getTempFile(script)
    command = "bash %s" % f.name

    # wait for the correct log output
    logOutputToWaitFor = "expected logging output"
    e = Executor("some_id", command, blocking=False,
                 logOutputToWaitFor=logOutputToWaitFor,
                 logOutputWaitTime=3)
    logs = e.execute()
    # the process shall be considered running - the expected log output
    # was captured
    assert e.returncode is None
    time.sleep(3)
    # the internal Executor returncode hasn't been updated since the last
    # call chain: .execute()
    assert e.proc.poll() == 0  # by now it shall have finished with 0 (no errors)

    # wait for the wrong log output
    logOutputToWaitFor = "EXPECTED LOGGING OUTPUT"
    # wait longer than the script's run time; the script will finish, but
    # from the point of view of a non-blocking, long-running process it is
    # as if the process failed
    e = Executor("some_id", command, blocking=False,
                 logOutputToWaitFor=logOutputToWaitFor,
                 logOutputWaitTime=5)
    py.test.raises(ExecutorException, e.execute)

    # wait too short a time, so it seems the process runs anyway, and check that
    logOutputToWaitFor = "rubbish"
    e = Executor("some_id", command, blocking=False,
                 logOutputToWaitFor=logOutputToWaitFor,
                 logOutputWaitTime=1)
    logs = e.execute()
    time.sleep(0.5)
    # even after this time, the process shall still be running
    assert e.proc.poll() is None
    time.sleep(1)
    # by now it shall be finished
    assert e.proc.poll() == 0
def testKillProcessTimeout():
    script = """
c=$1
while [ $c -gt 0 ]
do
    sleep 1
    let "c -= 1"
done
exit 0
"""
    inputOptions = "-d DEBUG -p 6700"
    conf = ConfigFDTD(inputOptions.split())
    logger = Logger("test logger", level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    f = getTempFile(script)
    # the script waits 5s
    command = "bash %s 5" % f.name
    # wait only 3s before killing
    e = Executor("some_id", command, caller=daemon, blocking=False,
                 killTimeout=3)
    try:
        output = e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
    # the process was still running, the timeout elapsed and it was killed
    assert e.proc.poll() == -9

    # try a different port; even if the previous one was released, an
    # immediate rebinding attempt makes PYRO fail
    inputOptions = "-d DEBUG -p 6701"
    conf = ConfigFDTD(inputOptions.split())
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    # the script waits 2s
    command = "bash %s 2" % f.name
    e = Executor("some_id", command, caller=daemon, blocking=False,
                 killTimeout=4)
    try:
        output = e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
    # the process should have finished normally while waiting killTimeout
    assert e.proc.poll() == 0
def testExecutorBlocking():
    command = "ls -la1 /tmp"
    e = Executor("some_id", command, blocking=True, caller=MockCaller())
    logs = e.execute()
    # test the __str__ method
    print("running '%s'" % e)
    print("logs:%s" % logs)
    s = e.getLogs()

    e = Executor("some_id", command, blocking=True, caller=None)
    logs = e.execute()
    # test the __str__ method
    print("running '%s'" % e)
    print("logs:%s" % logs)
    s = e.getLogs()
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """This method is called on the remote site where fdtd runs - here
    all configuration details are also known, thus the final set up has
    to happen here.
    """
    startTime = datetime.datetime.now()
    # may fail with a subset of FDTDException which will be propagated
    port = caller.getFreePort()
    self._setUp(conf, port)
    user = self.options["sudouser"]
    logger.debug("Local grid user is '%s'" % user)
    toWaitFor = conf.get("fdtServerLogOutputToWaitFor") % dict(port=port)
    toWaitTimeout = conf.get("fdtServerLogOutputTimeout")
    killTimeout = conf.get("fdtReceivingServerKillTimeout")
    executor = Executor(self.id, caller=caller, command=self.command,
                        blocking=False, port=port, userName=user,
                        logOutputToWaitFor=toWaitFor,
                        killTimeout=killTimeout,
                        logOutputWaitTime=toWaitTimeout,
                        logger=logger)
    try:
        output = executor.execute()
    except ExecutorException as ex:
        m = ("Could not start FDT server on %s port: %s, reason: %s" %
             (getHostName(), port, ex))
        caller.releasePort(port)
        logger.error(m)
        raise FDTDException(m)
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """This method is invoked by fdtcp (locally)."""
    # fileNameToStoreRemoteUserName - file into which AuthClient
    # stores the name of the Grid user at the remote party; this
    # information is then forwarded later to fdtd, which doesn't
    # have to do the user mapping lookup again
    fileName = "/tmp/" + self.id + "--" + getRandomString('a', 'z', 5)
    if os.path.exists(fileName):
        raise FDTCopyException("File %s exists." % fileName)
    self._setUp(conf, fileName)
    executor = Executor(self.id, self.command, blocking=True, logger=logger)
    # here the Java AuthClient stores the Grid user name into the file
    output = executor.execute()
    try:
        remoteGridUser = open(fileName, 'r').read()
        os.remove(fileName)
    except Exception as ex:
        m = ("Problem handling file %s (reading remote Grid user "
             "name), reason: %s" % (fileName, ex))
        raise FDTCopyException(m)
def testExecutorNonBlocking():
    # need long running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    e = Executor("some_id", command, blocking=False, caller=MockCaller())
    try:
        e.execute()
        assert e.proc.poll() is None  # means command is running
        assert len(e.caller.processes) == 1
    finally:
        os.kill(e.proc.pid, signal.SIGKILL)

    e = Executor("some_id", command, blocking=False, caller=None)
    try:
        e.execute()
        assert e.proc.poll() is None  # means command is running
    finally:
        os.kill(e.proc.pid, signal.SIGKILL)
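# The tests above rely on two helpers that are not shown in this collection:
# getTempFile() and MockCaller. The following is only a hedged sketch of what
# they might look like, inferred from how they are used (getTempFile(content)
# must expose .name; MockCaller must expose a `processes` container that gains
# one entry per started non-blocking Executor). The real implementations in
# the project may differ, including the method names assumed here.
import tempfile


def getTempFile(content):
    """Write content to a named temporary file and return the open file."""
    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".fdtcp-test",
                                      delete=False)
    tmp.write(content)
    tmp.flush()
    return tmp


class MockCaller(object):
    """Minimal stand-in for the FDTD daemon used as Executor caller."""

    def __init__(self):
        self.processes = {}

    def addExecutor(self, executor):
        # assumed hook: a non-blocking Executor registers itself with its caller
        self.processes[executor.id] = executor

    def removeExecutor(self, executor):
        self.processes.pop(executor.id, None)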
def testFDTDKillProcess():
    inputOptions = "-d DEBUG -p 6700"
    conf = ConfigFDTD(inputOptions.split())
    logger = Logger("test logger", level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    # need long-running, non blocking process
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    executor = Executor("some_id", command, blocking=False, caller=daemon,
                        logger=logger)
    try:
        executor.execute()
        daemon.killProcess("some_id", logger)
    finally:
        # definitely release port and kill the process
        daemon.shutdown()
        # if kill wasn't successful try again
        try:
            os.kill(executor.proc.pid, signal.SIGKILL)
        except OSError:
            pass
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """
    This method is invoked by fdtd once the action object is received
    from remote fdtcp (where the action instance was created).
    Options known on fdtd are set on the action instance (e.g. finalizing
    command for invoking FDT Java - location of fdt.jar is known only at
    the fdtd site).
    """
    # this method is called on the PYRO service side, use its logger
    # from now on
    localGridUser = self.options["gridUserSrc"]
    logger.debug("Local grid user is '%s'" % localGridUser)
    self.options["sudouser"] = localGridUser
    self._setUp(conf)
    killTimeout = conf.get("fdtSendingClientKillTimeout")
    executor = Executor(self.id, self.command, blocking=True, caller=caller,
                        userName=localGridUser, killTimeout=killTimeout,
                        syncFlag=False, logger=logger)
    try:
        try:
            output = executor.execute()
        except ExecutorException as ex:
            m = ("FDT Java client on %s failed, "
                 "reason: %s" % (getHostName(), ex))
            logger.error(m)
            raise FDTDException(m)
        except Exception as ex:
            m = ("FDT Java client on %s failed, "
                 "reason: %s" % (getHostName(), ex))
            logger.error(m, traceBack=True)
            raise FDTDException(m)
        else:
            # no other exception was raised during execution
            r = Result(self.id)
            r.status = 0
            r.log = output
            r.msg = "Output from FDT client"
            logger.debug("FDT client log (as sent to "
                         "fdtcp):\n%s" % output)
            return r
    finally:
        # give a signal on this action's Executor instance that its handling
        # finished (e.g. CleanupProcessesAction may be waiting for this)
        executor.syncFlag = False
def testExecutorLogsNotCatching():
    outputLogs = """------------------------------------------------------------------------------
stdout:
<catching disabled on request>
------------------------------------------------------------------------------
stderr:
<catching disabled on request>
------------------------------------------------------------------------------"""
    script = """
echo "output to stdout" > /dev/stdout
echo "output to stderr" > /dev/stderr
"""
    f = getTempFile(script)
    command = "bash %s" % f.name
    e = Executor("some_id", command, caller=None, catchLogs=False,
                 blocking=True)
    logs = e.execute()
def testExecutorLogsCatching():
    outputLogs = """------------------------------------------------------------------------------
stdout:
output to stdout
------------------------------------------------------------------------------
stderr:
output to stderr
------------------------------------------------------------------------------"""
    script = """
echo "output to stdout" > /dev/stdout
echo "output to stderr" > /dev/stderr
"""
    f = getTempFile(script)
    command = "bash %s" % f.name
    e = Executor("some_id", command, caller=None, blocking=True)
    output = e.execute()
    logs = e.getLogs()
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """
    This method is called on the remote site where fdtd runs - here
    all configuration details are also known, thus the final set up
    has to happen here.
    """
    startTime = datetime.datetime.now()
    # may fail with a subset of FDTDException which will be propagated
    port = caller.getFreePort()
    self._setUp(conf, port)
    destFiles = self.options["destFiles"]
    logger.info("%s - checking presence of files at target "
                "location ..." % self.__class__.__name__)
    logger.debug("Results:\n%s" % self._checkTargetFileNames(destFiles))
    user = self.options["sudouser"]
    logger.debug("Local grid user is '%s'" % user)
    toWaitFor = conf.get("fdtServerLogOutputToWaitFor") % dict(port=port)
    toWaitTimeout = conf.get("fdtServerLogOutputTimeout")
    killTimeout = conf.get("fdtReceivingServerKillTimeout")
    executor = Executor(self.id, caller=caller, command=self.command,
                        blocking=False, port=port, userName=user,
                        logOutputToWaitFor=toWaitFor,
                        killTimeout=killTimeout,
                        logOutputWaitTime=toWaitTimeout,
                        logger=logger)
    try:
        output = executor.execute()
    # on errors, do not do any cleanup or port releasing; from
    # Executor.execute() the instance is in the container and shall
    # be handled by CleanupProcessesAction
    except Exception as ex:
        m = ("Could not start FDT server on %s port: %s, reason: %s" %
             (getHostName(), port, ex))
        logger.critical(m, traceBack=True)
        self._checkForAddressAlreadyInUseError(str(ex), port, logger)
        raise FDTDException(m)
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """
    This method is invoked by fdtd once the action object is received
    from remote fdtcp (where the action instance was created).
    Options known on fdtd are set on the action instance (e.g. finalizing
    command for invoking FDT Java - location of fdt.jar is known only at
    the fdtd site).
    """
    # this method is called on the PYRO service side, use its logger
    # from now on
    localGridUser = self.options["gridUserSrc"]
    logger.debug("Local grid user is '%s'" % localGridUser)
    self.options["sudouser"] = localGridUser
    self._setUp(conf)
    killTimeout = conf.get("fdtSendingClientKillTimeout")
    executor = Executor(self.id, self.command, blocking=True, caller=caller,
                        userName=localGridUser, killTimeout=killTimeout,
                        syncFlag=True, logger=logger)
    try:
        try:
            output = executor.execute()
        except ExecutorException as ex:
            m = ("FDT Java client on %s failed, "
                 "reason: %s" % (getHostName(), ex))
            logger.error(m)
            raise FDTDException(m)
        except Exception as ex:
            m = ("FDT Java client on %s failed, "
                 "reason: %s" % (getHostName(), ex))
            logger.error(m, traceBack=True)
            raise FDTDException(m)
    finally:
        # give a signal on this action's Executor instance that its handling
        # finished (e.g. CleanupProcessesAction may be waiting for this)
        executor.syncFlag = False
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """This method is invoked by fdtd once the action object is received
    from remote fdtcp (where the action instance was created).
    Options known on fdtd are set on the action instance (e.g. finalizing
    command for invoking FDT Java - location of fdt.jar is known only at
    the fdtd site).
    """
    # this method is called on the PYRO service side, use its logger
    # from now on
    localGridUser = self.options["gridUserSrc"]
    logger.debug("Local grid user is '%s'" % localGridUser)
    self.options["sudouser"] = localGridUser
    self._setUp(conf)
    killTimeout = conf.get("fdtSendingClientKillTimeout")
    executor = Executor(self.id, self.command, blocking=True, caller=caller,
                        userName=localGridUser, killTimeout=killTimeout,
                        logger=logger)
    try:
        output = executor.execute()
    except ExecutorException as ex:
        m = ("Could not start FDT client on %s, reason: %s" %
             (getHostName(), ex))
        logger.error(m)
        raise FDTDException(m)
def execute(self, conf=None, caller=None, apMon=None, logger=None):
    """
    This method is called on the remote site where fdtd runs - here
    all configuration details are also known, thus the final set up
    has to happen here.
    """
    startTime = datetime.datetime.now()
    # may fail with a subset of FDTDException which will be propagated
    port = None
    if 'portServer' in self.options:
        logger.info("Forcing to use user specified port %s" %
                    self.options['portServer'])
        try:
            port = int(self.options['portServer'])
        except (TypeError, ValueError) as er:
            logger.info("Provided portServer key is not convertible to "
                        "integer: %s, Error: %s" %
                        (self.options['portServer'], er))
            port = int(caller.getFreePort())
    else:
        logger.info("Try to get a free port")
        port = int(caller.getFreePort())
    self._setUp(conf, port)
    destFiles = self.options["destFiles"]
    logger.info("%s - checking presence of files at target "
                "location ..." % self.__class__.__name__)
    logger.debug("Results:\n%s" % self._checkTargetFileNames(destFiles))
    user = self.options["sudouser"]
    logger.debug("Local grid user is '%s'" % user)
    toWaitFor = conf.get("fdtServerLogOutputToWaitFor") % dict(port=port)
    toWaitTimeout = conf.get("fdtServerLogOutputTimeout")
    killTimeout = conf.get("fdtReceivingServerKillTimeout")
    executor = Executor(self.id, caller=caller, command=self.command,
                        blocking=False, port=port, userName=user,
                        logOutputToWaitFor=toWaitFor,
                        killTimeout=killTimeout,
                        logOutputWaitTime=toWaitTimeout,
                        logger=logger)
    try:
        output = executor.execute()
    # on errors, do not do any cleanup or port releasing; from
    # Executor.execute() the instance is in the container and shall
    # be handled by CleanupProcessesAction
    except Exception as ex:
        raiser, ex = self._checkForAddressAlreadyInUseError(str(ex), port,
                                                            logger)
        m = ("Could not start FDT server on %s port: %s, reason: %s" %
             (getHostName(), port, ex))
        logger.critical(m, traceBack=True)
        raise raiser(m)
    else:
        r = Result(self.id)
        r.status = 0
        # port on which the FDT Java server runs
        r.serverPort = port
        r.msg = "FDT server is running"
        r.log = output
        logger.debug("Response to client: %s" % r)
        endTime = datetime.datetime.now()
        elapsed = (endTime - startTime).seconds
        par = dict(id=self.id, fdt_server_init=elapsed)
        logger.debug("Starting FDT server lasted: %s [s]." % elapsed)
        if apMon:
            logger.debug("Sending data to ApMon ...")
            apMon.sendParameters("fdtd_server_writer", None, par)
        return r
def testKillProcessTimeout():
    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    script = """
c=$1
while [ $c -gt 0 ]
do
    sleep 1
    let "c -= 1"
done
exit 0
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    f = getTempFile(script)
    # the script waits 5s
    command = "bash %s 5" % f.name
    # wait only 3s before killing
    e = Executor("some_id", command, caller=daemon, blocking=False,
                 killTimeout=3, logger=logger)
    try:
        output = e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
        daemon.pyroDaemon.closedown()
    # the process was still running, the timeout elapsed and it was killed
    assert e.proc.poll() == -9

    # try a different port; even if the previous one was released, an
    # immediate rebinding attempt makes PYRO fail
    inputOptions = "-d DEBUG -p 6701"
    conf = ConfigFDTD(inputOptions.split())
    apMon = None
    daemon = FDTD(conf, apMon, logger)
    # the script waits 2s
    command = "bash %s 2" % f.name
    e = Executor("some_id", command, caller=daemon, blocking=False,
                 killTimeout=4, logger=logger)
    try:
        output = e.execute()
        daemon.killProcess("some_id", logger)
    finally:
        daemon.shutdown()
        daemon.pyroDaemon.closedown()
    # the process should have finished normally while waiting killTimeout
    assert e.proc.poll() == 0
def testFDTDNotWaitingTimeoutWhenCleanupForced():
    """
    Test issues a long-running job (dd copy) in the background
    (non-blocking) and, when killing the job, the kill timeout is set
    high. But no timeout is waited for and the command is killed
    immediately.
    The scheduled ALARM is never raised and the command shall finish
    immediately (the flag set in the signal handler never changes).
    Implementation of #33 - CleanupProcessesAction - attribute to
    ignore any wait-to-finish timeouts
    """
    class Handler(object):
        def __init__(self, flag, testName):
            self.flag = flag
            self.testName = testName

        def signalHandler(self, signum, frame):
            print("test %s signal handler called (sig: %s)" %
                  (self.testName, signum))
            # sets the flag to check whether some reaction was
            # successfully invoked
            self.flag = True

    c = """
[general]
port = 6700
debug = DEBUG
killCommand = ../wrapper_kill.sh %(pid)s
portRangeFDTServer = 54321,54400
"""
    f = getTempFile(c)
    inputOption = "--config=%s" % f.name
    conf = ConfigFDTD(inputOption.split())
    testName = inspect.stack()[0][3]
    logger = Logger(name=testName, level=logging.DEBUG)
    apMon = None
    fdtd = FDTD(conf, apMon, logger)
    # need a long-running job
    command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400"
    # set a long timeout, the process shall be killed immediately anyway
    e = Executor("some_id", command, caller=fdtd, blocking=False,
                 killTimeout=100, logger=logger)
    try:
        e.execute()
        # the command is running now
        # try killing the command
        # since waitTimeout = False, it shall be killed immediately
        cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=False)
        handler = Handler(False, testName)
        signal.signal(signal.SIGALRM, handler.signalHandler)
        assert handler.flag == False
        signal.alarm(1)  # raise alarm in timeout seconds
        # should happen immediately so that ALARM is not raised
        cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger)
        signal.alarm(0)  # disable alarm
        # the alarm shouldn't have been called - the value should have
        # remained the same
        assert handler.flag == False
    finally:
        fdtd.shutdown()
        fdtd.pyroDaemon.closedown()
class ReceivingServerAction(Action):
    """
    Receiving Server Action
    """

    def __init__(self, options):
        """
        Instance of this class is created by fdtcp and some parameters
        are passed (options); options is a dictionary.
        """
        self.apMon = None
        self.id = options['transferId']
        Action.__init__(self, self.id)
        self.options = options
        self.command = None
        self.status = -1
        self.logger = self.options['logger']
        self.caller = self.options['caller']
        self.conf = self.options['conf']
        self.executor = None
        self.port = None
        if 'apmonObj' in self.options.keys():
            self.apMon = self.options['apmonObj']

    def _setUp(self, conf, port):
        """
        Setup Receiver server action objects
        """
        # separate method for testing purposes, basically sets up the command
        self.options["sudouser"] = self.options["gridUserDest"]
        self.options["port"] = port
        if 'monID' not in self.options:
            self.options["monID"] = self.id
        newOptions = self.options
        newOptions['apmonDest'] = conf.get("apMonDestinations")
        if not self.apMon:
            self.apMon = getApmonObj(newOptions['apmonDest'])
        self.command = conf.get("fdtReceivingServerCommand") % newOptions

    def _checkTargetFileNames(self, destFiles):
        """
        #36 - additional checks to debug AlreadyBeingCreatedException
        Check all the paths in the supplied list and log whether the files
        as well as the dotName form of the files exist. The intention is
        to help debug HDFS AlreadyBeingCreatedException
        """
        out = ""
        ind = ' ' * 4
        for fileName in destFiles:
            exists = os.path.exists(fileName)
            out = ind.join([out, "exists %5s: %s\n" % (exists, fileName)])
            dotName = '.' + os.path.basename(fileName)
            dotNameFull = os.path.join(os.path.dirname(fileName), dotName)
            exists = os.path.exists(dotNameFull)
            out = ind.join([out, "exists %5s: %s\n" % (exists, dotNameFull)])
        return out

    def _checkAddrAlreadyInUseError(self, exMsg):
        """
        Check if an "Address already in use" error occurred and if so try
        to find out which process holds the port. Also return what to raise.
        """
        errMsg = "Address already in use"
        self.logger.debug("Checking for '%s' error message (port: %s) ... " %
                          (errMsg, self.port))
        addressUsedObj = re.compile(errMsg)
        match = addressUsedObj.search(exMsg)
        if not match:
            self.logger.debug("Error message '%s' not found, different "
                              "failure." % errMsg)
            return FDTDException, exMsg
        self.logger.debug("'%s' problem detected, analyzing running "
                          "processes ..." % errMsg)
        startTime = datetime.datetime.now()
        found = False
        procs = psutil.pids()
        self.logger.debug("Going to check %s processes ..." % len(procs))
        for pid in procs:
            try:
                proc = psutil.Process(pid)
                conns = proc.connections()
            except psutil.AccessDenied:
                self.logger.debug("Access denied to process PID: %s, "
                                  "continue ..." % pid)
                continue
            for conn in conns:
                # it can have multiple connections, so need to check all
                try:
                    connPort = int(conn.laddr[1])
                    if self.port == connPort:
                        msg = ("Detected: process PID: %s occupies port: %s "
                               "(user: %s, cmdline: %s)" %
                               (pid, self.port, proc.username(),
                                " ".join(proc.cmdline())))
                        self.logger.debug(msg)
                        exMsg += msg
                        found = True
                except AttributeError as ex:
                    self.logger.debug("Got unexpected attribute error. "
                                      "%s %s" % (conn, ex))
                    continue
            if found:
                return PortInUseException, exMsg
        endTime = datetime.datetime.now()
        elapsed = old_div(((endTime - startTime).microseconds), 1000)
        self.logger.debug("Process checking is over, took %s ms." % elapsed)
        return FDTDException, exMsg

    def execute(self):
        """
        This method is called on the remote site where fdtd runs - here
        all configuration details are also known, thus the final set up
        has to happen here.
        """
        startTime = datetime.datetime.now()
        # may fail with a subset of FDTDException which will be propagated
        if 'portServer' in self.options and self.options['portServer']:
            self.logger.info("Forcing to use user specified port %s" %
                             self.options['portServer'])
            try:
                self.port = int(self.options['portServer'])
            except (TypeError, ValueError) as ex:
                self.logger.info("Provided portServer key is not convertible "
                                 "to integer: %s, Error: %s" %
                                 (self.options['portServer'], ex))
                self.port = int(self.caller.getFreePort())
        else:
            self.logger.info("Try to get a free port")
            self.port = int(self.caller.getFreePort())
        self._setUp(self.conf, self.port)
        destFiles = self.options["destFiles"]
        self.logger.info("%s - checking presence of files at target "
                         "location ..." % self.__class__.__name__)
        self.logger.debug("Results:\n%s" %
                          self._checkTargetFileNames(destFiles))
        user = self.options["sudouser"]
        self.logger.debug("Local grid user is '%s'" % user)
        self.executor = Executor(self.id, caller=self.caller,
                                 command=self.command, port=self.port,
                                 userName=user, logger=self.logger)
        try:
            output = self.executor.execute()
        # on errors, do not do any cleanup or port releasing; from
        # Executor.execute() the instance is in the container and shall
        # be handled by CleanupProcessesAction
        except Exception as ex:
            raiser, ex = self._checkAddrAlreadyInUseError(str(ex))
            msg = ("Could not start FDT server on %s port: %s, reason: %s" %
                   (getHostName(), self.port, ex))
            self.logger.critical(msg, traceBack=True)
            self.status = -2
            raise raiser(msg)
        else:
            rObj = Result(self.id)
            rObj.status = 0
            self.status = 0
            # port on which the FDT Java server runs
            rObj.serverPort = self.port
            rObj.msg = "FDT server is running"
            rObj.log = output
            self.logger.debug("Response to client: %s" % rObj)
            endTime = datetime.datetime.now()
            elapsed = (endTime - startTime).seconds
            par = dict(id=self.id, fdt_server_init=elapsed)
            self.logger.debug("Starting FDT server lasted: %s [s]." % elapsed)
            if self.apMon:
                self.logger.debug("Sending data to ApMon ...")
                self.apMon.sendParameters("fdtd_server_writer", None, par)
            return rObj

    def getID(self):
        """
        Returns transfer ID
        """
        return self.id

    def getStatus(self):
        """
        Returns class status
        """
        return self.status

    def getHost(self):
        """
        Returns hostname.
        """
        return self.conf.get("hostname") or getHostName()

    def getMsg(self):
        """
        Returns last raised message in the queue
        """
        return self.executor.lastMessage()

    def getLog(self):
        """
        Returns log file lines
        """
        return self.executor.getLogs()

    def getServerPort(self):
        """Returns server port on which it is listening"""
        return self.port

    def executeWithLogOut(self):
        """
        Execute the transfer, which will log everything back to the
        calling client
        """
        for line in self.executor.executeWithLogOut():
            yield line

    def executeWithOutLogOut(self):
        """
        Execute without log output to the client on the fly.
        Logs can be received from getLog
        """
        return self.executor.executeWithOutLogOut()
class SendingClientAction(Action):
    """
    Sending client class
    """

    def __init__(self, options):
        """
        Instance of this class is created by fdtcp and some parameters
        are passed (options); options is a dictionary.
        """
        self.apMon = None
        self.id = options['transferId']
        Action.__init__(self, self.id)
        self.options = options
        self.command = None
        self.status = -1
        self.logger = self.options['logger']
        self.caller = self.options['caller']
        self.conf = self.options['conf']
        self.executor = None
        self.port = None
        if 'apmonObj' in self.options.keys():
            self.apMon = self.options['apmonObj']

    def _setUp(self):
        """
        Set up Sending client action
        """
        # generate FDT fileList, put it into the same location as log file
        if self.conf.get("logFile"):
            logDir = os.path.dirname(self.conf.get("logFile"))
        else:
            logDir = "/tmp"
        directory = os.path.join(logDir, "fileLists")
        fileListName = os.path.join(directory, "fdt-fileList-%s" % self.id)
        try:
            if not os.path.exists(directory):
                os.mkdir(directory)
        except IOError as ex:
            msg = ("Could not create FDT client fileList file %s, "
                   "reason: %s" % (fileListName, ex))
            self.status = -2
            raise FDTDException(msg)  # this will be propagated to fdtcp
        with open(fileListName, 'w') as fd:
            # transferFiles is a list of TransferFiles instances
            for fileName in self.options["transferFiles"]:
                fd.write("%s\n" % fileName)
        self.options["fileList"] = fileListName
        if 'monID' not in self.options:
            self.options["monID"] = self.id
        newOptions = self.options
        newOptions['apmonDest'] = self.conf.get("apMonDestinations")
        if not self.apMon:
            self.apMon = getApmonObj(newOptions['apmonDest'])
        self.command = self.conf.get("fdtSendingClientCommand") % newOptions

    def execute(self):
        """
        This method is invoked by fdtd once the action object is received
        from remote fdtcp (where the action instance was created).
        Options known on fdtd are set on the action instance (e.g.
        finalizing command for invoking FDT Java - location of fdt.jar is
        known only at the fdtd site).
        """
        # this method is called on the PYRO service side, use its logger
        # from now on
        localGridUser = self.options["gridUserSrc"]
        self.logger.debug("Local grid user is '%s'" % localGridUser)
        self.options["sudouser"] = localGridUser
        self._setUp()
        killTimeout = self.conf.get("fdtSendingClientKillTimeout")
        self.executor = Executor(self.id, self.command, caller=self.caller,
                                 userName=localGridUser, logger=self.logger)
        try:
            try:
                output = self.executor.execute()
            except ExecutorException as ex:
                msg = ("FDT Java client on %s failed, "
                       "reason: %s" % (getHostName(), ex))
                self.logger.error(msg)
                raise FDTDException(msg)
            except Exception as ex:
                msg = ("FDT Java client on %s failed, "
                       "reason: %s" % (getHostName(), ex))
                self.logger.error(msg, traceBack=True)
                raise FDTDException(msg)
            else:
                # no other exception was raised during execution
                rObj = Result(self.id)
                rObj.status = 0
                self.status = 0
                rObj.log = output
                rObj.msg = "Output from FDT client"
                self.logger.debug("FDT client log (as sent to "
                                  "fdtcp):\n%s" % output)
                return rObj
        finally:
            # give a signal on this action's Executor instance that its
            # handling finished (e.g. CleanupProcessesAction may be waiting
            # for this)
            self.executor.syncFlag = False

    def getID(self):
        """
        Returns transfer ID
        """
        return self.id

    def getStatus(self):
        """
        Returns class status
        """
        return self.status

    def getHost(self):
        """
        Returns hostname.
        """
        return self.conf.get("hostname") or getHostName()

    def getMsg(self):
        """
        Returns last raised message in the queue
        """
        return self.executor.lastMessage()

    def getLog(self):
        """
        Returns log file lines
        """
        return self.executor.getLogs()

    def getServerPort(self):
        """Returns server port on which it is listening"""
        # This behaviour might change whenever we do pull mode.
        raise NotImplementedError("Sending client does not provide port info.")

    def executeWithLogOut(self):
        """
        Execute transfer and yield log out to the client
        """
        for line in self.executor.executeWithLogOut():
            print(line, end='')
            yield line

    def executeWithOutLogOut(self):
        """
        Execute without log output to the client on the fly.
        Logs can be received from getLog
        """
        return self.executor.executeWithOutLogOut()