def testCleanupProcessesAction(): a = CleanupProcessesAction("some_id", timeout = 45) assert a.id == "some_id" assert a.timeout == 45 fdtd = Mock() r = a.execute(None, fdtd, None) assert r.status == 0
def testFDTDWaitingTimeoutWhenCleanup(): """ Test issues long running job (dd copy) on the background (non-blocking) and when killing the job, the timeout is set higher than issuing ALARM signal. It's tested that the ALARM signal was raised, but implemented as is is in fact ignored. More obvious when the killTimeout is set much higher. Implementation of #33 - CleanupProcessesAction - attribute to ignore any wait-to-finish timeouts """ class Handler(object): def __init__(self, flag, testName): self.flag = flag self.testName = testName def signalHandler(self, signum, frame): print("test %s signal handler called (sig: %s)" % (self.testName, signum)) # sets flag to check whether some reaction was successfully # invoked self.flag = True c = """ [general] port = 6700 debug = DEBUG killCommand = ../wrapper_kill.sh %(pid)s portRangeFDTServer = 54321,54400 """ f = getTempFile(c) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) testName = inspect.stack()[0][3] logger = Logger(name=testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) # need long running job command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400" # set long timeout (will be interrupted sooner by alarm - while # waiting on kill timeout) e = Executor("some_id", command, caller=fdtd, blocking=False, killTimeout=2, logger=logger) try: e.execute() # command remains is running now # try killing the command # since waitTimeout = True, kill will be waiting cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=True) handler = Handler(False, testName) signal.signal(signal.SIGALRM, handler.signalHandler) assert handler.flag == False print("test %s is waiting here ..." % testName) signal.alarm(1) # raise alarm in timeout seconds cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger) signal.alarm(0) # disable alarm # but the alarm was called during this waiting (test flag value) assert handler.flag finally: fdtd.shutdown() fdtd.pyroDaemon.closedown()
def testCleanupProcessesAction(): logger = Logger() a = CleanupProcessesAction("some_id", timeout=4) assert a.id == "some_id" assert a.timeout == 4 assert a.waitTimeout fdtd = Mock() # wihtout this, it remains in the loop, see # CleanupProcessesAction.execute() fdtd.getExecutor().syncFlag = False r = a.execute(caller=fdtd, logger=logger) assert r.status == 0 a = CleanupProcessesAction("some_id", timeout=5, waitTimeout=False) assert a.waitTimeout == False
def testFDTDServiceOpenFiles(): """ #41 - Too many open files (fdtd side) """ hostName = os.uname()[1] f = getTempFile(functionalFDTDConfiguration) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) conf.sanitize() testName = inspect.stack()[0][3] logger = Logger(name=testName, logFile="/tmp/fdtdtest-%s.log" % testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) proc = Process(os.getpid()) initStateNumOpenFiles = len(proc.get_open_files()) for testAction in [TestAction("fakeSrc", "fakeDst") for i in range(3)]: r = fdtd.service.service(testAction) logger.debug("Result: %s" % r) assert r.status == 0 # after TestAction, there should not be left behind any open files numOpenFilesNow = len(proc.get_open_files()) assert initStateNumOpenFiles == numOpenFilesNow # test on ReceivingServerAction - it's action after which the # separate logger is not closed, test the number of open files went +1, # send CleanupProcessesAction and shall again remain # initStateNumOpenFiles send appropriate TestAction first (like in real) serverId = "server-id" testAction = TestAction(hostName, hostName) testAction.id = serverId r = fdtd.service.service(testAction) assert r.status == 0 options = dict(gridUserDest="someuserDest", clientIP=os.uname()[1], destFiles=[]) recvServerAction = ReceivingServerAction(testAction.id, options) r = fdtd.service.service(recvServerAction) print r.msg assert r.status == 0 numOpenFilesNow = len(proc.get_open_files()) # there should be only 1 extra opened file now assert initStateNumOpenFiles == numOpenFilesNow - 1 cleanupAction = CleanupProcessesAction(serverId, timeout=2) r = fdtd.service.service(cleanupAction) print r.msg assert r.status == 0 numOpenFilesNow = len(proc.get_open_files()) assert initStateNumOpenFiles == numOpenFilesNow fdtd.shutdown() fdtd.pyroDaemon.closedown() logger.close()
def performCleanup(self, waitTimeout=True): """ waitTimeout: whether fdtd shall wait timeout period before killing the process. """ if len(self.toCleanup) < 1: self.logger.info("Cleanup action - nothing to clean up, " "transfer: %s" % self) return cleanupStartTime = datetime.datetime.now() self.logger.info("Cleanup action, transfer: %s - terminate remote " "processes ..." % self) self.logger.debug("%s items in the clean up container." % len(self.toCleanup)) timeout = int(self.conf.get("timeout")) cl = CleanupProcessesAction(self.id, timeout=timeout, waitTimeout=waitTimeout) # iterate the container from the tail, the last one added was # client and client should have finished by this point anyway ... self.toCleanup.reverse() # have to have another copy of the container to iterate # over, the original will be shrunk tmpToCleanup = copy.deepcopy(self.toCleanup) for uri in tmpToCleanup: # have to try to get another PYRO proxy, should the previous # one be blocked and every subsequent call on it will time out # and will not even be received by fdtd (ticket #26, though # this method already had another # FDTCopy, resp. PYRO instantiation) fdtCopy = FDTCopy(uri, self.logger) try: fdtCopy.perform(cl) except FDTCopyException, ex: self.logger.error("During clean up: %s" % ex) # TODO # have to be specific about exceptions here, shall exhaust the # whole list though likely FDTDException may happen here and is # translated into FDTCopyException above except Exception, ex: self.logger.error("Unspecified exception during clean " "up: %s" % ex)
def testFDTDServiceOpenFilesFullTransfer(): """ #41:comment:8 - Too many open files (fdtd side) SendingClient actually removed itself from the executors container once it finishes so subsequent CleanupProcessesAction doesn't know about this process, nor about its open separate log file, which doesn't get closed. Simulate a simple successful transfer, send all actions and check number of open files - does all as it happens in fdtd.service() """ hostName = os.uname()[1] testName = inspect.stack()[0][3] initStateNumOpenFilesTestStart, filesStr = getOpenFilesList() print("%s: test 0: open files: %s items:\n%s" % (testName, initStateNumOpenFilesTestStart, filesStr)) # there should not be any open files now assert initStateNumOpenFilesTestStart == 0 f = getTempFile(functionalFDTDConfiguration) inputOption = "--config=%s --port=10001" % f.name confServer = ConfigFDTD(inputOption.split()) confServer.sanitize() loggerServer = Logger(name=testName, logFile="/tmp/fdtdtest-%s-writer.log" % testName, level=logging.DEBUG) apMon = None fdtdServer = FDTD(confServer, apMon, loggerServer) inputOption = "--config=%s --port=10002" % f.name confReader = ConfigFDTD(inputOption.split()) confReader.sanitize() loggerReader = Logger(name=testName, logFile="/tmp/fdtdtest-%s-reader.log" % testName, level=logging.DEBUG) apMon = None fdtdReader = FDTD(confReader, apMon, loggerReader) # -2 open log files, additional -1 is the temp config file initStateNumOpenFiles, filesStr = getOpenFilesList() print("%s: test 1: open files: %s items:\n%s" % (testName, initStateNumOpenFiles, filesStr)) assert initStateNumOpenFilesTestStart == initStateNumOpenFiles - 2 - 1 testActionServer = TestAction(hostName, hostName) testActionServer.id = testActionServer.id + "-writer" r = fdtdServer.service.service(testActionServer) assert r.status == 0 options = dict(gridUserDest="someuserDest", clientIP=os.uname()[1], destFiles=["/dev/null"]) recvServerAction = ReceivingServerAction(testActionServer.id, options) r = fdtdServer.service.service(recvServerAction) print r.msg assert r.status == 0 serverFDTPort = r.serverPort # there should be only 1 extra opened file now - ReceivingServerAction # separate log numOpenFilesNow, filesStr = getOpenFilesList() print("%s: test 2: open files: %s items:\n%s" % (testName, numOpenFilesNow, filesStr)) assert initStateNumOpenFiles == numOpenFilesNow - 1 testActionReader = TestAction(hostName, hostName) testActionReader.id = testActionReader.id + "-reader" r = fdtdReader.service.service(testActionReader) assert r.status == 0 files = [TransferFile("/etc/passwd", "/dev/null")] # list of TransferFile options = dict(port=serverFDTPort, hostDest=os.uname()[1], transferFiles=files, gridUserSrc="soemuserSrc") sndClientAction = SendingClientAction(testActionReader.id, options) r = fdtdReader.service.service(sndClientAction) assert r.status == 0 # there should be +2 extra - for separate both server and client numOpenFilesNow, filesStr = getOpenFilesList() print("%s: test 3: open files: %s items:\n%s" % (testName, numOpenFilesNow, filesStr)) # 2 extra files - separate transfer log at both ends assert initStateNumOpenFiles == numOpenFilesNow - 2 # now the transfer is over, both server (writer) and sender (reader) # parties kept their separate log files open, CleanupProcessesAction # will close them print "going to clean up" cl = CleanupProcessesAction(testActionReader.id, waitTimeout=False) r = fdtdReader.service.service(cl) assert r.status == 0 # one shall be closed now numOpenFilesNow, filesStr = getOpenFilesList() print("%s: test 4: open files: %s items:\n%s" % (testName, numOpenFilesNow, filesStr)) assert initStateNumOpenFiles == numOpenFilesNow - 1 cl = CleanupProcessesAction(testActionServer.id, waitTimeout=False) r = fdtdServer.service.service(cl) assert r.status == 0 # both separate log files should be closed now # problem #41:comment:8 was here - server behaved correctly, but # reader kept its separate log file open numOpenFilesNow, filesStr = getOpenFilesList() print("%s: test 5: open files: %s items:\n%s" % (testName, numOpenFilesNow, filesStr)) assert initStateNumOpenFiles == numOpenFilesNow fdtdServer.shutdown() fdtdServer.pyroDaemon.closedown() loggerServer.close() fdtdReader.shutdown() fdtdReader.pyroDaemon.closedown() loggerReader.close() # after even log files were closed, etc numOpenFilesNow, filesStr = getOpenFilesList() print("%s: test 6: open files: %s items:\n%s" % (testName, numOpenFilesNow, filesStr)) # -1: the temp configuration file is still open assert initStateNumOpenFilesTestStart == numOpenFilesNow - 1
def testAddressAlreadyInUseRoundRobinPortReservation(): """ #38 - Address already in use FDT Java https://trac.hep.caltech.edu/trac/fdtcp/ticket/38 Address already in use problem was seen during #5:comment:20 https://trac.hep.caltech.edu/trac/fdtcp/ticket/5#comment:20 2 times out of 338 transfer (attempts). Probably, when there is traffic the port can't be bound immediately even if it was released very short ago by the previous process. This test could not reproduce the problem (when reusing immediately the same port number for the next request), so FDTD.getFreePort() was reimplemented to reserver ports on round-robin basis. """ hostName = os.uname()[1] f = getTempFile(functionalFDTDConfiguration) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) conf.sanitize() testName = inspect.stack()[0][3] logger = Logger(name=testName, logFile="/tmp/fdtdtest-%s.log" % testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) # launch two subsequent ReceivingServerAction, second will likely fail # to bind the same, just very short ago, released port serverId = "%s" % testName testAction = TestAction(hostName, hostName) testAction.id = serverId # do TestAction r = fdtd.service.service(testAction) assert r.status == 0 options = dict(gridUserDest="someuserDest", clientIP=os.uname()[1], destFiles=[]) recvServerAction = ReceivingServerAction(testAction.id, options) # do ReceivingServerAction - start FDT Java server r = fdtd.service.service(recvServerAction) print r.msg assert r.status == 0 assert r.serverPort == 54321 cleanupAction = CleanupProcessesAction(serverId, timeout=0, waitTimeout=False) # do CleanupProcessesAction - shut FDT Java server, port shall be # released r = fdtd.service.service(cleanupAction) print r.msg assert r.status == 0 # do another ReceivingServerAction - start FDT Java server r = fdtd.service.service(recvServerAction) print r.msg assert r.status == 0 # will not get the same port, but the next one in the range assert r.serverPort == 54322 # in fact, if separate log files are enabled, after this last # ReceivingServerAction, there is a separate log file open. # taking the the service down, it should also closed it's related # to open files #41 problem fdtd.shutdown() fdtd.pyroDaemon.closedown() logger.close()
def testFDTDNotWaitingTimeoutWhenCleanupForced(): """ Test issues long running job (dd copy) on the background (non-blocking) and when killing the job, the timeout is set high. But no timeout is waited and the command is killed immediately. Raising ALARM in 2s never happens and command shall finish immediately (value of the flag changed in the signal handler never happens). Implementation of #33 - CleanupProcessesAction - attribute to ignore any wait-to-finish timeouts """ class Handler: def __init__(self, flag, testName): self.flag = flag self.testName = testName def signalHandler(self, signum, frame): print("test %s signal handler called (sig: %s)" % (self.testName, signum)) # sets flag to check whether some reaction was successfully # invoked self.flag = True c = """ [general] port = 6700 debug = DEBUG killCommand = ../wrapper_kill.sh %(pid)s portRangeFDTServer = 54321,54400 """ f = getTempFile(c) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) testName = inspect.stack()[0][3] logger = Logger(name=testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) # need long running job command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400" # set long timeout, shall be killed immediately anyway e = Executor("some_id", command, caller=fdtd, blocking=False, killTimeout=100, logger=logger) try: e.execute() # command remains is running now # try killing the command # since waitTimeout = False, shall be killed immediately cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=False) handler = Handler(False, testName) signal.signal(signal.SIGALRM, handler.signalHandler) assert handler.flag == False signal.alarm(1) # raise alarm in timeout seconds # should happen immediately so that ALARM is not raised cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger) signal.alarm(0) # disable alarm # the alarm shouldn't have been called - value should have # remained the same assert handler.flag == False finally: fdtd.shutdown() fdtd.pyroDaemon.closedown()
def testFDTDWaitingTimeoutWhenCleanup(): """ Test issues long running job (dd copy) on the background (non-blocking) and when killing the job, the timeout is set higher than issuing ALARM signal. It's tested that the ALARM signal was raised, but implemented as is is in fact ignored. More obvious when the killTimeout is set much higher. Implementation of #33 - CleanupProcessesAction - attribute to ignore any wait-to-finish timeouts """ class Handler: def __init__(self, flag, testName): self.flag = flag self.testName = testName def signalHandler(self, signum, frame): print("test %s signal handler called (sig: %s)" % (self.testName, signum)) # sets flag to check whether some reaction was successfully # invoked self.flag = True c = """ [general] port = 6700 debug = DEBUG killCommand = ../wrapper_kill.sh %(pid)s portRangeFDTServer = 54321,54400 """ f = getTempFile(c) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) testName = inspect.stack()[0][3] logger = Logger(name=testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) # need long running job command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400" # set long timeout (will be interrupted sooner by alarm - while # waiting on kill timeout) e = Executor("some_id", command, caller=fdtd, blocking=False, killTimeout=2, logger=logger) try: e.execute() # command remains is running now # try killing the command # since waitTimeout = True, kill will be waiting cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=True) handler = Handler(False, testName) signal.signal(signal.SIGALRM, handler.signalHandler) assert handler.flag == False print "test %s is waiting here ..." % testName signal.alarm(1) # raise alarm in timeout seconds cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger) signal.alarm(0) # disable alarm # but the alarm was called during this waiting (test flag value) assert handler.flag == True finally: fdtd.shutdown() fdtd.pyroDaemon.closedown()
def testFDTDNotWaitingTimeoutWhenCleanupForced(): """ Test issues long running job (dd copy) on the background (non-blocking) and when killing the job, the timeout is set high. But no timeout is waited and the command is killed immediately. Raising ALARM in 2s never happens and command shall finish immediately (value of the flag changed in the signal handler never happens). Implementation of #33 - CleanupProcessesAction - attribute to ignore any wait-to-finish timeouts """ class Handler(object): def __init__(self, flag, testName): self.flag = flag self.testName = testName def signalHandler(self, signum, frame): print("test %s signal handler called (sig: %s)" % (self.testName, signum)) # sets flag to check whether some reaction was successfully # invoked self.flag = True c = """ [general] port = 6700 debug = DEBUG killCommand = ../wrapper_kill.sh %(pid)s portRangeFDTServer = 54321,54400 """ f = getTempFile(c) inputOption = "--config=%s" % f.name conf = ConfigFDTD(inputOption.split()) testName = inspect.stack()[0][3] logger = Logger(name=testName, level=logging.DEBUG) apMon = None fdtd = FDTD(conf, apMon, logger) # need long running job command = "dd if=/dev/zero of=/dev/null count=100000000 bs=102400" # set long timeout, shall be killed immediately anyway e = Executor("some_id", command, caller=fdtd, blocking=False, killTimeout=100, logger=logger) try: e.execute() # command remains is running now # try killing the command # since waitTimeout = False, shall be killed immediately cl = CleanupProcessesAction("some_id", timeout=1, waitTimeout=False) handler = Handler(False, testName) signal.signal(signal.SIGALRM, handler.signalHandler) assert handler.flag == False signal.alarm(1) # raise alarm in timeout seconds # should happen immediately so that ALARM is not raised cl.execute(conf=conf, caller=fdtd, apMon=None, logger=logger) signal.alarm(0) # disable alarm # the alarm shouldn't have been called - value should have # remained the same assert handler.flag == False finally: fdtd.shutdown() fdtd.pyroDaemon.closedown()