def _sendToMasterOrTimeout(mpiComm, payload, rank, timeoutMsg):
    """ Send `payload` to rank 0 in a non-blocking way.

    The send request is polled once per second. If more than TIMEOUT
    seconds elapse before it completes, `timeoutMsg % os.getpid()` is
    printed and the wait is abandoned.

    Returns True if the send completed, False if it timed out.
    """
    req_send = mpiComm.isend(payload, dest=0, tag=TAG_RUN_JOB + rank)
    t0 = time()

    while not req_send.test()[0]:
        sleep(1)
        if time() - t0 > TIMEOUT:
            print(timeoutMsg % os.getpid())
            return False

    return True


def runJobMPISlave(mpiComm):
    """ This slave will be receiving commands to execute
    until 'None' is received.

    Commands are strings received from rank 0:
      * "cwd=<dir>"  sets the working directory for the next command.
      * "env=<data>" sets the environment (decoded with loads) for the
        next command.
      * "None"       stops the slave.
      * anything else is executed through runCommand, after which the
        stored cwd/env are cleared.

    After each handled command, 0 is sent back to rank 0. If handling
    raises, the exception text is sent instead and the slave stops.
    The slave also stops if any send to the master times out.
    """
    rank = mpiComm.Get_rank()
    hostname = getLocalHostName()
    # Single-argument print(...) gives the same output as the former
    # Python 2 statement `print a, b` and is also valid Python 3.
    print("%s %s" % ("Running runJobMPISlave: ", rank))

    # Listen for commands until we get 'None'
    cwd = None  # We run without changing directory by default
    env = None  # And we don't change the environment either!

    while True:
        # Receive command in a non-blocking way
        # NOTE(review): current mpi4py documents this keyword as `source`;
        # `dest` is kept unchanged here - confirm against the installed
        # mpi4py version.
        req_recv = mpiComm.irecv(dest=0, tag=TAG_RUN_JOB + rank)
        while True:
            done, command = req_recv.test()
            if done:
                break
            sleep(1)

        print("Slave %s(rank %d) received command." % (hostname, rank))
        if command == "None":
            print(" Stopping...")
            return

        # Run the command and get the result (exit code or exception)
        try:
            if command.startswith("cwd="):
                cwd = command.split("=", 1)[-1]
                print(" Changing to dir %s ..." % cwd)
            elif command.startswith("env="):
                env = loads(command.split("=", 1)[-1])
                print(" Setting the environment...")
                if envVarOn("SCIPION_DEBUG"):
                    print(env)
            else:
                print(" %s" % command)
                runCommand(command, cwd=cwd, env=env)
                cwd = None  # unset directory
                env = None  # unset environment
        except Exception as e:
            # Report the failure to the master; this slave stops afterwards
            # whether or not the message could be delivered.
            _sendToMasterOrTimeout(
                mpiComm, str(e), rank,
                "Timeout in process %d, cannot send error "
                "message to master.")
            return

        # Send 0 (it worked!) in a non-blocking way.
        delivered = _sendToMasterOrTimeout(
            mpiComm, 0, rank,
            "Timeout in process %d, cannot send result "
            "to master.")
        if not delivered:
            return
def runJobMPISlave(mpiComm):
    """ This slave will be receiving commands to execute
    until 'None' is received.

    Commands are strings received from rank 0:
      * "cwd=<dir>"  sets the working directory for the next command.
      * "env=<data>" sets the environment (decoded with loads) for the
        next command.
      * "None"       stops the slave.
      * anything else is executed through runCommand, after which the
        stored cwd/env are cleared.

    After every handled command a result is sent back to rank 0:
    0 on success, or the exception text if handling raised. The slave
    keeps listening after an error or after a send timeout.
    """
    rank = mpiComm.Get_rank()
    hostname = getLocalHostName()
    # Single-argument print(...) gives the same output as the former
    # Python 2 statement `print a, b` and is also valid Python 3.
    print("%s %s" % ("Running runJobMPISlave: ", rank))

    # Listen for commands until we get 'None'
    cwd = None  # We run without changing directory by default
    env = None  # And we don't change the environment either!

    while True:
        # Reset the outcome for every command. Keeping these outside the
        # loop made a single failure stick: every later successful command
        # was still reported to the master with the old error text.
        exitResult = 0
        msg = "Timeout in process %d, cannot send result to master."

        # Receive command in a non-blocking way
        # NOTE(review): current mpi4py documents this keyword as `source`;
        # `dest` is kept unchanged here - confirm against the installed
        # mpi4py version.
        req_recv = mpiComm.irecv(dest=0, tag=TAG_RUN_JOB + rank)
        while True:
            done, command = req_recv.test()
            if done:
                break
            sleep(1)

        print("Slave %s(rank %d) received command." % (hostname, rank))
        if command == 'None':
            print(" Stopping...")
            return

        # Run the command and get the result (exit code or exception)
        try:
            if command.startswith("cwd="):
                cwd = command.split("=", 1)[-1]
                print(" Changing to dir %s ..." % cwd)
            elif command.startswith("env="):
                env = loads(command.split("=", 1)[-1])
                print(" Setting the environment...")
                if envVarOn('SCIPION_DEBUG'):
                    print(env)
            else:
                print(" %s" % command)
                runCommand(command, cwd=cwd, env=env)
                cwd = None  # unset directory
                env = None  # unset environment
        except Exception as e:
            msg = "Timeout in process %d, cannot send error message to master."
            exitResult = str(e)

        # Communicate to master, either error or success
        req_send = mpiComm.isend(exitResult, dest=0, tag=TAG_RUN_JOB + rank)
        t0 = time()
        while not req_send.test()[0]:
            sleep(1)
            if time() - t0 > TIMEOUT:
                print(msg % os.getpid())
                # Give up on this send only; go back to listening.
                break
def testAll(self):
    """ Transfer local and remote source files in one operation and
    check the result.

    Every local source file is sent to both remote target folders and to
    the local target folder; every file found in the two remote source
    folders is fetched into the "test1"/"test2" sub-folders of the local
    target folder. The test passes when checkFiles returns an empty
    result for the transferred paths. The temporary target folders are
    removed before the final assertion.
    """
    tempFolder = "allTransfer"
    filePaths = {}

    localSources = getFiles(self.localSourceFolder)
    remoteSources1 = ft.getRemoteFolderFiles(self.remoteHostName1,
                                             self.remoteUserName1,
                                             self.remotePassword1,
                                             self.remoteSourceFolder1)
    remoteSources2 = ft.getRemoteFolderFiles(self.remoteHostName2,
                                             self.remoteUserName2,
                                             self.remotePassword2,
                                             self.remoteSourceFolder2)
    localUserName = getLocalUserName()
    localHostname = getLocalHostName()

    # Local files: one source mapped to three targets (two remote, one local).
    for localSource in localSources:
        fileName = basename(localSource)
        remoteTarget1 = join(self.remoteTargetFolder1, tempFolder, fileName)
        remoteTarget2 = join(self.remoteTargetFolder2, tempFolder, fileName)
        localTarget = join(self.localTargetFolder, tempFolder, fileName)
        filePaths[localSource] = [
            self.hostsRefs[0] + ":" + remoteTarget1,
            self.hostsRefs[1] + ":" + remoteTarget2,
            localUserName + "@" + localHostname + ":" + localTarget,
        ]

    # Remote files: each one is fetched into its host's local sub-folder.
    remoteGroups = ((0, remoteSources1, "test1"),
                    (1, remoteSources2, "test2"))
    for hostIndex, remoteSources, subFolder in remoteGroups:
        for remoteSource in remoteSources:
            sourceRef = self.hostsRefs[hostIndex] + ":" + remoteSource
            filePaths[sourceRef] = [join(self.localTargetFolder, tempFolder,
                                         subFolder, basename(remoteSource))]

    self.fileTransfer.transferFiles(filePaths,
                                    self.hostPasswords,
                                    gatewayHosts=self.gatewayHosts,
                                    numberTrials=self.numberTrials,
                                    forceOperation=self.forceOperation,
                                    operationId=self.operationId)

    checkPathList = ft.getFilePathList(filePaths)
    checkResult = self.fileTransfer.checkFiles(checkPathList,
                                               self.hostPasswords,
                                               gatewayHosts=self.gatewayHosts,
                                               numberTrials=self.numberTrials,
                                               forceOperation=self.forceOperation,
                                               operationId=self.operationId)
    passTest = len(checkResult) == 0

    # Cleanup removes the temporary folders directly; the deleteFiles call
    # below is left disabled.
    # self.fileTransfer.deleteFiles(checkPathList, self.hostPasswords, gatewayHosts=self.gatewayHosts, numberTrials=self.numberTrials, forceOperation=self.forceOperation, operationId=self.operationId)
    cleanPath(join(self.localTargetFolder, tempFolder))
    ft.removeRemoteFolder(self.remoteHostName1,
                          self.remoteUserName1,
                          self.remotePassword1,
                          join(self.remoteTargetFolder1, tempFolder))
    ft.removeRemoteFolder(self.remoteHostName2,
                          self.remoteUserName2,
                          self.remotePassword2,
                          join(self.remoteTargetFolder2, tempFolder))

    self.assertTrue(passTest)
def runJobMPISlave(mpiComm):
    """ This slave will be receiving commands to execute
    until 'None' is received.

    Messages are received from rank 0. The literal 'None' stops the
    worker; any other message is decoded with loads into a command string:
      * "cwd=<dir>"  sets the working directory for the next command.
      * "env=<expr>" sets the environment for the next command.
      * anything else is executed through runCommand, after which the
        stored cwd/env are cleared.

    After every handled command a result is sent back to rank 0:
    0 on success, or the exception text if handling raised. The worker
    keeps listening after an error or after a send timeout.
    """
    rank = mpiComm.Get_rank()
    hostname = getLocalHostName()
    print(" Running MPIWorker: ", rank)

    # Listen for commands until we get 'None'
    cwd = None  # We run without changing directory by default
    env = None  # And we don't change the environment either!

    while True:
        # Reset the outcome for every command. Keeping this outside the
        # loop made a single failure stick: every later successful command
        # was still reported to the master with the old error text.
        exitResult = 0

        # Receive command in a non-blocking way
        req_recv = mpiComm.irecv(source=0, tag=TAG_RUN_JOB + rank)
        while True:
            done, command = req_recv.test()
            if done:
                break
            sleep(1)

        print(" Worker %s(rank %d) received command." % (hostname, rank))
        # We need to convert to string because req_recv.test() returns bytes or None
        if command == 'None':
            print(" Stopping...")
            return
        else:
            command = loads(command)

        # Run the command and get the result (exit code or exception)
        try:
            if command.startswith("cwd="):
                cwd = command.split("=", 1)[-1]
                print(" Changing to dir %s ..." % cwd)
            elif command.startswith("env="):
                env = command.split("=", 1)[-1]
                # SECURITY NOTE(review): eval executes whatever expression
                # the sender put after "env=". This is only acceptable if
                # the master process is fully trusted - confirm, or switch
                # to a data-only encoding agreed with the sender.
                env = eval(env)
                print(" Setting the environment...")
                if Config.debugOn():
                    print(env)
            else:
                runCommand(command, cwd=cwd, env=env)
                cwd = None  # unset directory
                env = None  # unset environment
        except Exception as e:
            print(" Error in process %d (rank %d)" % (os.getpid(), rank))
            import traceback
            traceback.print_exc()
            exitResult = str(e)

        # Communicate to master, either error or success
        req_send = mpiComm.isend(exitResult, dest=0, tag=TAG_RUN_JOB + rank)
        t0 = time()
        while not req_send.test()[0]:
            sleep(1)
            if time() - t0 > TIMEOUT:
                msg = " Error in process %d, cannot send error message to master."
                print(msg % os.getpid())
                # Give up on this send only; go back to listening.
                break