def parallelRun(self, verbose=False):
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    end = False

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_id = pytom_mpi.rank()

    while not end:
        # listen for messages
        mpi_msgString = pytom_mpi.receive()

        if verbose:
            print(mpi_msgString)

        try:
            # wait for a job and start processing
            msg = ReconstructionMessage()
            msg.fromStr(mpi_msgString)

            self.setJob(msg)
            self.run()

            resultMsg = StatusMessage(str(mpi_id), '0')
            resultMsg.setStatus('Finished')
            pytom_mpi.send(str(resultMsg), 0)

        except (MessageError, PyTomClassError, ParameterError):
            try:
                # parse as StatusMessage and finish on 'End'
                msg = StatusMessage('', '')
                msg.fromStr(mpi_msgString)
                if msg.getStatus() == 'End':
                    end = True
            except (MessageError, PyTomClassError, ParameterError):
                print('Error parsing message. Message either unknown or invalid.')
                assert False
        except Exception as e:
            print('wild except:', e)
def parallelEnd(self, verbose=True):
    """
    parallelEnd: End the parallel running of the program.
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if verbose:
        print(self.name + ': sending end messages to all')

    for i in range(pytom_mpi.size()):
        msg = StatusMessage(str(self.mpi_id), str(i))
        msg.setStatus("End")
        pytom_mpi.send(str(msg), i)
def end(self, verbose=False):
    if verbose:
        print(self.node_name + ': sending end messages to others')

    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    mpi_numberNodes = pytom_mpi.size()
    mpi_myid = pytom_mpi.rank()

    for i in range(1, mpi_numberNodes):
        msg = StatusMessage(str(mpi_myid), str(i))
        msg.setStatus("End")
        pytom_mpi.send(str(msg), i)

    pytom_mpi.finalise()
def run(self):
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    end = False

    while not end:
        try:
            mpi_msgString = pytom_mpi.receive()

            msg = GrowingAverageJobMessage('', '')
            msg.fromStr(mpi_msgString)

            self.fromJob(msg.getJob())
            self._run()

            returnMessage = GrowingAverageResultMessage(self._mpiId, 0)
            pytom_mpi.send(str(returnMessage), 0)

        except (MessageError, PyTomClassError, ParameterError, IndexError):
            # parse as StatusMessage and finish on 'End'
            msg = StatusMessage('', '')
            msg.fromStr(mpi_msgString)
            if msg.getStatus() == 'End':
                end = True
def parallelEnd(self):
    """
    parallelEnd : Sends status message = end to all workers. All workers
    will terminate upon receiving this message.
    @author: Thomas Hrabe
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_myid = pytom_mpi.rank()
    mpi_numberNodes = pytom_mpi.size()

    for i in range(1, mpi_numberNodes):
        msg = StatusMessage(str(mpi_myid), str(i))
        msg.setStatus("End")
        pytom_mpi.send(str(msg), i)
def parallelEnd(self, verbose=True):
    """
    parallelEnd : Sends status message = end to all workers.
    @param verbose: verbose mode
    @type verbose: boolean
    """
    if verbose:
        print('Manager: sending end messages to workers')

    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    mpi_numberNodes = pytom_mpi.size()
    mpi_myid = pytom_mpi.rank()

    for i in range(1, mpi_numberNodes):
        msg = StatusMessage(str(mpi_myid), str(i))
        msg.setStatus("End")
        pytom_mpi.send(str(msg), i)
def parallelRun(self, verbose=True, **kwargs):
    """
    parallelRun: Run the worker in parallel and send the result message back.
    @param verbose: verbose mode
    @type verbose: boolean
    """
    from pytom.parallel.messages import StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    end = False
    while not end:
        # get the message string
        mpi_msgString = getMsgStr()

        try:
            msg = self.getJobMsg(mpi_msgString)
            self.jobFromMsg(msg)

            if verbose:
                print(self.name + ': running...')
            [resV, orientV, sumV, sqrV] = self.run(verbose, moreInfo=True)

            # write the result volumes back to disk
            resFilename = self.name + '_job' + str(self.jobID) + '_res.em'
            orientFilename = self.name + '_job' + str(self.jobID) + '_orient.em'
            resV.write(resFilename)
            orientV.write(orientFilename)
            if sumV and sqrV:
                sumFilename = self.name + '_job' + str(self.jobID) + '_sum.em'
                sqrFilename = self.name + '_job' + str(self.jobID) + '_sqr.em'
                sumV.write(sumFilename)
                sqrV.write(sqrFilename)

            from pytom.localization.structures import Volume, Orientation
            res = Volume(resFilename, self.volume.subregion)
            orient = Orientation(orientFilename)

            # construct the result
            from pytom.localization.peak_job import PeakResult
            result = PeakResult(res, orient, self.jobID)

            # send the result back
            if verbose:
                print(self.name + ': sending back result')
            result.send(self.mpi_id, self.backTo)

        except (MessageError, PyTomClassError, ParameterError):
            try:
                # parse as StatusMessage and finish on 'End'
                msg = StatusMessage('', '')
                msg.fromStr(mpi_msgString)
                if msg.getStatus() == 'End':
                    end = True
                    if verbose:
                        print(self.name + ': ending...')
            except (MessageError, PyTomClassError, ParameterError):
                print('Error parsing message. Message either unknown or invalid.')
                assert False
def parallelRun(self, job, splitX=0, splitY=0, splitZ=0, verbose=True, gpuID=-1):
    """
    parallelRun: Run the job in parallel on the computer cluster.
    @param job: job
    @type job: L{pytom.localization.peak_job.PeakJob}
    @param splitX: number of splits along the x dimension
    @type splitX: integer
    @param splitY: number of splits along the y dimension
    @type splitY: integer
    @param splitZ: number of splits along the z dimension
    @type splitZ: integer
    @param verbose: verbose mode
    @type verbose: boolean
    """
    import pytom_mpi

    if self.mpi_id == 0:
        # send the first message
        # if not pytom_mpi.isInitialised():
        #     pytom_mpi.init()
        job.members = pytom_mpi.size()
        print('job members', job.members)
        job.send(0, 0)
        print("\n")

    self.gpuID = gpuID
    end = False
    while not end:
        # get the message string
        mpi_msgString = getMsgStr()
        msgType = self.getMsgType(mpi_msgString)

        if msgType == 2:  # job message
            msg = self.getJobMsg(mpi_msgString)
            job = self.jobFromMsg(msg)  # set members

            if self.mpi_id == 0:
                self.distributeJobs(job, splitX, splitY, splitZ)
            else:
                self.distributeJobs(job)

            result = self.run(verbose, gpuID=gpuID)
            self.summarize(result, self.jobID)

        elif msgType == 1:  # result message
            msg = self.getResMsg(mpi_msgString)
            res = self.resFromMsg(msg)

            if verbose:
                print(self.name + ": processing result from worker " + msg.getSender())

            resV = res.result.getVolume()
            resO = res.orient.getVolume()
            jobID = res.jobID
            self.summarize([resV, resO], jobID)

        elif msgType == 0:  # status message
            # get the message as StatusMessage and finish on 'End'
            from pytom.parallel.messages import StatusMessage
            msg = StatusMessage('', '')
            msg.fromStr(mpi_msgString)
            if msg.getStatus() == 'End':
                end = True
                if verbose:
                    print(self.name + ': end')
        else:  # error
            raise RuntimeError("Unknown message type!")

    if self.mpi_id == 0:
        # delete the temporary files on the disk
        import os
        files = os.listdir(self.dstDir)
        for name in files:
            if 'job' in name and '.em' in name and not 'sum' in name and not 'sqr' in name:
                os.remove(self.dstDir + '/' + name)

        # a GPU-specific filename suffix could be inserted here; currently unused
        gpuflag = ''

        # rename the result files
        os.rename(self.dstDir + '/' + 'node_0_res.em',
                  self.dstDir + '/' + 'scores_{}{}.em'.format(self.suffix, gpuflag))
        os.rename(self.dstDir + '/' + 'node_0_orient.em',
                  self.dstDir + '/' + 'angles_{}{}.em'.format(self.suffix, gpuflag))

    self.clean()  # clean itself
    pytom_mpi.finalise()
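# Usage sketch (illustrative, not from the original source): the manager/worker
# loop above is driven by three message types: 2 carries a job, 1 a result, and 0
# a status ('End' terminates the loop). Assuming `manager` is an instance of the
# class these methods belong to and `job` is a pytom.localization.peak_job.PeakJob,
# a typical call issued on every MPI rank might look like:
#
#   manager.parallelRun(job, splitX=2, splitY=2, splitZ=1, verbose=True)
#
# Rank 0 then splits the search volume into 2 x 2 x 1 subregions; workers write
# their per-subregion '<name>_job<id>_res.em' / '_orient.em' volumes (see the
# worker parallelRun() above) and rank 0 merges them into the final score and
# angle maps.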
def parallelReconstruction(particleList, projectionList, cubeSize, binning, applyWeighting, verbose=False):
    """
    parallelReconstruction: Distribute particle reconstruction over the MPI nodes.
    Rank 0 splits the particle list and sends one ReconstructionMessage per worker;
    all other ranks run a ReconstructionWorker.
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_id = pytom_mpi.rank()

    if mpi_id == 0:
        numberWorkers = pytom_mpi.size() - 1

        # split particleList into one chunk per worker
        # (integer division; a trailing remainder chunk beyond numberWorkers full
        # chunks is not distributed)
        splitSize = len(particleList) // numberWorkers
        pl = []
        for i in range(0, len(particleList), splitSize):
            pl.append(particleList[i:i + splitSize])

        for i in range(0, numberWorkers):
            msg = ReconstructionMessage(0, i + 1, pl[i], projectionList, cubeSize, binning, applyWeighting)
            pytom_mpi.send(str(msg), i + 1)

        finished = False
        msgCounter = 0

        while not finished:
            mpi_msgString = pytom_mpi.receive()
            msg = StatusMessage('1', '0')
            msg.fromStr(mpi_msgString)

            if not msg.getStatus() == 'Finished':
                print('Worker ' + str(msg.getSender()) + ' sent status: ' + str(msg.getStatus()))

            msgCounter += 1
            finished = msgCounter == numberWorkers

        for i in range(0, numberWorkers):
            msg = StatusMessage(str(mpi_id), '0')
            msg.setStatus('End')
            pytom_mpi.send(str(msg), i + 1)
    else:
        worker = ReconstructionWorker()
        worker.parallelRun(verbose)

    pytom_mpi.finalise()
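# Usage sketch (illustrative): parallelReconstruction() is collective, i.e. every
# MPI rank must call it, typically from a script launched with something like
# `mpirun -np <N> ...`. Rank 0 splits `particleList` over the N-1 workers and
# collects 'Finished' status messages; all other ranks drop into
# ReconstructionWorker().parallelRun(). The list objects are assumed to be built
# by the caller beforehand:
#
#   parallelReconstruction(particleList, projectionList, cubeSize=64,
#                          binning=1, applyWeighting=True, verbose=False)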
def distributedCorrelationMatrix(job, verbose=False):
    """
    distributedCorrelationMatrix: Performs calculation of the correlation matrix,
    either on multiple processes or sequentially.
    """
    import pytom_mpi

    pytom_mpi.init()

    if pytom_mpi.size() > 1:
        mpi_myid = pytom_mpi.rank()

        if mpi_myid == 0:
            # manager node: distribute the work and collect the matrix
            manager = CMManager(job)
            manager.distributeCalculation(mpi_myid, verbose)
            manager.parallelEnd()
            manager.saveMatrix()
        else:
            # worker node: process correlation-vector jobs until 'End' is received
            from pytom.parallel.clusterMessages import CorrelationVectorMessage, CorrelationVectorJobMessage
            from pytom.parallel.messages import StatusMessage, MessageError

            end = False
            while not end:
                mpi_msg = pytom_mpi.receive()

                if verbose:
                    print(mpi_msg)

                try:
                    msg = CorrelationVectorJobMessage()
                    msg.fromStr(mpi_msg)

                    worker = CMWorker(msg.getJob())
                    # worker.dumpMsg2Log('node' + str(mpi_myid) + '.log', str(msg))
                    resultVector = worker.run()

                    resultMessage = CorrelationVectorMessage(mpi_myid, 0)
                    resultMessage.setVector(resultVector)
                    # worker.dumpMsg2Log('node' + str(mpi_myid) + '.log', str(resultMessage))

                    pytom_mpi.send(str(resultMessage), 0)

                except (MessageError, RuntimeError, IndexError):
                    # parse as StatusMessage and finish on 'End'
                    msg = StatusMessage('', '')
                    msg.fromStr(mpi_msg)
                    if msg.getStatus() == 'End':
                        end = True

            print('Node ' + str(mpi_myid) + ' finished')
    else:
        print('Sequential processing! Running on one machine only!')
        manager = CMManager(job)
        manager.calculateMatrix()
        manager.saveMatrix()

    pytom_mpi.finalise()
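# Usage sketch (illustrative): distributedCorrelationMatrix() decides at runtime
# whether to run distributed or sequentially based on pytom_mpi.size(). With a
# single process it falls back to CMManager.calculateMatrix(); with several, rank 0
# becomes the CMManager and the remaining ranks become CMWorkers. `job` is assumed
# to be the correlation-matrix job object expected by CMManager:
#
#   distributedCorrelationMatrix(job, verbose=False)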
def parallelWork(self, verbose=False, doFinalize=True):
    """
    parallelWork: Distribute the job list to the workers. Leave as it is.
    @param verbose: verbose mode
    @param doFinalize: finalise MPI when all jobs are done
    """
    import pytom_mpi
    from pytom.parallel.messages import Message, StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    if self._mpi_id == 0:
        # node 0 acts as the master node
        numberJobs = len(self._jobList)

        if self._numberWorkers <= numberJobs:
            numberJobsToSend = self._numberWorkers
        else:
            numberJobsToSend = numberJobs

        # send out the first numberJobsToSend jobs
        for i in range(0, numberJobsToSend):
            pytom_mpi.send(str(self._jobList[i]), i + 1)

        numberFinishedJobs = 0
        numberSentJobs = numberJobsToSend

        finished = numberSentJobs == numberJobs and numberFinishedJobs == numberJobs

        while not finished:
            # distribute remaining jobs to workers as they finish
            mpi_msgString = pytom_mpi.receive()
            msg = Message('1', '0')
            msg.fromStr(mpi_msgString)

            numberFinishedJobs += 1

            if numberSentJobs < numberJobs:
                pytom_mpi.send(str(self._jobList[numberSentJobs]), int(msg.getSender()))
                numberSentJobs += 1

            finished = numberSentJobs == numberJobs and numberFinishedJobs == numberJobs

        if doFinalize:
            for i in range(0, self._numberWorkers):
                msg = StatusMessage('0', str(i + 1))
                msg.setStatus('End')
                pytom_mpi.send(str(msg), i + 1)
                print('Sending end msg to:', i + 1)
    else:
        # any other node id acts as a worker node
        end = False

        while not end:
            # listen for messages
            mpi_msgString = pytom_mpi.receive()

            if verbose:
                print(mpi_msgString)

            try:
                # wait for a job and start processing
                msg = self.getMsgObject(mpi_msgString)

                self.setJob(msg)
                self.run()

                resultMsg = StatusMessage(str(self._mpi_id), '0')
                resultMsg.setStatus('Finished')

                pytom_mpi.send(str(resultMsg), 0)
            except (MessageError, PyTomClassError, ParameterError):
                try:
                    # the message is a StatusMessage; if its status is 'End',
                    # finish this worker (other statuses could be handled here)
                    msg = StatusMessage('', '')
                    msg.fromStr(mpi_msgString)
                    if msg.getStatus() == 'End':
                        end = True
                except (MessageError, PyTomClassError, ParameterError):
                    raise RuntimeError('Error parsing message. Message either unknown or invalid.')
            except Exception as e:
                raise RuntimeError('Something went terribly wrong. Aborting.') from e

    if doFinalize:
        pytom_mpi.finalise()
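# Protocol summary (derived from the code above, simplified): the master (rank 0)
# first sends at most one job per idle worker, then loops on receive(); every
# incoming message counts as one finished job and, if jobs remain, the sender
# immediately gets the next one. Once numberSentJobs == numberFinishedJobs ==
# numberJobs, the master optionally broadcasts an 'End' StatusMessage. Workers
# loop on receive(), treating anything that parses as a job message as work to
# run, and a StatusMessage with status 'End' as the signal to stop.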