def parallelRun(self, verbose=False):
    """
    parallelRun: Worker loop — wait for ReconstructionMessage jobs, run each one,
    and report a 'Finished' status back to node 0 until an 'End' status arrives.

    @param verbose: print every raw message string received
    @type verbose: bool
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_id = pytom_mpi.rank()

    end = False
    while not end:
        # listen for messages
        mpi_msgString = pytom_mpi.receive()
        if verbose:
            print(mpi_msgString)

        try:
            # wait for job and start processing
            msg = ReconstructionMessage()
            msg.fromStr(mpi_msgString)

            self.setJob(msg)
            self.run()

            resultMsg = StatusMessage(mpi_id, '0')
            resultMsg.setStatus('Finished')
            pytom_mpi.send(str(resultMsg), 0)
        except (MessageError, PyTomClassError, ParameterError):
            try:
                # not a job message — check whether it is an 'End' StatusMessage
                msg = StatusMessage('', '')
                msg.fromStr(mpi_msgString)
                if msg.getStatus() == 'End':
                    end = True
            except (MessageError, PyTomClassError, ParameterError) as e:
                # was 'assert False', which is stripped under -O; raise instead
                # and chain the parse failure so it is not lost
                raise RuntimeError('Error parsing message. Message either unknown or invalid.') from e
        except Exception as e:
            # was a bare 'except:' that silently printed 'wild except' and
            # swallowed the error; keep the worker alive (best-effort) but log
            # what actually went wrong
            print('wild except:', repr(e))
def run(self):
    """
    run: Worker loop for growing-average jobs — receive a
    GrowingAverageJobMessage, execute it, send a GrowingAverageResultMessage
    back to node 0, and stop once an 'End' StatusMessage arrives.
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    finished = False
    while not finished:
        try:
            mpi_msgString = pytom_mpi.receive()

            jobMsg = GrowingAverageJobMessage('', '')
            jobMsg.fromStr(mpi_msgString)

            self.fromJob(jobMsg.getJob())
            self._run()

            resultMsg = GrowingAverageResultMessage(self._mpiId, 0)
            pytom_mpi.send(str(resultMsg), 0)
        except (MessageError, PyTomClassError, ParameterError, IndexError):
            # not a job message — treat it as a StatusMessage and finish on 'End'
            statusMsg = StatusMessage('', '')
            statusMsg.fromStr(mpi_msgString)
            if statusMsg.getStatus() == 'End':
                finished = True
def send(self, source, destination):
    """
    send: Wrap this result in a PeakResultMsg and send it to *destination*.

    @param source: source machine id gained from pytom_mpi
    @type source: int
    @param destination: destination machine id
    @type destination: int
    """
    from pytom.localization.peak_job_msg import PeakResultMsg

    msg = PeakResultMsg(str(source), str(destination))
    msg.setResult(self)

    import pytom_mpi
    # BUG FIX: the 'type:' label previously printed the *source* value; print
    # the destination's type instead, consistent with PeakJob.send
    print(f'destination: {destination}\ntype: {type(destination)}')
    pytom_mpi.send(str(msg), int(destination))
def parallelEnd(self, verbose=True):
    """
    parallelEnd: End the parallel running of the program.
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if verbose == True:
        print(self.name + ': sending end messages to all')

    # NOTE(review): this broadcasts to every rank, including rank 0 itself;
    # the other parallelEnd variants in this file start at rank 1 — confirm
    # that including rank 0 is intended here.
    for node in range(pytom_mpi.size()):
        endMsg = StatusMessage(str(self.mpi_id), str(node))
        endMsg.setStatus("End")
        pytom_mpi.send(str(endMsg), node)
def end(self, verbose=False):
    """
    end: Broadcast an 'End' StatusMessage to every other node, then
    finalise MPI.
    """
    if verbose == True:
        print(self.node_name + ': sending end messages to others')

    from pytom.parallel.messages import StatusMessage

    numberOfNodes = pytom_mpi.size()
    myRank = pytom_mpi.rank()

    for node in range(1, numberOfNodes):
        endMsg = StatusMessage(str(myRank), str(node))
        endMsg.setStatus("End")
        pytom_mpi.send(str(endMsg), node)

    pytom_mpi.finalise()
def parallelEnd(self):
    """
    parallelEnd : Sends status message = end to all workers. All workers
    will terminate upon receiving this message.
    @author: Thomas Hrabe
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    myRank = pytom_mpi.rank()
    numberOfNodes = pytom_mpi.size()

    for worker in range(1, numberOfNodes):
        endMsg = StatusMessage(str(myRank), str(worker))
        endMsg.setStatus("End")
        pytom_mpi.send(str(endMsg), worker)
def send(self, source, destination):
    """
    send: Send the job-relevant message from source to destination
    @param source: source machine id gained from pytom_mpi
    @type source: int
    @param destination: destination machine id
    @type destination: int
    @author: chen
    """
    from pytom.localization.peak_job_msg import PeakJobMsg

    jobMsg = PeakJobMsg(str(source), str(destination))
    jobMsg.setJob(self)

    import pytom_mpi
    print(f'destination: {destination}\ntype: {type(destination)}')
    pytom_mpi.send(str(jobMsg), int(destination))
def parallelEnd(self, verbose=True):
    """
    parallelEnd : Sends status message = end to all workers.
    @param verbose: verbose mode
    @type verbose: boolean
    """
    if verbose == True:
        print('Manager: sending end messages to workers')

    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    numberOfNodes = pytom_mpi.size()
    myRank = pytom_mpi.rank()

    for worker in range(1, numberOfNodes):
        endMsg = StatusMessage(str(myRank), str(worker))
        endMsg.setStatus("End")
        pytom_mpi.send(str(endMsg), worker)
def send_result(self, result):
    """Pickle *result* (ASCII protocol 0) and send it to the master (rank 0)."""
    payload = pickle.dumps(result, protocol=0, fix_imports=True)
    pytom_mpi.send(payload.decode('utf-8'), 0)
def send_job(self, job, dest):
    """Pickle *job* (ASCII protocol 0) and send it to rank *dest*."""
    payload = pickle.dumps(job, protocol=0, fix_imports=True)
    pytom_mpi.send(payload.decode('utf-8'), dest)
def send_result(self, result):
    """Send the string form of *result* to the master node (rank 0)."""
    resultString = str(result)
    pytom_mpi.send(resultString, 0)
def send_job(self, job, dest):
    """
    @param job: FRM job
    @type job: L{FRMJob}
    """
    jobString = str(job)
    pytom_mpi.send(jobString, dest)
def send_job(self, job, dest):
    """Send the string form of *job* to rank *dest*."""
    jobString = str(job)
    pytom_mpi.send(jobString, dest)
def parallelWork(self, verbose=False, doFinalize=True):
    """
    parallelWork: Distribute joblist to workers. Leave as it is.

    Rank 0 acts as master: it hands one job per worker, then feeds the
    remaining jobs to whichever worker reports back, until every job has
    been sent and acknowledged. Any other rank acts as worker: it processes
    jobs until an 'End' StatusMessage arrives.

    @param verbose: print every raw message a worker receives
    @param doFinalize: send 'End' messages and finalise MPI when done
    """
    import pytom_mpi
    from pytom.parallel.messages import Message, StatusMessage, MessageError
    from pytom.basic.exceptions import ParameterError
    from pytom.basic.structures import PyTomClassError

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    if self._mpi_id == 0:
        # master node: dispatch jobs, collect completion replies
        numberJobs = len(self._jobList)
        numberJobsToSend = min(self._numberWorkers, numberJobs)

        # send out the first batch, one job per worker
        for i in range(0, numberJobsToSend):
            pytom_mpi.send(str(self._jobList[i]), i + 1)

        numberFinishedJobs = 0
        numberSentJobs = numberJobsToSend

        finished = numberSentJobs == numberJobs and numberFinishedJobs == numberJobs
        while not finished:
            # hand the next job to whichever worker just reported back
            mpi_msgString = pytom_mpi.receive()
            msg = Message('1', '0')
            msg.fromStr(mpi_msgString)

            numberFinishedJobs += 1
            if numberSentJobs < numberJobs:
                pytom_mpi.send(str(self._jobList[numberSentJobs]), int(msg.getSender()))
                numberSentJobs += 1

            finished = numberSentJobs == numberJobs and numberFinishedJobs == numberJobs

        if doFinalize:
            for i in range(0, self._numberWorkers):
                msg = StatusMessage('0', i + 1)
                msg.setStatus('End')
                pytom_mpi.send(str(msg), i + 1)
                print('Sending end msg to:', i + 1)
    else:
        # worker node: process jobs until an 'End' status message arrives
        end = False
        while not end:
            # listen for messages
            mpi_msgString = pytom_mpi.receive()
            if verbose:
                print(mpi_msgString)

            try:
                # wait for job and start processing
                msg = self.getMsgObject(mpi_msgString)
                self.setJob(msg)
                self.run()

                resultMsg = StatusMessage(self._mpi_id, '0')
                resultMsg.setStatus('Finished')
                pytom_mpi.send(str(resultMsg), 0)
            except (MessageError, PyTomClassError, ParameterError):
                try:
                    # message is a StatusMessage
                    # if message status is End, finish this worker.
                    # You can also add other statuses
                    msg = StatusMessage('', '')
                    msg.fromStr(mpi_msgString)
                    if msg.getStatus() == 'End':
                        end = True
                except (MessageError, PyTomClassError, ParameterError) as e:
                    # chain the cause so the original parse failure is not lost
                    raise RuntimeError('Error parsing message. Message either unknown or invalid.') from e
            except Exception as e:
                # BUG FIX: was a bare 'except:' that discarded the original
                # exception; keep the abort but preserve the cause
                raise RuntimeError('Something went terribly wrong. Aborting.') from e

    if doFinalize:
        pytom_mpi.finalise()
def parallelReconstruction(particleList, projectionList, cubeSize, binning, applyWeighting, verbose=False):
    """
    parallelReconstruction: Distribute reconstruction of particleList over all
    MPI workers. Rank 0 splits the particle list into one chunk per worker,
    sends a ReconstructionMessage to each, waits for all 'Finished' replies,
    then broadcasts 'End'. Other ranks run a ReconstructionWorker.

    @param particleList: full list of particles to reconstruct
    @param projectionList: projections used for the reconstruction
    @param cubeSize: size of the reconstructed volume
    @param binning: binning factor
    @param applyWeighting: whether weighting is applied to the projections
    @param verbose: verbose worker mode
    """
    import pytom_mpi
    from pytom.parallel.messages import StatusMessage

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_id = pytom_mpi.rank()

    if mpi_id == 0:
        numberWorkers = pytom_mpi.size() - 1

        # BUG FIX: the original used '/' (float division in Python 3), which
        # makes range() raise TypeError; and plain floor division could create
        # more chunks than workers, silently dropping the surplus particles.
        # Split into exactly numberWorkers contiguous chunks instead.
        chunkSize, remainder = divmod(len(particleList), numberWorkers)
        pl = []
        offset = 0
        for i in range(numberWorkers):
            size = chunkSize + (1 if i < remainder else 0)
            pl.append(particleList[offset:offset + size])
            offset += size

        for i in range(0, numberWorkers):
            msg = ReconstructionMessage(0, i + 1, pl[i], projectionList, cubeSize, binning, applyWeighting)
            pytom_mpi.send(str(msg), i + 1)

        # wait until every worker reports 'Finished'
        finished = False
        msgCounter = 0
        while not finished:
            mpi_msgString = pytom_mpi.receive()
            msg = StatusMessage(1, '0')
            msg.fromStr(mpi_msgString)

            if not msg.getStatus() == 'Finished':
                print('Worker ' + str(msg.getSender()) + ' sent status: ' + str(msg.getStatus()))

            msgCounter += 1
            finished = msgCounter == numberWorkers

        # tell all workers to shut down
        for i in range(0, numberWorkers):
            msg = StatusMessage(mpi_id, '0')
            msg.setStatus('End')
            pytom_mpi.send(str(msg), i + 1)
    else:
        worker = ReconstructionWorker()
        worker.parallelRun(verbose)

    pytom_mpi.finalise()
def distributeCalculation(self, mpi_myid, verbose=False):
    """
    distributeCalculation: Distribute calculation of matrix to multiple nodes.

    Sends one CorrelationVectorJob per worker node, then keeps listening for
    CorrelationVectorMessage results: each result is written into the matrix,
    a preliminary result is saved to disk, and the next pending particle (if
    any) is dispatched to the node that just answered. Returns when one
    vector per particle has been received.

    @param mpi_myid: rank of this node; must be 0 (master only)
    @param verbose: print every job message sent and raw message received
    @raise Exception: if called on a rank other than 0
    """
    import pytom_mpi
    from pytom.cluster.correlationMatrixStructures import CorrelationVectorJob
    from pytom.parallel.clusterMessages import CorrelationVectorJobMessage, CorrelationVectorMessage
    from pytom.tools.ProgressBar import FixedProgBar

    # only the master node may distribute
    if not mpi_myid == 0:
        raise Exception('This function (distributeCalculation) can only be processed by mpi_id = 0! ID == ' + mpi_myid.__str__() + ' Aborting!')

    mpi_myname = 'node_' + mpi_myid.__str__()
    mpi_numberNodes = pytom_mpi.size()
    particleIndex = 0

    progressBar = FixedProgBar(0, len(self._particleList), 'Particles correlated ')
    progressBar.update(0)

    #distribute on all nodes
    for nodeIndex in range(1, mpi_numberNodes):
        if particleIndex < len(self._particleList):
            # each job correlates one particle against all later particles,
            # so the (symmetric) matrix is filled one row at a time
            particle = self._particleList[particleIndex]
            reducedParticleList = self._particleList[particleIndex + 1:]
            job = CorrelationVectorJob(particle, reducedParticleList, self._mask, particleIndex, self._applyWedge, self._binningFactor, self._lowestFrequency, self._highestFrequency)
            jobMsg = CorrelationVectorJobMessage(str(mpi_myid), str(nodeIndex))
            jobMsg.setJob(job)
            if verbose:
                print(jobMsg)
            pytom_mpi.send(str(jobMsg), nodeIndex)
            particleIndex = particleIndex + 1

    numberVectorsReceived = 0
    # 0 > len(...) is always False here, so the loop below always runs at
    # least once for a non-empty particle list
    finished = numberVectorsReceived > len(self._particleList)

    while not finished:
        #listen until numberVectorsReceived > len(self._particleList) and continue distributing
        mpi_msgString = pytom_mpi.receive()

        if verbose:
            print(mpi_msgString)

        correlationVectorMsg = CorrelationVectorMessage()
        correlationVectorMsg.fromStr(mpi_msgString)
        # sanity check: message must round-trip to the exact received string
        assert correlationVectorMsg.__str__() == mpi_msgString

        vector = correlationVectorMsg.getVector()
        self._setMatrixValuesFromVector(vector.getParticleIndex(), vector)

        # persist after every received vector so a crash loses little work
        self._savePreliminaryResult()
        #print 'Result received from ' + correlationVectorMsg.getSender().__str__() + ' and matrix saved to disk.'

        numberVectorsReceived = numberVectorsReceived + 1

        if particleIndex < len(self._particleList):
            # more particles pending: send the next one to the node that
            # just delivered a result
            #print 'Send particle number :' , particleIndex
            particle = self._particleList[particleIndex]
            reducedParticleList = self._particleList[particleIndex + 1:]
            job = CorrelationVectorJob(particle, reducedParticleList, self._mask, particleIndex, self._applyWedge, self._binningFactor, self._lowestFrequency, self._highestFrequency)
            jobMsg = CorrelationVectorJobMessage(mpi_myid.__str__(), correlationVectorMsg.getSender().__str__())
            jobMsg.setJob(job)
            pytom_mpi.send(jobMsg.__str__(), int(correlationVectorMsg.getSender()))
            particleIndex = particleIndex + 1

        #update progress bar
        progressBar.update(numberVectorsReceived)

        finished = numberVectorsReceived >= len(self._particleList)
def distributeExpectation(particleLists, iterationDirectory, averagePrefix, verbose=False, symmetry=None):
    """
    distributeExpectation: Distributes particle expectation (averaging) to multiple workers. Required by many algorithms such as MCOEXMX

    @param particleLists: list of particleLists (one per class)
    @param iterationDirectory: directory of the current iteration; one 'classN' subdirectory is created per list
    @param averagePrefix: filename prefix for each class average
    @param verbose: print every job message sent and result received
    @param symmetry: optional symmetry; when given and not one-fold, it is applied to each particle list before averaging
    @return: L{ReferenceList} with one Reference per particle list
    @raise RuntimeError: if not run as rank 0, or with fewer than 2 MPI nodes
    @raise IOError: if the iteration directory does not exist
    """
    import pytom_mpi
    from pytom.tools.files import checkDirExists
    from pytom.parallel.alignmentMessages import ExpectationJobMsg, ExpectationResultMsg
    from pytom.alignment.structures import ExpectationJob
    from pytom.basic.structures import Reference, ReferenceList
    from os import mkdir

    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    mpi_myid = pytom_mpi.rank()
    if not mpi_myid == 0:
        raise RuntimeError('This function (distributeExpectation) can only be processed by mpi_id = 0! ID == ' + str(mpi_myid) + ' Aborting!')

    if not checkDirExists(iterationDirectory):
        raise IOError('The iteration directory does not exist. ' + iterationDirectory)

    mpi_numberNodes = pytom_mpi.size()
    if mpi_numberNodes <= 1:
        raise RuntimeError('You must run clustering with openMPI on multiple CPUs')

    listIterator = 0
    referenceList = ReferenceList()

    def _makeJob(listIndex):
        # build the ExpectationJob for particleLists[listIndex], creating its
        # class directory and registering the resulting Reference
        classDir = iterationDirectory + 'class' + str(listIndex) + '/'
        if not checkDirExists(classDir):
            mkdir(classDir)

        averageName = classDir + averagePrefix + '-' + str(listIndex) + '.em'

        # BUG FIX: guard against symmetry=None (the declared default) — the
        # original called symmetry.isOneFold() unconditionally and crashed.
        # Also apply symmetry consistently; the original only applied it in
        # the first distribution round, not when refilling idle workers.
        if symmetry is not None and not symmetry.isOneFold():
            pl = symmetry.apply(particleLists[listIndex])
        else:
            pl = particleLists[listIndex]

        referenceList.append(Reference(averageName, particleLists[listIndex]))
        return ExpectationJob(pl, averageName)

    # first round: one job per node
    for i in range(1, mpi_numberNodes):
        if verbose:
            print('Starting first job distribute step')

        if listIterator < len(particleLists):
            jobMsg = ExpectationJobMsg(0, str(i))
            jobMsg.setJob(_makeJob(listIterator))
            pytom_mpi.send(str(jobMsg), i)
            if verbose:
                print(jobMsg)
            listIterator = listIterator + 1

    # there are more jobs than nodes. continue distributing and collect results
    finished = False
    receivedMsgCounter = 0
    while not finished:
        # listen and collect
        mpi_msgString = pytom_mpi.receive()
        if verbose:
            print(mpi_msgString)

        jobResultMsg = ExpectationResultMsg('', '')
        jobResultMsg.fromStr(mpi_msgString)
        receivedMsgCounter = receivedMsgCounter + 1

        # send new job to free node
        if listIterator < len(particleLists):
            sender = jobResultMsg.getSender()
            jobMsg = ExpectationJobMsg(0, str(sender))
            jobMsg.setJob(_makeJob(listIterator))
            # BUG FIX: the original sent to the stale loop variable 'i'
            # (always the last node) instead of the node that just answered
            pytom_mpi.send(str(jobMsg), int(sender))
            if verbose:
                print(jobMsg)
            listIterator = listIterator + 1

        finished = listIterator >= len(particleLists) and receivedMsgCounter == len(particleLists)

    return referenceList
def distributedCorrelationMatrix(job, verbose=False):
    """
    distributedCorrelationMatrix: Performs calculation of correlation matrix
    either on multiple processes or sequentially.

    Rank 0 acts as manager (distributes, collects, saves the matrix); other
    ranks loop as workers until an 'End' StatusMessage arrives. With a single
    process the matrix is computed sequentially.

    @param job: the correlation matrix job
    @param verbose: print every raw message a worker receives
    """
    import pytom_mpi

    # consistency fix: every other entry point guards init with
    # isInitialised(); do the same here so repeated calls are safe
    if not pytom_mpi.isInitialised():
        pytom_mpi.init()

    if pytom_mpi.size() > 1:
        mpi_myid = pytom_mpi.rank()

        if mpi_myid == 0:
            # manager: distribute jobs, collect vectors, save matrix
            manager = CMManager(job)
            manager.distributeCalculation(mpi_myid, verbose)
            manager.parallelEnd()
            manager.saveMatrix()
        else:
            from pytom.parallel.clusterMessages import CorrelationVectorMessage, CorrelationVectorJobMessage
            from pytom.parallel.messages import StatusMessage, MessageError

            end = False
            while not end:
                mpi_msg = pytom_mpi.receive()
                if verbose:
                    print(mpi_msg)

                try:
                    msg = CorrelationVectorJobMessage()
                    msg.fromStr(mpi_msg)

                    worker = CMWorker(msg.getJob())
                    resultVector = worker.run()

                    resultMessage = CorrelationVectorMessage(mpi_myid, 0)
                    resultMessage.setVector(resultVector)
                    # removed unreachable 'if verbose and False:' debug print
                    pytom_mpi.send(str(resultMessage), 0)
                except (MessageError, RuntimeError, IndexError):
                    # not a job message: check for an 'End' status
                    msg = StatusMessage('', '')
                    msg.fromStr(mpi_msg)
                    if msg.getStatus() == 'End':
                        end = True

            print('Node ' + mpi_myid.__str__() + ' finished')
    else:
        print('Sequential Processing! Running on one machine only!')
        manager = CMManager(job)
        manager.calculateMatrix()
        manager.saveMatrix()

    pytom_mpi.finalise()