def process_compute_mode(self, numtasks):
    """Announce the current superstep and gather one reply per task.

    The superstep number is sent on the surveyor socket, after which
    replies are received until ``numtasks`` of them have been counted.
    Any reply other than ``"H"`` clears the "everyone halted" flag.

    Outcome:
      * every reply was ``"H"``: the mode ends up as MODE_HALT;
      * otherwise: the mode is MODE_PROCESS and the superstep counter
        is advanced by one;
      * a NanoMsgAPIError was raised while collecting replies: the
        error is logged and printed and the mode becomes MODE_RECOVERY.

    Returns:
        True in all cases.
    """
    # Send superstep
    self.surveyor.send("%d" % self.superstep)

    everyone_halted = True
    reply_count = 0
    try:
        while True:
            reply = remap_utils.decode(self.surveyor.recv())
            if reply != "H":
                everyone_halted = False
            reply_count += 1
            if reply_count != numtasks:
                continue
            # all replied
            logger.info("All respondents replied")
            self.mode = MODE_PROCESS
            break
    except nn.NanoMsgAPIError as err:
        logger.error("No vertex nodes connected")
        print(err)
        self.mode = MODE_RECOVERY
        return True

    if not everyone_halted:
        self.superstep += 1
        return True

    self.mode = MODE_HALT
    return True
def process_compute_mode(self, numtasks):
    """Send the superstep number and wait for ``numtasks`` replies.

    Each decoded reply is compared with ``"H"``; a single different
    reply is enough to prevent the halt.  Once ``numtasks`` replies
    have been counted the mode is set to MODE_PROCESS.  Afterwards the
    mode is switched to MODE_HALT when every reply was ``"H"``;
    otherwise the superstep counter is incremented.

    If a NanoMsgAPIError is raised while replies are being collected,
    it is logged and printed, the mode becomes MODE_RECOVERY and the
    method returns immediately.

    Returns:
        True in all cases.
    """
    # Send superstep
    self.surveyor.send("%d" % self.superstep)

    must_continue = False
    seen = 0
    try:
        while True:
            answer = remap_utils.decode(self.surveyor.recv())
            if answer != "H":
                must_continue = True
            seen += 1
            if seen == numtasks:
                # all replied
                logger.info("All respondents replied")
                self.mode = MODE_PROCESS
                break
    except nn.NanoMsgAPIError as nano_error:
        logger.error("No vertex nodes connected")
        print(nano_error)
        self.mode = MODE_RECOVERY
        return True

    if must_continue:
        self.superstep += 1
    else:
        self.mode = MODE_HALT
    return True
def process_halt_mode(self, numtasks):
    """Broadcast the halt command and wait for the tasks to answer.

    Sends ``"H"`` on the surveyor socket and counts replies until
    ``numtasks`` have arrived.  Replies are decoded but their content
    is not inspected.  A NanoMsgAPIError raised while waiting is
    printed and otherwise ignored.

    Returns:
        False in all cases.
    """
    self.surveyor.send("H")

    acknowledged = 0
    try:
        while True:
            # Decoded for parity with the other modes; the payload is unused.
            remap_utils.decode(self.surveyor.recv())
            acknowledged += 1
            if acknowledged != numtasks:
                continue
            logger.info("All respondents replied")
            break
    except nn.NanoMsgAPIError as err:
        print(err)

    return False
def process_halt_mode(self, numtasks):
    """Tell every task to halt and collect their replies.

    ``"H"`` is sent on the surveyor socket; replies are then received
    and counted until the count equals ``numtasks``.  The reply text is
    decoded but never examined.  If a NanoMsgAPIError is raised during
    collection it is printed and the wait is abandoned.

    Returns:
        False in all cases.
    """
    self.surveyor.send("H")

    seen = 0
    try:
        while True:
            _ = remap_utils.decode(self.surveyor.recv())
            seen += 1
            if seen == numtasks:
                logger.info("All respondents replied")
                break
    except nn.NanoMsgAPIError as nano_error:
        print(nano_error)

    return False
def process_process_mode(self, numtasks):
    """Send the process command and wait for ``numtasks`` replies.

    ``"P"`` is sent on the surveyor socket.  Replies are decoded and
    counted (their content is not inspected); when the count reaches
    ``numtasks`` the mode becomes MODE_SHIFT.  If a NanoMsgAPIError is
    raised while collecting, it is logged and printed and the mode
    becomes MODE_RECOVERY instead.

    Returns:
        True in all cases.
    """
    self.surveyor.send("P")

    reply_count = 0
    try:
        while True:
            # The reply payload is not used, only its arrival is counted.
            remap_utils.decode(self.surveyor.recv())
            reply_count += 1
            if reply_count != numtasks:
                continue
            logger.info("All respondents replied")
            self.mode = MODE_SHIFT
            break
    except nn.NanoMsgAPIError as err:
        logger.error("No vertex nodes replied")
        print(err)
        self.mode = MODE_RECOVERY

    return True
def process_shift_mode(self, numtasks):
    """Send the shift command and wait for ``numtasks`` replies.

    ``"S"`` is sent on the surveyor socket.  Replies are decoded and
    counted (their content is not inspected); when the count reaches
    ``numtasks`` the mode becomes MODE_COMPUTE.  If a NanoMsgAPIError
    is raised while collecting, it is logged and printed and the mode
    becomes MODE_RECOVERY instead.

    Returns:
        True in all cases.
    """
    self.surveyor.send("S")

    seen = 0
    try:
        while True:
            _ = remap_utils.decode(self.surveyor.recv())
            seen += 1
            if seen == numtasks:
                logger.info("All respondents replied")
                self.mode = MODE_COMPUTE
                break
    except nn.NanoMsgAPIError as nano_error:
        logger.error("No vertex nodes replied")
        print(nano_error)
        self.mode = MODE_RECOVERY

    return True
def work(self):
    """Handle one message from the surveyor socket and reply to it.

    The first character of the decoded message selects the action:

    * ``'S'`` (shift): on first entry into MODE_MSGS every vertex's
      ``messagesNext`` list becomes its current message list and a new
      empty list takes its place.  Replies ``"D"``.
    * ``'H'`` (halt): sets MODE_HALT and closes the surveyor socket.
    * ``'P'`` (process): drains ``self.vsub``.  On first entry into
      MODE_PROCESS each message is queued on the vertex it is addressed
      to, if that vertex is held here; on a repeated ``'P'`` the
      messages are printed and discarded.  Replies ``"D"``.
    * anything else is parsed with ``int()`` as the superstep number:
      ``self.app.compute`` is run for every vertex, and the reply is
      ``"H"`` when every vertex reported halt, ``"D"`` otherwise.

    Returns:
        False after a halt message; True otherwise, including when the
        instance is not ready or receiving raised NanoMsgAPIError.
    """
    if not self.ready:
        return True

    try:
        surveyormsg = remap_utils.decode(self.surveyor.recv())
    except nn.NanoMsgAPIError:
        # Nothing was received this round.
        return True

    command = surveyormsg[0]

    if command == 'S':
        # Shift messages
        if self.mode != MODE_MSGS:
            self.mode = MODE_MSGS
            # We haven't done this in a previous step. Due to recovery, it
            # might be used by the initiator to get others up to speed.
            for key, (vertex, _, messagesNext) in self.vertices.items():
                self.vertices[key] = (vertex, messagesNext, [])
        self.surveyor.send("D")
        return True

    if command == 'H':
        self.mode = MODE_HALT
        logger.info("Halting core.")
        self.surveyor.close()
        return False

    if command == 'P':
        if self.mode != MODE_PROCESS:
            # First time in this state, we need to grab all messages and
            # allocate them to vertex queue
            self.mode = MODE_PROCESS
            logger.info("Processing messages 1")
            while True:
                # Only the receive is guarded: recv() raising is what ends
                # the drain loop.
                try:
                    msg = self.vsub.recv()
                except nn.NanoMsgAPIError:
                    logger.error("No more messages available.")
                    break
                prefix, data = remap_utils.unpack_msg(msg)
                if prefix in self.vertices:
                    # This vertex is indeed on this host. Add the message
                    # to its new msg list for next iteration
                    _, _, messagesNext = self.vertices[prefix]
                    messagesNext.append(data)
        else:
            logger.info("Processing messages 2")
            # doing things twice does not make a difference. Second time
            # around, just throw away all messages
            while True:
                try:
                    msg = self.vsub.recv()
                except nn.NanoMsgAPIError:
                    logger.error("No more messages available.")
                    break
                print("Received and thrown away: ", msg)
        self.surveyor.send("D")
        return True

    # Any other message carries the superstep number to compute.
    self.mode = MODE_RUN
    self.superstep = int(surveyormsg)
    mainHalt = True

    for key, (vertex, messages, messagesNext) in self.vertices.items():
        vertex, halt = self.app.compute(
            self.forward,
            self.subscribe,
            self.unsubscribe,
            self.superstep,
            vertex,
            messages,
        )
        # Identity test: None is a singleton, and `!=` would invoke the
        # vertex object's own comparison.
        if vertex is not None:
            # Store the new vertex object in its place, maintaining the
            # messagesNext list as we know it
            self.vertices[key] = (vertex, [], messagesNext)
        if not halt:
            mainHalt = False

    self.surveyor.send("H" if mainHalt else "D")
    return True