def coordinatorLookup():
    """Ask the seed nodes who the coordinator is and return its hostname.

    Every host listed in the SEEDS file, other than this host, is sent an
    'LKUP' request on LISTEN_PORT.  A 'RPLY' response is followed by a
    2-character name length and then the coordinator's name; a 'NONE'
    response means the peer does not know the coordinator.  The scan stops
    after the first queried seed that leaves the global ``Coordinator``
    non-empty.  When no coordinator is found the function sleeps for
    COORDINATOR_CHECK_INTERVAL and then declares this host the coordinator.

    Side effects:
        Updates the module-level ``Coordinator``.

    Returns:
        The hostname stored in ``Coordinator`` when the function finishes.
    """
    global Coordinator  # Reference the global variable (Very Important)
    me = socket.gethostname()

    # Load the seed list up front: the file handle is closed deterministically
    # and the list can be re-scanned if the number of attempts is ever raised
    # (a file object would already be exhausted on a second pass).
    with open(SEEDS, 'r') as f:
        seeds = [line.strip() for line in f]

    for _attempt in range(1, 2):  # Single attempt
        for node in seeds:
            # Blank lines are not hostnames; never query ourselves.
            if not node or node == me:
                continue
            logging.debug("Checking if " + node + " knows the coordinator")
            conn = None
            try:
                conn = networking.getConnection(node, LISTEN_PORT)
                networking.perfectSend('LKUP', conn)
                response = networking.perfectReceive(4, conn)
                # response can be None as well
                if response == 'RPLY':
                    nameLen = int(networking.perfectReceive(2, conn))
                    Coordinator = networking.perfectReceive(nameLen, conn)
                    logging.debug("Received RPLY " + Coordinator + " in coordinatorLookup()")
                elif response == 'NONE':
                    logging.debug(node + " doesn't know about the coordinator")
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                # If we got error, the node we were talking to wasn't a good
                # candidate for coordinator anyway
                Coordinator = ''
            finally:
                if conn:
                    networking.closeConnection(conn)
            if Coordinator != '':
                logging.debug("Found Coordinator")
                break
        if Coordinator == '':
            # Wait for the check interval before giving up on the seeds
            time.sleep(COORDINATOR_CHECK_INTERVAL)

    if Coordinator == '':
        Coordinator = me
    logging.info("coordinatorLookup returning [COORDINATOR]: " + Coordinator)
    return Coordinator
def distributeMembersFile(members):
    """Tell every member except the coordinator to fetch the members file.

    For each entry of ``members`` a connection to LISTEN_PORT is opened, a
    'DWLD' message is sent and a 4-character reply is read.  A failure for
    one member is logged and does not stop the remaining members from being
    notified.
    """
    for entry in members:
        host = entry.strip()  # Strip trailing \n
        if host != Coordinator:  # Coordinator doesn't need to send the file to itself
            link = None
            try:
                link = networking.getConnection(host, LISTEN_PORT)
                # TODO: Can possibly update the list of members here as well
                networking.perfectSend('DWLD', link)
                networking.perfectReceive(4, link)
            except Exception:
                # Desperate hack as except socket.error was somehow not
                # catching "Connection refused" exceptions
                errType, errValue = sys.exc_info()[:2]
                logging.debug("General exception: " + str(errType) + " Value: " + str(errValue))
                logging.critical(host + " missed out on an updated copy of members file")
            finally:
                if link:
                    networking.closeConnection(link)
def coordinatorLookup():
    """Ask the seed nodes who the coordinator is and return its hostname.

    Every host listed in the SEEDS file, other than this host, is sent an
    'LKUP' request on LISTEN_PORT.  A 'RPLY' response is followed by a
    2-character name length and then the coordinator's name; a 'NONE'
    response means the peer does not know the coordinator.  The scan stops
    after the first queried seed that leaves the global ``Coordinator``
    non-empty.  When no coordinator is found the function sleeps for
    COORDINATOR_CHECK_INTERVAL and then declares this host the coordinator.

    Side effects:
        Updates the module-level ``Coordinator``.

    Returns:
        The hostname stored in ``Coordinator`` when the function finishes.
    """
    global Coordinator  # Reference the global variable (Very Important)
    me = socket.gethostname()

    # Load the seed list up front: the file handle is closed deterministically
    # and the list can be re-scanned if the number of attempts is ever raised
    # (a file object would already be exhausted on a second pass).
    with open(SEEDS, 'r') as f:
        seeds = [line.strip() for line in f]

    for _attempt in range(1, 2):  # Single attempt
        for node in seeds:
            # Blank lines are not hostnames; never query ourselves.
            if not node or node == me:
                continue
            logging.debug("Checking if " + node + " knows the coordinator")
            conn = None
            try:
                conn = networking.getConnection(node, LISTEN_PORT)
                networking.perfectSend('LKUP', conn)
                response = networking.perfectReceive(4, conn)
                # response can be None as well
                if response == 'RPLY':
                    nameLen = int(networking.perfectReceive(2, conn))
                    Coordinator = networking.perfectReceive(nameLen, conn)
                    logging.debug("Received RPLY " + Coordinator + " in coordinatorLookup()")
                elif response == 'NONE':
                    logging.debug(node + " doesn't know about the coordinator")
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                # If we got error, the node we were talking to wasn't a good
                # candidate for coordinator anyway
                Coordinator = ''
            finally:
                if conn:
                    networking.closeConnection(conn)
            if Coordinator != '':
                logging.debug("Found Coordinator")
                break
        if Coordinator == '':
            # Wait for the check interval before giving up on the seeds
            time.sleep(COORDINATOR_CHECK_INTERVAL)

    if Coordinator == '':
        Coordinator = me
    logging.info("coordinatorLookup returning [COORDINATOR]: " + Coordinator)
    return Coordinator
def run(self):
    """Serve one request received on ``self.clientSocket``.

    A 4-character message type is read first and then dispatched:

    * 'JOIN' / 'LEAV' -- read a length-prefixed node name and add it to or
      remove it from the membership.
    * 'LKUP' -- answer 'RPLY' + name length + coordinator name, or 'NONE'
      when no coordinator is known.
    * 'DWLD' -- download the members file from the coordinator over HTTP,
      store it locally and answer 'DONE'.
    * 'NEWC' -- record the announced coordinator and answer 'UPDC'.
    * 'LTNC' -- read and log a batch of latency measurements.
    * 'CHCK' -- answer 'LIVE'.

    Any other message type is logged and ignored.  Errors inside a handler
    are logged and never propagate.  The client socket is closed on every
    path, including unknown message types.

    Side effects:
        May update the module-level ``Coordinator`` and
        ``NotReceivedCoordinatorUpdate``.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    MSGTYPE_LEN = 4

    try:
        try:
            msgType = networking.perfectReceive(MSGTYPE_LEN, self.clientSocket)
        except Exception:
            exctype, value = sys.exc_info()[:2]
            logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
            return

        if msgType == 'JOIN' or msgType == 'LEAV':
            # FIXME: Only Coordinator should receive such message
            try:
                logging.info("Received " + msgType + " message")
                # The name length header is NAMELEN_LEN characters. Example: 25
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                nodeName = networking.perfectReceive(nameLen, self.clientSocket)
                if msgType == 'JOIN':
                    addMember(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT JOIN]: " + nodeName + "\n[MEMBERS]: " + memString)
                else:
                    removeMembers(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT LEAVE]: " + nodeName + "\n[MEMBERS]: " + memString)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'LKUP':
            logging.debug("Received LKUP message")
            try:
                if Coordinator != '':
                    networking.perfectSend('RPLY', self.clientSocket)
                    # Zero-pad the length to the fixed header width; an
                    # unpadded single digit would make the peer consume the
                    # first character of the name as part of the length.
                    networking.perfectSend(str(len(Coordinator)).zfill(NAMELEN_LEN),
                                           self.clientSocket)
                    networking.perfectSend(Coordinator, self.clientSocket)
                else:
                    networking.perfectSend('NONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'DWLD':
            logging.debug("Received indication to download members.txt file")
            try:
                remote = urllib2.urlopen('http://' + Coordinator + ':' +
                                         str(HTTP_PORT) + '/' + MEMBERS)
                try:
                    contents = remote.read()
                finally:
                    remote.close()
                # Open the local copy only after the download succeeded so a
                # failed transfer does not leave a truncated members file.
                with open(MEMBERS, 'w') as localMembersFile:
                    localMembersFile.write(contents)
                memString = stringOfMembers()
                logging.info("\n[COORDINATOR]: " + Coordinator +
                             "\n[UPDATE LOCAL MEMBERS FILE]: \n[MEMBERS]: " + memString)
                networking.perfectSend('DONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.critical("Could not download members file from " + Coordinator)

        elif msgType == 'NEWC':
            # New coordinator has announced its arrival
            try:
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                Coordinator = networking.perfectReceive(nameLen, self.clientSocket)
                networking.perfectSend('UPDC', self.clientSocket)
                # Breaks the loop of electCoordinator() function
                NotReceivedCoordinatorUpdate = False
                logging.info("Received New Coordinator [NEWC] announcement from " + Coordinator)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Failed to receive the new Coordinator [NEWC] announcement")
                Coordinator = ''
                NotReceivedCoordinatorUpdate = True

        elif msgType == 'LTNC':
            logging.debug("Member sending latency measurements")
            try:
                numberOfEntries = int(networking.perfectReceive(2, self.clientSocket))
                for _ in range(numberOfEntries):
                    measurementLen = int(networking.perfectReceive(2, self.clientSocket))
                    measurement = networking.perfectReceive(measurementLen, self.clientSocket)
                    logging.debug(measurement)  # Only logging for time being
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Did not receive all latency measurements from the member")

        elif msgType == 'CHCK':
            # Message from a member checking if higher ranked nodes are still alive
            logging.debug("Received Check Alive [CHCK] message from a member")
            try:
                networking.perfectSend('LIVE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        else:
            logging.debug("Ignoring unknown message type: " + str(msgType))
    finally:
        # Single cleanup point: runs for every branch, including the early
        # return above and unrecognised message types.
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
def electCoordinator():
    """Handles the node behavior when coordinator failure has been detected.

    The members returned by listMembers() are sorted; the first entry is
    treated as the highest ranked node.  While no coordinator update has
    been received (``NotReceivedCoordinatorUpdate`` is true):

    * If this host is the highest ranked known member, it records itself as
      coordinator, sends 'NEWC' plus its length-prefixed name to every other
      member, drops members that could not be reached, rewrites and
      distributes the members file, and stops.
    * Otherwise it sends 'LKUP' to each higher ranked member.  A 'RPLY'
      carries the coordinator name and ends the election; unreachable
      members are dropped from the candidate list.

    Side effects:
        Updates the module-level ``Coordinator`` and leaves
        ``NotReceivedCoordinatorUpdate`` set to True on exit so a later
        coordinator failure starts a fresh election.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    logging.debug("electCoordinator() begins")
    activeMembers = sorted(listMembers())
    logging.debug(str(activeMembers))
    me = socket.gethostname()

    # NOTE(review): when peers keep answering 'NONE' this loop re-probes them
    # without any delay -- consider whether a pause between rounds is wanted.
    while NotReceivedCoordinatorUpdate:
        logging.debug("Loop begins: " + str(activeMembers))
        if not activeMembers:
            # Every candidate was dropped and this host is not listed;
            # indexing activeMembers[0] below would raise IndexError.
            logging.critical("electCoordinator(): no active members left to elect from")
            break

        # Iterate over a snapshot: activeMembers is modified inside the loops
        # and removing from the list being iterated would skip entries.
        potentialActiveMembers = list(activeMembers)

        # This node is the highest ranked node as known to this node.
        # Become the Coordinator and announce it to everyone else
        if me == activeMembers[0].strip():
            Coordinator = me
            logging.debug("Declared myself as the Coordinator: " + Coordinator)
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    continue
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('NEWC', conn)
                    # Fixed 2-character length header (the width the NEWC
                    # handler reads as NAMELEN_LEN); zero-pad short names.
                    networking.perfectSend(str(len(me)).zfill(2), conn)
                    networking.perfectSend(me, conn)
                    networking.perfectReceive(4, conn)
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    # Remove the entry exactly as stored; the stripped name
                    # may not be present if entries carry whitespace.
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node +
                                  " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)
            rewriteMembersFile(activeMembers)
            distributeMembersFile(activeMembers)
            break
        else:
            # Don't try to become a Coordinator until any of the higher ranked nodes is alive.
            # Until then, just probe each of the higher ranked nodes
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    break
                logging.debug("Sending LKUP to " + node)
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    # Check if the node at other end is alive
                    networking.perfectSend('LKUP', conn)
                    response = networking.perfectReceive(4, conn)
                    if response == 'RPLY':
                        nameLen = int(networking.perfectReceive(2, conn))
                        Coordinator = networking.perfectReceive(nameLen, conn)
                        # Re-arm the flag on this exit path too, so a later
                        # coordinator failure can start a new election.
                        NotReceivedCoordinatorUpdate = True
                        return  # Can return safely. Found Coordinator
                    elif response == 'NONE':
                        pass
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    logging.debug("Before removing " + node + ":" + str(activeMembers))
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node +
                                  " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)

    NotReceivedCoordinatorUpdate = True  # For subsequent Coordinator Failures
def run(self):
    """Serve one request received on ``self.clientSocket``.

    A 4-character message type is read first and then dispatched:

    * 'JOIN' / 'LEAV' -- read a length-prefixed node name and add it to or
      remove it from the membership.
    * 'LKUP' -- answer 'RPLY' + name length + coordinator name, or 'NONE'
      when no coordinator is known.
    * 'DWLD' -- download the members file from the coordinator over HTTP,
      store it locally and answer 'DONE'.
    * 'NEWC' -- record the announced coordinator and answer 'UPDC'.
    * 'LTNC' -- read and log a batch of latency measurements.
    * 'CHCK' -- answer 'LIVE'.

    Any other message type is logged and ignored.  Errors inside a handler
    are logged and never propagate.  The client socket is closed on every
    path, including unknown message types.

    Side effects:
        May update the module-level ``Coordinator`` and
        ``NotReceivedCoordinatorUpdate``.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    MSGTYPE_LEN = 4

    try:
        try:
            msgType = networking.perfectReceive(MSGTYPE_LEN, self.clientSocket)
        except Exception:
            exctype, value = sys.exc_info()[:2]
            logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
            return

        if msgType == 'JOIN' or msgType == 'LEAV':
            # FIXME: Only Coordinator should receive such message
            try:
                logging.info("Received " + msgType + " message")
                # The name length header is NAMELEN_LEN characters. Example: 25
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                nodeName = networking.perfectReceive(nameLen, self.clientSocket)
                if msgType == 'JOIN':
                    addMember(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT JOIN]: " + nodeName + "\n[MEMBERS]: " + memString)
                else:
                    removeMembers(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT LEAVE]: " + nodeName + "\n[MEMBERS]: " + memString)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'LKUP':
            logging.debug("Received LKUP message")
            try:
                if Coordinator != '':
                    networking.perfectSend('RPLY', self.clientSocket)
                    # Zero-pad the length to the fixed header width; an
                    # unpadded single digit would make the peer consume the
                    # first character of the name as part of the length.
                    networking.perfectSend(str(len(Coordinator)).zfill(NAMELEN_LEN),
                                           self.clientSocket)
                    networking.perfectSend(Coordinator, self.clientSocket)
                else:
                    networking.perfectSend('NONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'DWLD':
            logging.debug("Received indication to download members.txt file")
            try:
                remote = urllib2.urlopen('http://' + Coordinator + ':' +
                                         str(HTTP_PORT) + '/' + MEMBERS)
                try:
                    contents = remote.read()
                finally:
                    remote.close()
                # Open the local copy only after the download succeeded so a
                # failed transfer does not leave a truncated members file.
                with open(MEMBERS, 'w') as localMembersFile:
                    localMembersFile.write(contents)
                memString = stringOfMembers()
                logging.info("\n[COORDINATOR]: " + Coordinator +
                             "\n[UPDATE LOCAL MEMBERS FILE]: \n[MEMBERS]: " + memString)
                networking.perfectSend('DONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.critical("Could not download members file from " + Coordinator)

        elif msgType == 'NEWC':
            # New coordinator has announced its arrival
            try:
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                Coordinator = networking.perfectReceive(nameLen, self.clientSocket)
                networking.perfectSend('UPDC', self.clientSocket)
                # Breaks the loop of electCoordinator() function
                NotReceivedCoordinatorUpdate = False
                logging.info("Received New Coordinator [NEWC] announcement from " + Coordinator)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Failed to receive the new Coordinator [NEWC] announcement")
                Coordinator = ''
                NotReceivedCoordinatorUpdate = True

        elif msgType == 'LTNC':
            logging.debug("Member sending latency measurements")
            try:
                numberOfEntries = int(networking.perfectReceive(2, self.clientSocket))
                for _ in range(numberOfEntries):
                    measurementLen = int(networking.perfectReceive(2, self.clientSocket))
                    measurement = networking.perfectReceive(measurementLen, self.clientSocket)
                    logging.debug(measurement)  # Only logging for time being
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Did not receive all latency measurements from the member")

        elif msgType == 'CHCK':
            # Message from a member checking if higher ranked nodes are still alive
            logging.debug("Received Check Alive [CHCK] message from a member")
            try:
                networking.perfectSend('LIVE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        else:
            logging.debug("Ignoring unknown message type: " + str(msgType))
    finally:
        # Single cleanup point: runs for every branch, including the early
        # return above and unrecognised message types.
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
def electCoordinator():
    """Handles the node behavior when coordinator failure has been detected.

    The members returned by listMembers() are sorted; the first entry is
    treated as the highest ranked node.  While no coordinator update has
    been received (``NotReceivedCoordinatorUpdate`` is true):

    * If this host is the highest ranked known member, it records itself as
      coordinator, sends 'NEWC' plus its length-prefixed name to every other
      member, drops members that could not be reached, rewrites and
      distributes the members file, and stops.
    * Otherwise it sends 'LKUP' to each higher ranked member.  A 'RPLY'
      carries the coordinator name and ends the election; unreachable
      members are dropped from the candidate list.

    Side effects:
        Updates the module-level ``Coordinator`` and leaves
        ``NotReceivedCoordinatorUpdate`` set to True on exit so a later
        coordinator failure starts a fresh election.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    logging.debug("electCoordinator() begins")
    activeMembers = sorted(listMembers())
    logging.debug(str(activeMembers))
    me = socket.gethostname()

    # NOTE(review): when peers keep answering 'NONE' this loop re-probes them
    # without any delay -- consider whether a pause between rounds is wanted.
    while NotReceivedCoordinatorUpdate:
        logging.debug("Loop begins: " + str(activeMembers))
        if not activeMembers:
            # Every candidate was dropped and this host is not listed;
            # indexing activeMembers[0] below would raise IndexError.
            logging.critical("electCoordinator(): no active members left to elect from")
            break

        # Iterate over a snapshot: activeMembers is modified inside the loops
        # and removing from the list being iterated would skip entries.
        potentialActiveMembers = list(activeMembers)

        # This node is the highest ranked node as known to this node.
        # Become the Coordinator and announce it to everyone else
        if me == activeMembers[0].strip():
            Coordinator = me
            logging.debug("Declared myself as the Coordinator: " + Coordinator)
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    continue
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('NEWC', conn)
                    # Fixed 2-character length header (the width the NEWC
                    # handler reads as NAMELEN_LEN); zero-pad short names.
                    networking.perfectSend(str(len(me)).zfill(2), conn)
                    networking.perfectSend(me, conn)
                    networking.perfectReceive(4, conn)
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    # Remove the entry exactly as stored; the stripped name
                    # may not be present if entries carry whitespace.
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node +
                                  " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)
            rewriteMembersFile(activeMembers)
            distributeMembersFile(activeMembers)
            break
        else:
            # Don't try to become a Coordinator until any of the higher ranked nodes is alive.
            # Until then, just probe each of the higher ranked nodes
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    break
                logging.debug("Sending LKUP to " + node)
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    # Check if the node at other end is alive
                    networking.perfectSend('LKUP', conn)
                    response = networking.perfectReceive(4, conn)
                    if response == 'RPLY':
                        nameLen = int(networking.perfectReceive(2, conn))
                        Coordinator = networking.perfectReceive(nameLen, conn)
                        # Re-arm the flag on this exit path too, so a later
                        # coordinator failure can start a new election.
                        NotReceivedCoordinatorUpdate = True
                        return  # Can return safely. Found Coordinator
                    elif response == 'NONE':
                        pass
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    logging.debug("Before removing " + node + ":" + str(activeMembers))
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node +
                                  " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)

    NotReceivedCoordinatorUpdate = True  # For subsequent Coordinator Failures