def sendLatencyMeasurements(coordinatorCopy):
    """Send this node's latency measurements to the Coordinator.

    Wire format (every field is written with ``networking.perfectSend``):

    * the message type ``'LTNC'``,
    * a two-character, zero-padded count of measurements,
    * for each measurement a two-character, zero-padded length followed by
      the text ``"[<this host>, <node>, <value>]"``.

    The LTNC handler reads exactly two characters for the count and for each
    length, so a value above 99 cannot be framed. Measurements that are too
    long are logged and skipped, and at most 99 measurements are sent per
    call, instead of emitting a three-digit header that would desynchronise
    the receiver.

    Any failure is logged and swallowed (best effort); the connection is
    always closed.

    Args:
        coordinatorCopy: host name of the Coordinator to send to.
    """
    HEADER_WIDTH = 2                      # characters the receiver reads per header
    MAX_HEADER_VALUE = 10 ** HEADER_WIDTH - 1

    logging.debug("sendLatencyMeasurements() begins")
    me = socket.gethostname()
    conn = None
    try:
        # Build every message first so the count header matches what is sent.
        measurements = []
        for node, value in latency.items():
            measurement = "[" + me + ", " + node + ", " + str(value) + "]"
            if len(measurement) > MAX_HEADER_VALUE:
                logging.debug("Skipping latency measurement too long for the "
                              "length header: " + measurement)
                continue
            measurements.append(measurement)
        if len(measurements) > MAX_HEADER_VALUE:
            logging.debug("Too many latency measurements; sending only the first "
                          + str(MAX_HEADER_VALUE))
            measurements = measurements[:MAX_HEADER_VALUE]

        conn = networking.getConnection(coordinatorCopy, LISTEN_PORT)
        networking.perfectSend('LTNC', conn)
        networking.perfectSend(str(len(measurements)).zfill(HEADER_WIDTH), conn)
        for measurement in measurements:
            networking.perfectSend(str(len(measurement)).zfill(HEADER_WIDTH), conn)
            networking.perfectSend(measurement, conn)
        logging.debug("Printing latency dictionary:" + str(latency))
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        logging.debug("Failed in sending some or all latency measurements to " + coordinatorCopy)
    finally:
        if conn:
            networking.closeConnection(conn)
def sendLatencyMeasurements(coordinatorCopy):
    """Send this node's latency measurements to the Coordinator.

    Wire format (every field is written with ``networking.perfectSend``):

    * the message type ``'LTNC'``,
    * a two-character, zero-padded count of measurements,
    * for each measurement a two-character, zero-padded length followed by
      the text ``"[<this host>, <node>, <value>]"``.

    The LTNC handler reads exactly two characters for the count and for each
    length, so a value above 99 cannot be framed. Measurements that are too
    long are logged and skipped, and at most 99 measurements are sent per
    call, instead of emitting a three-digit header that would desynchronise
    the receiver.

    Any failure is logged and swallowed (best effort); the connection is
    always closed.

    Args:
        coordinatorCopy: host name of the Coordinator to send to.
    """
    HEADER_WIDTH = 2                      # characters the receiver reads per header
    MAX_HEADER_VALUE = 10 ** HEADER_WIDTH - 1

    logging.debug("sendLatencyMeasurements() begins")
    me = socket.gethostname()
    conn = None
    try:
        # Build every message first so the count header matches what is sent.
        measurements = []
        for node, value in latency.items():
            measurement = "[" + me + ", " + node + ", " + str(value) + "]"
            if len(measurement) > MAX_HEADER_VALUE:
                logging.debug("Skipping latency measurement too long for the "
                              "length header: " + measurement)
                continue
            measurements.append(measurement)
        if len(measurements) > MAX_HEADER_VALUE:
            logging.debug("Too many latency measurements; sending only the first "
                          + str(MAX_HEADER_VALUE))
            measurements = measurements[:MAX_HEADER_VALUE]

        conn = networking.getConnection(coordinatorCopy, LISTEN_PORT)
        networking.perfectSend('LTNC', conn)
        networking.perfectSend(str(len(measurements)).zfill(HEADER_WIDTH), conn)
        for measurement in measurements:
            networking.perfectSend(str(len(measurement)).zfill(HEADER_WIDTH), conn)
            networking.perfectSend(measurement, conn)
        logging.debug("Printing latency dictionary:" + str(latency))
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        logging.debug("Failed in sending some or all latency measurements to " + coordinatorCopy)
    finally:
        if conn:
            networking.closeConnection(conn)
def coordinatorLookup():
    """Ask the seed nodes who the Coordinator is and return its host name.

    Every host listed in the ``SEEDS`` file (other than this host) is sent an
    ``'LKUP'`` request. A ``'RPLY'`` answer carries a two-character length
    followed by the Coordinator's name; ``'NONE'`` means the seed does not
    know. The first non-empty answer wins. If no seed answers, the function
    sleeps ``COORDINATOR_CHECK_INTERVAL`` and then declares this host the
    Coordinator.

    Side effects:
        Updates the module-level ``Coordinator``.

    Returns:
        The Coordinator's host name (this host's name when none was found).
    """
    global Coordinator  # assigned below, so the declaration is required

    me = socket.gethostname()
    # Read the seed list up front so the file is closed before any network I/O.
    with open(SEEDS, 'r') as seedsFile:
        seeds = [line.strip() for line in seedsFile]

    for x in range(1, 2):  # single pass; widen the range to retry more often
        for node in seeds:
            # Skip blank lines (an empty host name is not a valid peer) and ourselves.
            if not node or node == me:
                continue
            logging.debug("Checking if " + node + " knows the coordinator")
            conn = None
            try:
                conn = networking.getConnection(node, LISTEN_PORT)
                networking.perfectSend('LKUP', conn)
                response = networking.perfectReceive(4, conn)  # response can be None as well
                if response == 'RPLY':
                    nameLen = int(networking.perfectReceive(2, conn))
                    Coordinator = networking.perfectReceive(nameLen, conn)
                    logging.debug("Received RPLY " + Coordinator + " in coordinatorLookup()")
                elif response == 'NONE':
                    logging.debug(node + " doesn't know about the coordinator")
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                # If we got an error, the node we were talking to wasn't a
                # good candidate for coordinator anyway.
                Coordinator = ''
            finally:
                if conn:
                    networking.closeConnection(conn)
            if Coordinator != '':
                logging.debug("Found Coordinator")
                break
        if Coordinator == '':
            # Wait for the configured interval before giving up on this pass.
            time.sleep(COORDINATOR_CHECK_INTERVAL)

    if Coordinator == '':
        Coordinator = me
    logging.info("coordinatorLookup returning [COORDINATOR]: " + Coordinator)
    return Coordinator
def distributeMembersFile(members):
    """Tell every member except the Coordinator to download the members file.

    Each node is sent a ``'DWLD'`` message and a four-character reply is then
    read. The DWLD handler answers ``'DONE'`` only after a successful
    download, so any other reply (including ``None``) is reported the same way
    as a connection failure. Failures are logged and do not stop the
    distribution to the remaining nodes.

    Args:
        members: iterable of host names; entries may carry a trailing newline.
    """
    for node in members:
        node = node.strip()  # Strip trailing \n
        # Skip blank entries; the Coordinator doesn't need to send the file to itself.
        if not node or node == Coordinator:
            continue
        conn = None
        try:
            conn = networking.getConnection(node, LISTEN_PORT)
            # TODO: Can possibly update the list of members here as well
            networking.perfectSend('DWLD', conn)
            ack = networking.perfectReceive(4, conn)
            if ack != 'DONE':
                logging.critical(node + " missed out on an updated copy of members file")
        except Exception:
            # Broad on purpose: `except socket.error` was observed not to
            # catch "Connection refused" errors here.
            exctype, value = sys.exc_info()[:2]
            logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
            logging.critical(node + " missed out on an updated copy of members file")
        finally:
            if conn:
                networking.closeConnection(conn)
def coordinatorLookup():
    """Ask the seed nodes who the Coordinator is and return its host name.

    Every host listed in the ``SEEDS`` file (other than this host) is sent an
    ``'LKUP'`` request. A ``'RPLY'`` answer carries a two-character length
    followed by the Coordinator's name; ``'NONE'`` means the seed does not
    know. The first non-empty answer wins. If no seed answers, the function
    sleeps ``COORDINATOR_CHECK_INTERVAL`` and then declares this host the
    Coordinator.

    Side effects:
        Updates the module-level ``Coordinator``.

    Returns:
        The Coordinator's host name (this host's name when none was found).
    """
    global Coordinator  # assigned below, so the declaration is required

    me = socket.gethostname()
    # Read the seed list up front so the file is closed before any network I/O.
    with open(SEEDS, 'r') as seedsFile:
        seeds = [line.strip() for line in seedsFile]

    for x in range(1, 2):  # single pass; widen the range to retry more often
        for node in seeds:
            # Skip blank lines (an empty host name is not a valid peer) and ourselves.
            if not node or node == me:
                continue
            logging.debug("Checking if " + node + " knows the coordinator")
            conn = None
            try:
                conn = networking.getConnection(node, LISTEN_PORT)
                networking.perfectSend('LKUP', conn)
                response = networking.perfectReceive(4, conn)  # response can be None as well
                if response == 'RPLY':
                    nameLen = int(networking.perfectReceive(2, conn))
                    Coordinator = networking.perfectReceive(nameLen, conn)
                    logging.debug("Received RPLY " + Coordinator + " in coordinatorLookup()")
                elif response == 'NONE':
                    logging.debug(node + " doesn't know about the coordinator")
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                # If we got an error, the node we were talking to wasn't a
                # good candidate for coordinator anyway.
                Coordinator = ''
            finally:
                if conn:
                    networking.closeConnection(conn)
            if Coordinator != '':
                logging.debug("Found Coordinator")
                break
        if Coordinator == '':
            # Wait for the configured interval before giving up on this pass.
            time.sleep(COORDINATOR_CHECK_INTERVAL)

    if Coordinator == '':
        Coordinator = me
    logging.info("coordinatorLookup returning [COORDINATOR]: " + Coordinator)
    return Coordinator
def join(Coordinator):
    """Join the overlay commanded by the given Coordinator.

    Sends ``'JOIN'``, then this host's name length as a zero-padded
    two-character header, then the host name itself. The JOIN handler reads
    the length with ``NAMELEN_LEN`` characters (documented there as 2), so the
    header must always be two characters wide; an unpadded single digit would
    make it swallow the first character of the name.

    NOTE: a host name of 100 or more characters still cannot be expressed in
    the two-character header.

    Args:
        Coordinator: host name of the Coordinator to contact.

    Raises:
        Exception: any error from the networking layer is logged and
            re-raised to the caller.
    """
    conn = None
    try:
        conn = networking.getConnection(Coordinator, LISTEN_PORT)
        networking.perfectSend('JOIN', conn)
        me = socket.gethostname()
        networking.perfectSend(str(len(me)).zfill(2), conn)
        networking.perfectSend(me, conn)
        # TODO: Possibly use this connection to download updated members file
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        raise
    finally:
        # Single close point for both the success and the failure path.
        if conn:
            networking.closeConnection(conn)
def join(Coordinator):
    """Join the overlay commanded by the given Coordinator.

    Sends ``'JOIN'``, then this host's name length as a zero-padded
    two-character header, then the host name itself. The JOIN handler reads
    the length with ``NAMELEN_LEN`` characters (documented there as 2), so the
    header must always be two characters wide; an unpadded single digit would
    make it swallow the first character of the name.

    NOTE: a host name of 100 or more characters still cannot be expressed in
    the two-character header.

    Args:
        Coordinator: host name of the Coordinator to contact.

    Raises:
        Exception: any error from the networking layer is logged and
            re-raised to the caller.
    """
    conn = None
    try:
        conn = networking.getConnection(Coordinator, LISTEN_PORT)
        networking.perfectSend('JOIN', conn)
        me = socket.gethostname()
        networking.perfectSend(str(len(me)).zfill(2), conn)
        networking.perfectSend(me, conn)
        # TODO: Possibly use this connection to download updated members file
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        raise
    finally:
        # Single close point for both the success and the failure path.
        if conn:
            networking.closeConnection(conn)
def main(argv):
    """Entry point: start this node and run its measurement loop.

    Steps:
      1. Write a fresh members file containing only this host.
      2. Parse the options: ``-c`` start as Coordinator, ``-m`` start as a
         member (the default), ``-v`` debug logging. ``-h`` is accepted by
         the parser but currently has no effect.
      3. Bootstrap the node and start the ping handler thread.
      4. Every 5 seconds sense latency and, when this node is not the
         Coordinator, send the measurements to it. The loop ends on Ctrl-C or
         when the message handler reports an error.
      5. On the way out a non-Coordinator node sends a ``'LEAV'`` message.

    Args:
        argv: command line arguments without the program name.

    The function always terminates the process through ``sys.exit``.
    """
    global Coordinator

    startAsCoordinator = False
    logLevel = logging.INFO  # Default logging level

    # Node creates a members file that initially lists only itself.
    with open(MEMBERS, 'w') as membersFile:
        membersFile.write(socket.gethostname() + '\n')

    try:
        opts, args = getopt.getopt(argv, "cmvh")
    except getopt.GetoptError:
        print('Usage: python member.py -cmvh')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-c':
            startAsCoordinator = True
        elif opt == '-m':
            startAsCoordinator = False
        elif opt == '-v':
            logLevel = logging.DEBUG

    logging.basicConfig(filename="log.txt", level=logLevel,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.info("\n========== NEW EXPERIMENT STARTS ==========\n\n")

    messageHandler, isCoordinator, Coordinator = bootstrap(startAsCoordinator)
    pingHandler = pingUtil(2, "Ping handler thread", isCoordinator)
    pingHandler.daemon = True
    pingHandler.start()

    do_exit = False
    interval = 0
    while not do_exit:
        try:
            if messageHandler.err != 0:
                do_exit = True
            senseLatency()
            interval = interval + 1
            if interval == 1:
                if Coordinator != socket.gethostname() and Coordinator != '':
                    sendLatencyMeasurements(Coordinator)
                interval = 0
            time.sleep(5)
        except KeyboardInterrupt:
            print("Ctrl-C caught. Exit would terminate daemon threads...")
            do_exit = True

    # Stop running threads
    # messageHandler.stop()
    # Explicit comparison kept: the type bootstrap() returns is not visible here.
    if isCoordinator == False and Coordinator != '':
        conn = None
        try:
            conn = networking.getConnection(Coordinator, LISTEN_PORT)
            networking.perfectSend('LEAV', conn)
            me = socket.gethostname()
            # The LEAV handler reads a fixed two-character length header.
            networking.perfectSend(str(len(me)).zfill(2), conn)
            networking.perfectSend(me, conn)
            logging.debug("Sent LEAV message to " + Coordinator)
        except Exception:
            exctype, value = sys.exc_info()[:2]
            logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
            logging.debug("Failed to send LEAV message to " + Coordinator)
        finally:
            # Close the connection on the failure path as well.
            if conn:
                networking.closeConnection(conn)
    sys.exit(0)
def run(self):
    """Serve one request received on ``self.clientSocket``.

    The first four characters select the message type:

    * ``JOIN`` / ``LEAV``: a length header and a node name follow; the node
      is added to or removed from the members list.
    * ``LKUP``: reply ``'RPLY'`` + two-character length + Coordinator name,
      or ``'NONE'`` when no Coordinator is known.
    * ``DWLD``: download the members file from the Coordinator over HTTP,
      store it locally and acknowledge with ``'DONE'``.
    * ``NEWC``: a new Coordinator announces itself; record it and reply
      ``'UPDC'``.
    * ``LTNC``: a member sends latency measurements (only logged for now).
    * ``CHCK``: liveness probe, answered with ``'LIVE'``.

    Any other value (including ``None``) is logged and ignored. Errors inside
    a handler are logged and never propagate. The socket is closed exactly
    once on every path, including the unknown-type path.

    Side effects:
        May update the module-level ``Coordinator`` and
        ``NotReceivedCoordinatorUpdate``.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    MSGTYPE_LEN = 4

    try:
        msgType = networking.perfectReceive(MSGTYPE_LEN, self.clientSocket)
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
        return

    try:
        if msgType == 'JOIN' or msgType == 'LEAV':
            # FIXME: Only Coordinator should receive such message
            try:
                logging.info("Received " + msgType + " message")
                # The length header is NAMELEN_LEN characters wide. Example: 25
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                nodeName = networking.perfectReceive(nameLen, self.clientSocket)
                if msgType == 'JOIN':
                    addMember(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT JOIN]: " + nodeName + "\n[MEMBERS]: " + memString)
                else:
                    removeMembers(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT LEAVE]: " + nodeName + "\n[MEMBERS]: " + memString)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'LKUP':
            logging.debug("Received LKUP message")
            try:
                if Coordinator != '':
                    networking.perfectSend('RPLY', self.clientSocket)
                    # Lookup clients read a fixed two-character length header.
                    networking.perfectSend(str(len(Coordinator)).zfill(2), self.clientSocket)
                    networking.perfectSend(Coordinator, self.clientSocket)
                else:
                    networking.perfectSend('NONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'DWLD':
            logging.debug("Received indication to download members.txt file")
            try:
                remote = urllib2.urlopen('http://' + Coordinator + ':' + str(HTTP_PORT) + '/' + MEMBERS)
                try:
                    contents = remote.read()
                finally:
                    remote.close()
                # Write only after the download succeeded, so a failed
                # transfer cannot leave a truncated local members file.
                with open(MEMBERS, 'w') as localMembersFile:
                    localMembersFile.write(contents)
                memString = stringOfMembers()
                logging.info("\n[COORDINATOR]: " + Coordinator + "\n[UPDATE LOCAL MEMBERS FILE]: \n[MEMBERS]: " + memString)
                networking.perfectSend('DONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.critical("Could not download members file from " + Coordinator)

        elif msgType == 'NEWC':
            # New coordinator has announced its arrival
            try:
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                Coordinator = networking.perfectReceive(nameLen, self.clientSocket)
                networking.perfectSend('UPDC', self.clientSocket)
                NotReceivedCoordinatorUpdate = False  # Breaks the loop of electCoordinator()
                logging.info("Received New Coordinator [NEWC] announcement from " + Coordinator)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Failed to receive the new Coordinator [NEWC] announcement")
                Coordinator = ''
                NotReceivedCoordinatorUpdate = True

        elif msgType == 'LTNC':
            logging.debug("Member sending latency measurements")
            try:
                numberOfEntries = int(networking.perfectReceive(2, self.clientSocket))
                for i in range(numberOfEntries):
                    measurementLen = int(networking.perfectReceive(2, self.clientSocket))
                    measurement = networking.perfectReceive(measurementLen, self.clientSocket)
                    logging.debug(measurement)  # Only logging for time being
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Did not receive all latency measurements from the member")

        elif msgType == 'CHCK':
            # Message from a member checking if higher ranked nodes are still alive
            logging.debug("Received Check Alive [CHCK] message from a member")
            try:
                networking.perfectSend('LIVE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        else:
            logging.debug("Ignoring unknown message type: " + repr(msgType))
    finally:
        # Single close point for every message type, known or not.
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
def electCoordinator():
    """Handle this node's behaviour after a Coordinator failure was detected.

    Members are ranked by sorted host name. While no ``NEWC`` announcement
    has been received (``NotReceivedCoordinatorUpdate`` is true):

    * If this node is the highest ranked member still believed alive, it
      declares itself Coordinator, announces ``'NEWC'`` to every other
      member, drops the ones that cannot be reached, rewrites and
      redistributes the members file, and stops.
    * Otherwise it probes each higher ranked node with ``'LKUP'``. A
      ``'RPLY'`` answer names the Coordinator and ends the election;
      unreachable nodes are dropped from the candidate list.

    Each pass iterates over a snapshot of the member list, because members
    are removed from ``activeMembers`` during the pass and mutating a list
    while iterating over it skips entries.

    Side effects:
        Updates the module-level ``Coordinator`` and resets
        ``NotReceivedCoordinatorUpdate`` to ``True`` when the election loop
        finishes without finding an existing Coordinator.
    """
    logging.debug("electCoordinator() begins")
    global Coordinator
    global NotReceivedCoordinatorUpdate

    activeMembers = sorted(listMembers())
    logging.debug(str(activeMembers))
    me = socket.gethostname()

    while NotReceivedCoordinatorUpdate:
        logging.debug("Loop begins: " + str(activeMembers))
        # Iterate over a copy; activeMembers itself is pruned inside the loops.
        potentialActiveMembers = list(activeMembers)

        # This node is the highest ranked node as known to this node.
        # Become the Coordinator and announce it to everyone else.
        # Entries are stripped wherever they are compared, as in the loops below.
        if me == activeMembers[0].strip():
            Coordinator = me
            logging.debug("Declared myself as the Coordinator: " + Coordinator)
            for member in potentialActiveMembers:
                node = member.strip()
                if node == Coordinator:
                    continue
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('NEWC', conn)
                    # The NEWC handler reads a fixed two-character length header.
                    networking.perfectSend(str(len(me)).zfill(2), conn)
                    networking.perfectSend(me, conn)
                    networking.perfectReceive(4, conn)
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    # Remove the entry exactly as stored (it may be unstripped).
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node + " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)
            rewriteMembersFile(activeMembers)
            distributeMembersFile(activeMembers)
            break
        else:
            # Don't try to become a Coordinator while any higher ranked node
            # is alive. Until then, just probe each of the higher ranked nodes.
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    break
                logging.debug("Sending LKUP to " + node)
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('LKUP', conn)  # Check if the node at other end is alive
                    response = networking.perfectReceive(4, conn)
                    if response == 'RPLY':
                        nameLen = int(networking.perfectReceive(2, conn))
                        Coordinator = networking.perfectReceive(nameLen, conn)
                        return  # Found the Coordinator; the finally below still closes conn
                    elif response == 'NONE':
                        pass
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    logging.debug("Before removing " + node + ":" + str(activeMembers))
                    # Remove the entry exactly as stored (it may be unstripped).
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node + " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)

    NotReceivedCoordinatorUpdate = True  # For subsequent Coordinator Failures
def main(argv):
    """Entry point: start this node and run its measurement loop.

    Steps:
      1. Write a fresh members file containing only this host.
      2. Parse the options: ``-c`` start as Coordinator, ``-m`` start as a
         member (the default), ``-v`` debug logging. ``-h`` is accepted by
         the parser but currently has no effect.
      3. Bootstrap the node and start the ping handler thread.
      4. Every 5 seconds sense latency and, when this node is not the
         Coordinator, send the measurements to it. The loop ends on Ctrl-C or
         when the message handler reports an error.
      5. On the way out a non-Coordinator node sends a ``'LEAV'`` message.

    Args:
        argv: command line arguments without the program name.

    The function always terminates the process through ``sys.exit``.
    """
    global Coordinator

    startAsCoordinator = False
    logLevel = logging.INFO  # Default logging level

    # Node creates a members file that initially lists only itself.
    with open(MEMBERS, 'w') as membersFile:
        membersFile.write(socket.gethostname() + '\n')

    try:
        opts, args = getopt.getopt(argv, "cmvh")
    except getopt.GetoptError:
        print('Usage: python member.py -cmvh')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-c':
            startAsCoordinator = True
        elif opt == '-m':
            startAsCoordinator = False
        elif opt == '-v':
            logLevel = logging.DEBUG

    logging.basicConfig(filename="log.txt", level=logLevel,
                        format='%(asctime)s %(levelname)s %(message)s')
    logging.info("\n========== NEW EXPERIMENT STARTS ==========\n\n")

    messageHandler, isCoordinator, Coordinator = bootstrap(startAsCoordinator)
    pingHandler = pingUtil(2, "Ping handler thread", isCoordinator)
    pingHandler.daemon = True
    pingHandler.start()

    do_exit = False
    interval = 0
    while not do_exit:
        try:
            if messageHandler.err != 0:
                do_exit = True
            senseLatency()
            interval = interval + 1
            if interval == 1:
                if Coordinator != socket.gethostname() and Coordinator != '':
                    sendLatencyMeasurements(Coordinator)
                interval = 0
            time.sleep(5)
        except KeyboardInterrupt:
            print("Ctrl-C caught. Exit would terminate daemon threads...")
            do_exit = True

    # Stop running threads
    # messageHandler.stop()
    # Explicit comparison kept: the type bootstrap() returns is not visible here.
    if isCoordinator == False and Coordinator != '':
        conn = None
        try:
            conn = networking.getConnection(Coordinator, LISTEN_PORT)
            networking.perfectSend('LEAV', conn)
            me = socket.gethostname()
            # The LEAV handler reads a fixed two-character length header.
            networking.perfectSend(str(len(me)).zfill(2), conn)
            networking.perfectSend(me, conn)
            logging.debug("Sent LEAV message to " + Coordinator)
        except Exception:
            exctype, value = sys.exc_info()[:2]
            logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
            logging.debug("Failed to send LEAV message to " + Coordinator)
        finally:
            # Close the connection on the failure path as well.
            if conn:
                networking.closeConnection(conn)
    sys.exit(0)
def run(self):
    """Serve one request received on ``self.clientSocket``.

    The first four characters select the message type:

    * ``JOIN`` / ``LEAV``: a length header and a node name follow; the node
      is added to or removed from the members list.
    * ``LKUP``: reply ``'RPLY'`` + two-character length + Coordinator name,
      or ``'NONE'`` when no Coordinator is known.
    * ``DWLD``: download the members file from the Coordinator over HTTP,
      store it locally and acknowledge with ``'DONE'``.
    * ``NEWC``: a new Coordinator announces itself; record it and reply
      ``'UPDC'``.
    * ``LTNC``: a member sends latency measurements (only logged for now).
    * ``CHCK``: liveness probe, answered with ``'LIVE'``.

    Any other value (including ``None``) is logged and ignored. Errors inside
    a handler are logged and never propagate. The socket is closed exactly
    once on every path, including the unknown-type path.

    Side effects:
        May update the module-level ``Coordinator`` and
        ``NotReceivedCoordinatorUpdate``.
    """
    global Coordinator
    global NotReceivedCoordinatorUpdate
    MSGTYPE_LEN = 4

    try:
        msgType = networking.perfectReceive(MSGTYPE_LEN, self.clientSocket)
    except Exception:
        exctype, value = sys.exc_info()[:2]
        logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
        return

    try:
        if msgType == 'JOIN' or msgType == 'LEAV':
            # FIXME: Only Coordinator should receive such message
            try:
                logging.info("Received " + msgType + " message")
                # The length header is NAMELEN_LEN characters wide. Example: 25
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                nodeName = networking.perfectReceive(nameLen, self.clientSocket)
                if msgType == 'JOIN':
                    addMember(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT JOIN]: " + nodeName + "\n[MEMBERS]: " + memString)
                else:
                    removeMembers(nodeName)
                    memString = stringOfMembers()
                    logging.info("\n[EVENT LEAVE]: " + nodeName + "\n[MEMBERS]: " + memString)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'LKUP':
            logging.debug("Received LKUP message")
            try:
                if Coordinator != '':
                    networking.perfectSend('RPLY', self.clientSocket)
                    # Lookup clients read a fixed two-character length header.
                    networking.perfectSend(str(len(Coordinator)).zfill(2), self.clientSocket)
                    networking.perfectSend(Coordinator, self.clientSocket)
                else:
                    networking.perfectSend('NONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        elif msgType == 'DWLD':
            logging.debug("Received indication to download members.txt file")
            try:
                remote = urllib2.urlopen('http://' + Coordinator + ':' + str(HTTP_PORT) + '/' + MEMBERS)
                try:
                    contents = remote.read()
                finally:
                    remote.close()
                # Write only after the download succeeded, so a failed
                # transfer cannot leave a truncated local members file.
                with open(MEMBERS, 'w') as localMembersFile:
                    localMembersFile.write(contents)
                memString = stringOfMembers()
                logging.info("\n[COORDINATOR]: " + Coordinator + "\n[UPDATE LOCAL MEMBERS FILE]: \n[MEMBERS]: " + memString)
                networking.perfectSend('DONE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.critical("Could not download members file from " + Coordinator)

        elif msgType == 'NEWC':
            # New coordinator has announced its arrival
            try:
                nameLen = int(networking.perfectReceive(NAMELEN_LEN, self.clientSocket))
                Coordinator = networking.perfectReceive(nameLen, self.clientSocket)
                networking.perfectSend('UPDC', self.clientSocket)
                NotReceivedCoordinatorUpdate = False  # Breaks the loop of electCoordinator()
                logging.info("Received New Coordinator [NEWC] announcement from " + Coordinator)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Failed to receive the new Coordinator [NEWC] announcement")
                Coordinator = ''
                NotReceivedCoordinatorUpdate = True

        elif msgType == 'LTNC':
            logging.debug("Member sending latency measurements")
            try:
                numberOfEntries = int(networking.perfectReceive(2, self.clientSocket))
                for i in range(numberOfEntries):
                    measurementLen = int(networking.perfectReceive(2, self.clientSocket))
                    measurement = networking.perfectReceive(measurementLen, self.clientSocket)
                    logging.debug(measurement)  # Only logging for time being
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                logging.debug("Did not receive all latency measurements from the member")

        elif msgType == 'CHCK':
            # Message from a member checking if higher ranked nodes are still alive
            logging.debug("Received Check Alive [CHCK] message from a member")
            try:
                networking.perfectSend('LIVE', self.clientSocket)
            except Exception:
                exctype, value = sys.exc_info()[:2]
                logging.debug("General exception: " + str(exctype) + " Value: " + str(value))

        else:
            logging.debug("Ignoring unknown message type: " + repr(msgType))
    finally:
        # Single close point for every message type, known or not.
        if self.clientSocket:
            networking.closeConnection(self.clientSocket)
def electCoordinator():
    """Handle this node's behaviour after a Coordinator failure was detected.

    Members are ranked by sorted host name. While no ``NEWC`` announcement
    has been received (``NotReceivedCoordinatorUpdate`` is true):

    * If this node is the highest ranked member still believed alive, it
      declares itself Coordinator, announces ``'NEWC'`` to every other
      member, drops the ones that cannot be reached, rewrites and
      redistributes the members file, and stops.
    * Otherwise it probes each higher ranked node with ``'LKUP'``. A
      ``'RPLY'`` answer names the Coordinator and ends the election;
      unreachable nodes are dropped from the candidate list.

    Each pass iterates over a snapshot of the member list, because members
    are removed from ``activeMembers`` during the pass and mutating a list
    while iterating over it skips entries.

    Side effects:
        Updates the module-level ``Coordinator`` and resets
        ``NotReceivedCoordinatorUpdate`` to ``True`` when the election loop
        finishes without finding an existing Coordinator.
    """
    logging.debug("electCoordinator() begins")
    global Coordinator
    global NotReceivedCoordinatorUpdate

    activeMembers = sorted(listMembers())
    logging.debug(str(activeMembers))
    me = socket.gethostname()

    while NotReceivedCoordinatorUpdate:
        logging.debug("Loop begins: " + str(activeMembers))
        # Iterate over a copy; activeMembers itself is pruned inside the loops.
        potentialActiveMembers = list(activeMembers)

        # This node is the highest ranked node as known to this node.
        # Become the Coordinator and announce it to everyone else.
        # Entries are stripped wherever they are compared, as in the loops below.
        if me == activeMembers[0].strip():
            Coordinator = me
            logging.debug("Declared myself as the Coordinator: " + Coordinator)
            for member in potentialActiveMembers:
                node = member.strip()
                if node == Coordinator:
                    continue
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('NEWC', conn)
                    # The NEWC handler reads a fixed two-character length header.
                    networking.perfectSend(str(len(me)).zfill(2), conn)
                    networking.perfectSend(me, conn)
                    networking.perfectReceive(4, conn)
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    # Remove the entry exactly as stored (it may be unstripped).
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node + " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)
            rewriteMembersFile(activeMembers)
            distributeMembersFile(activeMembers)
            break
        else:
            # Don't try to become a Coordinator while any higher ranked node
            # is alive. Until then, just probe each of the higher ranked nodes.
            for member in potentialActiveMembers:
                node = member.strip()
                if node == me:
                    break
                logging.debug("Sending LKUP to " + node)
                conn = None
                try:
                    conn = networking.getConnection(node, LISTEN_PORT)
                    networking.perfectSend('LKUP', conn)  # Check if the node at other end is alive
                    response = networking.perfectReceive(4, conn)
                    if response == 'RPLY':
                        nameLen = int(networking.perfectReceive(2, conn))
                        Coordinator = networking.perfectReceive(nameLen, conn)
                        return  # Found the Coordinator; the finally below still closes conn
                    elif response == 'NONE':
                        pass
                except Exception:
                    exctype, value = sys.exc_info()[:2]
                    logging.debug("General exception: " + str(exctype) + " Value: " + str(value))
                    logging.debug("Before removing " + node + ":" + str(activeMembers))
                    # Remove the entry exactly as stored (it may be unstripped).
                    activeMembers.remove(member)
                    logging.debug("electCoordinator(): Removing " + node + " from the list of activeMembers")
                    logging.debug(str(activeMembers))
                finally:
                    if conn:
                        networking.closeConnection(conn)

    NotReceivedCoordinatorUpdate = True  # For subsequent Coordinator Failures