def get_viewleader_info(viewleader_sock):
    """Ask the viewleader for its current view.

    Sends a ``query_servers`` request over ``viewleader_sock`` and reads
    the reply from the same socket.

    Returns:
        A ``(active_servers, epoch)`` tuple taken from the reply's
        ``'Active servers'`` and ``'Current epoch'`` entries.

    Raises:
        KeyError: if the reply lacks either entry.
    """
    common_functions.send_msg(viewleader_sock, {'cmd': 'query_servers'}, True)
    reply = common_functions.recv_msg(viewleader_sock, True)
    current_epoch = reply['Current epoch']
    servers = reply['Active servers']
    return servers, current_epoch
def accept_and_handle_messages(self, bound_socket, src_port, view_leader_ip):
    """Serve client requests forever, heartbeating when accept() times out.

    Each accepted connection is read once, handed to
    ``self.process_msg_from_client`` and answered with its result. When
    ``bound_socket.accept()`` raises ``socket.timeout`` a heartbeat is
    sent to the viewleader instead.

    Args:
        bound_socket: listening socket; its accept() is expected to time out
            periodically so heartbeats get sent.
        src_port: port this server listens on, included in the heartbeat.
        view_leader_ip: unused here; the heartbeat payload carries
            ``self.view_leader_ip``.

    This function never returns.
    """
    while True:
        try:
            sock, (addr, accepted_port) = bound_socket.accept()
            try:
                if accepted_port is not None:
                    print("Accepting connection from host " + addr)
                    received = False
                    recvd_msg = None
                    try:
                        recvd_msg = common_functions.recv_msg(sock)
                        received = True
                    except ConnectionResetError:
                        print("Connection dropped.")
                    except AttributeError:
                        print("Cannot decode message.")

                    # Only process what was actually received; previously a
                    # failed receive fell through to an unbound (or stale)
                    # recvd_msg.
                    if received:
                        response_dict = self.process_msg_from_client(recvd_msg)
                        try:
                            common_functions.send_msg(sock, response_dict)
                        except Exception:
                            print("Can't send over whole message.")
            finally:
                # One request per connection: always release the client socket.
                sock.close()
        except socket.timeout:
            hb_sock = None
            try:
                print('Sending heartbeat msg to viewleader...')
                # NOTE(review): the heartbeat always goes to 'localhost', not
                # to view_leader_ip / self.view_leader_ip -- confirm intent.
                hb_sock = common_functions.create_connection(
                    'localhost', 39000, 39010, 1, False)
                try:
                    common_functions.send_msg(
                        hb_sock, {
                            'cmd': 'heartbeat',
                            'args': [
                                str(self.unique_id), src_port,
                                self.view_leader_ip
                            ]
                        })
                except Exception:
                    print("Can't send over whole message.")
                    # Nothing to wait for if the request never left.
                    raise

                reply = None
                try:
                    reply = common_functions.recv_msg(hb_sock)
                except ConnectionResetError:
                    print("Connection dropped.")
                except AttributeError:
                    print("Cannot decode message.")
                print('Receiving response...')
                if reply is not None:
                    print(str(reply))
            except Exception:
                print('Heartbeat rejected, will try again in 10 seconds...')
            finally:
                if hb_sock is not None:
                    hb_sock.close()
            continue
def send_and_recv_heartbeat(self, src_port):
    """Send a heartbeat to the viewleader, then refresh ``self.epoch``.

    Two short-lived connections to ``self.view_leader_ip`` (ports
    39000-39010) are used: one for the ``heartbeat`` request and, if a
    reply came back, one for an ``update_view`` request whose
    ``'Current Epoch'`` value is stored in ``self.epoch``.

    Args:
        src_port: port this server listens on, sent with the heartbeat.

    Raises:
        Exception: whatever ``create_connection``, ``send_msg`` or
            ``recv_msg`` raise. A failed connect is printed and re-raised
            rather than falling through to an unbound socket.
    """
    try:
        sock = common_functions.create_connection(self.view_leader_ip, 39000,
                                                  39010, 1, False)
    except Exception as e:
        print("Couldn't establish a connection with viewleader: ", e)
        raise
    try:
        common_functions.send_msg(sock, {
            'cmd': 'heartbeat',
            'args': [str(self.unique_id), src_port]
        }, False)
        recvd_msg = common_functions.recv_msg(sock, False)
    finally:
        if sock is not None:
            sock.close()

    if recvd_msg is None:
        return

    # NOTE(review): status is never used; the indexing is kept because it
    # raises on an empty reply, which aborts the epoch update -- confirm.
    status = recvd_msg[0]

    try:
        sock = common_functions.create_connection(self.view_leader_ip, 39000,
                                                  39010, 1, False)
    except Exception as e:
        print("Couldn't establish a connection with viewleader: ", e)
        raise
    try:
        common_functions.send_msg(sock, {'cmd': 'update_view'}, False)
        curr_epoch = common_functions.recv_msg(sock, False)['Current Epoch']
        self.epoch = curr_epoch
    finally:
        if sock is not None:
            sock.close()
def start(self):
    """Run one client RPC from the parsed command line, then exit.

    Destination selection:
      * ``query_servers`` / ``lock_get`` / ``lock_release`` go to the
        viewleader (ports 39000-39010).
      * ``getr`` / ``setr`` are delegated to ``client_rpc`` and the result
        is printed.
      * anything else goes to the server (ports 38000-38010).

    For the direct RPCs the reply is printed; a ``retry`` status reply
    makes the client wait 5 seconds and resend on a fresh connection.
    """
    args = self.parse_cmd_arguments()

    if args.server is not None:
        if args.cmd is None:
            print("RPC command not provided.")
            sys.exit()

        viewleader_args = ['query_servers', 'lock_get', 'lock_release']
        self.timeout = 1

        # Pick destination host / port range from the RPC being called.
        if args.cmd in viewleader_args:
            self.dest_host = str(args.viewleader)
            self.dest_port_low = 39000
            self.dest_port_high = 39010
            self.dest_name = 'viewleader'
        elif args.cmd == 'getr' or args.cmd == 'setr':
            self.dest_host = str(args.viewleader)
            self.dest_port_low = 39000
            self.dest_port_high = 39010
            self.dest_name = 'viewleader_and_server'
        else:
            self.dest_host = str(args.server)
            self.dest_port_low = 38000
            self.dest_port_high = 38010
            self.dest_name = 'server'

        args_dict = self.create_dict(args)

        if self.dest_name == 'viewleader_and_server':
            if args.cmd == 'getr':
                print(client_rpc.getr(args.key, self.dest_host,
                                      self.dest_port_low, self.dest_port_high,
                                      self.timeout))
            else:
                print(client_rpc.setr(args.key, args.val, self.dest_host,
                                      self.dest_port_low, self.dest_port_high,
                                      self.timeout))
        else:
            # The retry status may arrive as the literal string or as a dict,
            # depending on which viewleader implementation answers.
            retry_replies = ("{'status': 'retry'}", {'status': 'retry'})
            while True:
                sock = common_functions.create_connection(
                    self.dest_host, self.dest_port_low, self.dest_port_high,
                    self.timeout, True)
                try:
                    common_functions.send_msg(sock, args_dict, True)
                    recvd_msg = common_functions.recv_msg(sock, True)
                finally:
                    # Close every attempt's socket; retries used to leak them.
                    if sock is not None:
                        sock.close()

                print(str(recvd_msg))
                if recvd_msg not in retry_replies:
                    break
                time.sleep(5)  # wait before asking again

        sys.exit()
def run_consensus_alg(self, sock, cmd, args):
    """Run one prepare/accept round for ``cmd`` across the replicas.

    Phase 1 broadcasts a ``prepare`` carrying the current log length as the
    proposal number. If ``self.has_quorum`` accepts the collected
    responses, phase 2 sends an ``accept`` to every replica that answered
    ``'ok'``, including any trailing log entries that replica said it was
    missing.

    Args:
        sock: unused here; kept for interface compatibility.
        cmd: name of the command being proposed.
        args: argument list for ``cmd``.

    Returns:
        True if a quorum answered the prepare, False otherwise.
    """
    proposal_num = len(self.log)  # proposal number == length of the log
    prepare_msg = {'cmd': 'prepare', 'proposal_num': proposal_num}

    print("Broadcasting prepare message to replicas...")
    responses = self.broadcast(prepare_msg, self.view_leader_list)

    if not self.has_quorum(responses):
        return False

    for response in responses:
        # A response without addr/port/status cannot be answered; skip it.
        try:
            replica_addr = response['addr']
            replica_port = response['port']
            promised = response['status'] == 'ok'
        except (KeyError, TypeError, IndexError):
            continue
        status = 'accept' if promised else 'reject'

        num_logs_replica_needs = response.get('num_logs_replica_needs')

        if num_logs_replica_needs:
            # Replica is behind: ship the tail of the log along with the
            # new command.
            print("num_logs_replica_needs: {}".format(num_logs_replica_needs))
            logs_to_replay = self.log[len(self.log) - num_logs_replica_needs:]
            print("logs_to_replay: {}".format(len(logs_to_replay)))
            msg = {'cmd': status,
                   'logs_replica_needs': logs_to_replay,
                   'new_proposal_num': len(self.log),
                   'new_cmd': cmd,
                   'args': args}
        else:
            # NOTE(review): a 'logs_leader_is_missing' reply gets the same
            # plain message; the leader does not ingest those logs here.
            msg = {'cmd': status,
                   'new_proposal_num': len(self.log),
                   'new_cmd': cmd,
                   'args': args}

        if status != 'accept':
            continue

        # Use a separate name so the caller's client socket is untouched,
        # and check the connection before sending on it.
        replica_sock = common_functions.create_connection(
            replica_addr, replica_port, replica_port, 1, False)
        if not replica_sock:
            continue
        try:
            common_functions.send_msg(replica_sock, msg, False)
        finally:
            replica_sock.close()

    return True
def rebalance(self, old_view, new_view, epoch_op):
    """Tell every server in ``new_view`` to rebalance its data.

    Args:
        old_view: the previous view, forwarded verbatim.
        new_view: iterable of ``((addr, port), server_id)`` entries; each
            one receives the ``rebalance`` command.
        epoch_op: forwarded as the ``'op'`` field.

    A server that cannot be reached is reported and skipped, so the
    remaining servers are still notified.
    """
    rebalance_msg = {
        'cmd': 'rebalance',
        'old_view': old_view,
        'new_view': new_view,
        'op': epoch_op
    }
    for ((addr, port), server_id) in new_view:
        try:
            server_sock = common_functions.create_connection(
                addr, port, port, None, False)
        except Exception as e:
            print("Couldn't establish a connection with replica: ", e)
            # No socket to use; previously this fell through to an unbound
            # or already-closed server_sock.
            continue
        try:
            common_functions.send_msg(server_sock, rebalance_msg, False)
        finally:
            server_sock.close()
def broadcast(self, msg, replicas):
    """Send ``msg`` to every replica except the leader and gather replies.

    Args:
        msg: message passed to ``send_msg`` for each replica.
        replicas: iterable of ``(addr, port)`` pairs.

    Returns:
        List of the truthy replies, in replica order. Replicas that could
        not be connected to (falsy socket) contribute nothing.
    """
    # The leader's port is compared in string form.
    leader_endpoint = (self.leader[0], str(self.leader[1]))
    collected = []

    for addr, port in replicas:
        # Never broadcast to the leader itself.
        if (addr, port) == leader_endpoint:
            continue

        conn = common_functions.create_connection(addr, port, port, 1, False)
        if not conn:
            continue

        common_functions.send_msg(conn, msg, False)
        reply = common_functions.recv_msg(conn, False)
        if reply:
            collected.append(reply)
        conn.close()

    return collected
def broadcast(replicas, object_to_send, epoch, timeout):
    """Send ``object_to_send`` to each replica and combine the outcome.

    Behaviour depends on ``object_to_send['cmd']``:
      * ``request_vote``: ``epoch`` and the replica's ``server_id`` are added
        to the message. Returns ``{'cmd': 'abort'}`` on the first
        ``'abort'`` vote or timeout, ``{'cmd': 'commit'}`` otherwise.
      * ``getr``: returns the first non-None reply.
      * ``setr``: returns the last reply received.
      * other commands are sent without waiting for a reply.

    Args:
        replicas: iterable of ``((addr, port), server_id)`` entries.
        object_to_send: message dict; mutated for ``request_vote``.
        epoch: epoch stamped onto vote requests.
        timeout: connection timeout passed to ``create_connection``.

    Returns:
        See above. For non-vote commands, a ``{'status': 'fail', ...}``
        dict is returned when no replica produced a reply.
    """
    rpc_command = object_to_send['cmd']
    is_vote = rpc_command == 'request_vote'
    response_key = None

    for ((addr, port), server_id) in replicas:
        if is_vote:
            object_to_send['epoch'] = epoch
            object_to_send['server_id'] = str(server_id)

        server_sock = None
        try:
            server_sock = common_functions.create_connection(
                addr, port, port, timeout, False)
            common_functions.send_msg(server_sock, object_to_send, False)

            if is_vote:
                vote = common_functions.recv_msg(server_sock, False)
                print("Accepting vote from " + addr)
                if vote == 'abort':
                    return {'cmd': 'abort'}
            elif rpc_command == 'getr':
                # value associated with the requested key, if this replica has it
                response_key = common_functions.recv_msg(server_sock, False)
                if response_key is not None:
                    return response_key
            elif rpc_command == 'setr':
                response_key = common_functions.recv_msg(server_sock, False)
        except socket.timeout:
            if is_vote:
                return {'cmd': 'abort'}
            if server_sock is None:
                print(
                    "Couldn't connect to current replica server...will continue on remaining replicas: "
                )
        finally:
            # Runs on early returns too, so no socket is leaked.
            if server_sock is not None:
                server_sock.close()

    if is_vote:
        return {'cmd': 'commit'}

    # The test used to be inverted: real replies were replaced by a
    # failure, while "nothing found" returned None.
    if response_key is None:
        result = "No key found in any of the replica servers."
        response_key = {'status': 'fail', 'result': result}
    return response_key
def accept_and_handle_messages(self, bound_socket, src_port):
    """Serve client requests forever, heartbeating at most every 10 seconds.

    A heartbeat is attempted when at least 10 seconds have passed since
    ``self.last_heartbeat_time``: at the top of each loop pass, after a
    request has been answered, and when ``accept()`` times out.

    Args:
        bound_socket: listening socket to accept connections from.
        src_port: this server's port, forwarded to the heartbeat call.

    This function never returns.
    """

    def heartbeat_if_due(rejection_notice):
        # Send a heartbeat once the 10 second interval has elapsed; any
        # failure is reported with the given notice and otherwise ignored.
        if time.time() - self.last_heartbeat_time >= 10.0:
            try:
                self.last_heartbeat_time = time.time()
                self.send_and_recv_heartbeat(src_port)
            except Exception as e:
                print(rejection_notice)

    retry_notice = 'Heartbeat rejected, will try again in 10 seconds...'

    while True:
        heartbeat_if_due(retry_notice)
        try:
            # accept() yields the connection socket and the peer's (addr, port)
            sock, (addr, accepted_port) = bound_socket.accept()
            if accepted_port is not None:
                request = common_functions.recv_msg(sock, False)
                response = self.process_msg_from_client(request)
                common_functions.send_msg(sock, response, False)
                # NOTE(review): the accepted socket is not closed here --
                # confirm the peer or a helper is responsible for that.
                heartbeat_if_due('RPC Heartbeat rejected...')
        except socket.timeout:
            heartbeat_if_due(retry_notice)
            continue
def send_and_recv_heartbeat(self):
    """Send one heartbeat to the current viewleader and adopt its epoch.

    The heartbeat carries this server's id, hostname and ``self.src_port``.
    On a reply whose status is not ``'not ok'``, ``self.epoch`` is set to
    the reply's ``'Current Epoch'``. Any failure is printed and swallowed,
    so the caller can simply try again at the next interval.
    """
    sock = None  # bound up front so the cleanup below is always safe
    try:
        sock = common_functions.contact_leader(self.view_leader_list)
        common_functions.send_msg(
            sock, {
                'cmd': 'heartbeat',
                'args':
                [str(self.unique_id),
                 socket.gethostname(), self.src_port]
            }, False)
        recvd_msg = common_functions.recv_msg(sock, False)
        status = recvd_msg['status']
        curr_epoch = recvd_msg['Current Epoch']
        if status == 'not ok':
            raise Exception("viewleader replied 'not ok'")
        self.epoch = curr_epoch  # updates epoch
    except Exception as e:
        # The reason used to be printed next to a literal '{}'.
        print('Heartbeat rejected, will try again in 10 seconds...{}'.format(e))
    finally:
        if sock:
            sock.close()
def accept_and_handle_messages(self, bound_socket):
    """Serve client requests forever, heartbeating at most every 10 seconds.

    A heartbeat is sent when at least 10 seconds have passed since
    ``self.last_heartbeat_time``: at the top of each loop pass, after a
    request has been answered, and when ``accept()`` times out.

    Args:
        bound_socket: listening socket to accept connections from.

    This function never returns.
    """

    def heartbeat_if_due():
        # Stamp the time first, then send, once the interval has elapsed.
        if time.time() - self.last_heartbeat_time >= 10.0:
            self.last_heartbeat_time = time.time()
            self.send_and_recv_heartbeat()

    while True:
        heartbeat_if_due()
        try:
            # accept() yields the connection socket and the peer's (addr, port)
            sock, (addr, accepted_port) = bound_socket.accept()
            if accepted_port is not None:
                request = common_functions.recv_msg(sock, False)
                response = self.process_msg_from_client(request)
                common_functions.send_msg(sock, response, False)
                heartbeat_if_due()
        except socket.timeout:
            heartbeat_if_due()
            continue
def heartbeat(new_id, port, addr, sock):
    """Accept or reject a server heartbeat and answer over ``sock``.

    A heartbeat is rejected only when ``(addr, port)`` is already recorded
    in the module-level ``heartbeats`` table with the same id and a status
    other than ``'working'``. Every accepted heartbeat (new address, new
    id, or still-working server) refreshes the table entry.

    Args:
        new_id: id announced by the server.
        port: server port.
        addr: server address.
        sock: socket the verdict string is sent on.
    """
    # Entry stored for (addr, port) when the heartbeat is accepted.
    fresh_entry = (time.time(), 'working', new_id)
    server_key = (addr, port)

    rejected = False
    if server_key in heartbeats:
        last_timestamp, status, current_id = heartbeats[server_key]
        # Same incarnation of a server already marked as failed.
        rejected = (new_id == current_id) and (status != 'working')

    if rejected:
        print("Rejecting heartbeat from host: " + addr + ":" + str(port) + " because server failed.")
        common_functions.send_msg(sock, "Heartbeat was rejected.")
        return

    print("Accepting heartbeat from host: " + addr + ":" + str(port))
    common_functions.send_msg(sock, "Heartbeat was accepted.")
    heartbeats[server_key] = fresh_entry
def get_replica_buckets(viewleader_sock, args_dict):
    """Send ``args_dict`` to the viewleader and return its reply.

    Per the original author's note, the reply is the list of
    ``(server_hash, (addr, port))`` entries for the replica servers
    associated with the requested key.
    """
    common_functions.send_msg(viewleader_sock, args_dict, True)
    return common_functions.recv_msg(viewleader_sock, True)
def process_msg(self, recvd_msg, addr, sock):
    """Handle one decoded RPC message on a viewleader (leader or replica).

    The ``'cmd'`` entry of ``recvd_msg`` selects the action:

    * ``query_servers`` -- reply with ``viewleader_rpc.query_servers``.
    * ``heartbeat`` / ``lock_get`` / ``lock_release`` -- leader side: run
      ``self.run_consensus_alg`` first; on success log and apply the
      command, then reply. Without a quorum, reply ``'not ok'``.
    * ``rebalance`` -- print the message's ``'msg'`` entry.
    * ``prepare`` -- replica side, phase 1: record the proposal number and
      reply with a promise, plus log catch-up information.
    * ``accept`` -- replica side, phase 2: optionally replay logs supplied
      by the leader, then log and apply the new command.

    Args:
        recvd_msg: decoded message dict; must contain ``'cmd'``.
        addr: not referenced in this function.
        sock: socket the reply is written to.
    """
    function_from_cmd = recvd_msg["cmd"]  # command name selects the branch below

    if (function_from_cmd == 'query_servers'):
        common_functions.send_msg(sock, viewleader_rpc.query_servers(self.log), False)

    elif (function_from_cmd == 'heartbeat'):
        new_id = recvd_msg["args"][0]
        src_addr = recvd_msg["args"][1]
        src_port = recvd_msg["args"][2]
        # Timestamp is fixed by the leader and shipped to the replicas in args.
        timestamp = time.time()
        args = [new_id, src_addr, src_port, timestamp]

        # Runs consensus algorithm which checks to see if there is a quorum between viewleaders
        # before applying the given command.
        if (self.run_consensus_alg(sock, function_from_cmd, args)):
            # leader applies given command and updates view
            is_accepted = viewleader_rpc.heartbeat(new_id, src_port, src_addr, sock, timestamp)

            if (not(is_accepted)):
                timestamp = 0

            # adds the applied command to log; rejected heartbeats have timestamp = 0 and accepted ones
            # keep their initial received timestamp
            self.log.append({'cmd': 'heartbeat', 'id': new_id, 'addr': src_addr, 'port': src_port, 'timestamp': timestamp}.copy())
            self.update_view()
            curr_epoch = viewleader_rpc.query_servers(self.log)['Current epoch']
            common_functions.send_msg(sock, {'status': 'ok', 'Current Epoch': curr_epoch}, False)
        else:
            # No quorum: report failure but still tell the server the epoch.
            curr_epoch = viewleader_rpc.query_servers(self.log)['Current epoch']
            common_functions.send_msg(sock, {'status': 'not ok', 'Current Epoch': curr_epoch}, False)

    elif (function_from_cmd == 'lock_get'):
        lock_name = recvd_msg["lock_name"]
        requester_id = recvd_msg["requester_id"]
        args = [lock_name, requester_id]

        print ("Running consensus algorithm...")
        if (self.run_consensus_alg(sock, function_from_cmd, args)):
            # The command is logged before it is applied, whether or not
            # the lock ends up being granted.
            self.log.append({'cmd': 'lock_get', 'lock': lock_name, 'requester': requester_id}.copy())
            is_got = viewleader_rpc.lock_get(lock_name, requester_id)

            if (is_got):
                common_functions.send_msg(sock, {'status': 'granted'}, False)
            else:
                common_functions.send_msg(sock, 
                                          {'status': 'retry'}, False)
                print ("Sending retry message to client.")
        else:
            common_functions.send_msg(sock, {'status': 'not ok'}, False)

    elif (function_from_cmd == 'lock_release'):
        lock_name = recvd_msg["lock_name"]
        requester_id = recvd_msg["requester_id"]
        args = [lock_name, requester_id]

        print ("Running consensus algorithm...")
        if (self.run_consensus_alg(sock, function_from_cmd, args)):
            # Logged before being applied, whether or not the release succeeds.
            self.log.append({'cmd': 'lock_release', 'lock': lock_name, 'requester': requester_id}.copy())
            is_released = viewleader_rpc.lock_release(lock_name, requester_id)

            if (is_released):
                common_functions.send_msg(sock, {'status': 'ok'}, False)
            else:
                common_functions.send_msg(sock, {'status': 'not ok'}, False)
        else:
            common_functions.send_msg(sock, {'status': 'not ok'}, False)

    elif (function_from_cmd == 'rebalance'):
        msg = recvd_msg['msg']
        print (msg)

    elif (function_from_cmd == 'prepare'):
        # replica sees the prepare message from the leader
        proposal_num = recvd_msg['proposal_num']
        length_of_log = len(self.log)

        # Remember the highest proposal number seen so far. The global
        # declaration applies to the whole function, including the
        # 'accept' branch below.
        global last_seen_proposal_num
        if (last_seen_proposal_num < proposal_num):
            last_seen_proposal_num = proposal_num

        # Promise message, Phase 1. A promise ('status': 'ok') is sent in
        # every case; only the extra payload differs:
        #   1. proposal number == local log length: nothing extra.
        #   2. proposal number <  local log length: include the log entries
        #      the leader is missing so it can update itself.
        #   3. proposal number >  local log length: include how many log
        #      entries this replica needs from the leader.
        if (proposal_num == length_of_log):
            common_functions.send_msg(sock, {'status': 'ok', 'addr': self.hostname, 'port': self.port}, False)
        elif (proposal_num < length_of_log):
            logs_for_leader = self.log[proposal_num:length_of_log]
            common_functions.send_msg(sock, {'status': 'ok', 'logs_leader_is_missing': logs_for_leader, 'addr': self.hostname, 'port': self.port}, False)
        else:
            common_functions.send_msg(sock, {'status': 'ok', 'num_logs_replica_needs': proposal_num - length_of_log, 'addr': self.hostname, 'port': self.port}, False)

    elif (function_from_cmd == 'accept'):
        # replica sees the accept message from the leader
        new_proposal_num = recvd_msg['new_proposal_num']
        new_cmd = recvd_msg['new_cmd']
        args = recvd_msg['args']

        try:
            # Raises KeyError when the leader attached no catch-up logs.
            logs_for_replica = recvd_msg['logs_replica_needs']

            # The replica replays the logs only if it has not seen a higher
            # proposal number since its last promise, i.e. the new proposal
            # number is >= the last seen proposal number.
            # NOTE(review): this check gates the replay only; the command
            # below is applied regardless -- confirm that is intended.
            if (last_seen_proposal_num <= new_proposal_num):
                # replays the logs received from leader to catch back up
                self.replay(logs_for_replica)
                if (len(self.log) != 0):
                    print ("Updated log: {}".format(self.log))
            pass
        except Exception:
            # NOTE(review): also hides any error raised by self.replay.
            print ("No logs missing.")

        # replica applies the given command to its log and updates view
        if (new_cmd == 'heartbeat'):
            new_id = args[0]
            src_addr = args[1]
            src_port = args[2]
            timestamp = args[3]
        else:
            # Every non-heartbeat command is unpacked as a lock command.
            lock_name = args[0]
            requester_id = args[1]

        if (new_cmd == 'heartbeat'):
            is_accepted = viewleader_rpc.heartbeat(new_id, src_port, src_addr, sock, timestamp)
            if (not(is_accepted)):
                timestamp = 0
            self.log.append({'cmd': 'heartbeat', 'id': new_id, 'addr': src_addr, 'port': src_port, 'timestamp': timestamp}.copy())
            self.update_view()
        elif (new_cmd == 'lock_get'):
            self.log.append({'cmd': 'lock_get', 'lock': lock_name, 'requester': requester_id}.copy())
            is_got = viewleader_rpc.lock_get(lock_name, requester_id)
            if (is_got):
                common_functions.send_msg(sock, {'status': 'granted'}, False)
            else:
                common_functions.send_msg(sock, {'status': 'retry'}, False)
                print ("Sending retry message to client.")
        elif (new_cmd == 'lock_release'):
            self.log.append({'cmd': 'lock_release', 'lock': lock_name, 'requester': requester_id}.copy())
            is_released = viewleader_rpc.lock_release(lock_name, requester_id)
            if (is_released):
                common_functions.send_msg(sock, {'status': 'ok'}, False)
            else:
                common_functions.send_msg(sock, {'status': 'not ok'}, False)

    # NOTE(review): taken to be the fallback of the outer command chain;
    # the source layout does not make the nesting of this else explicit.
    else:
        print ("Rejecting RPC request because function is unknown.")
def rebalance(self, old_view, new_view, epoch_op):
    """Send a ``rebalance`` command to every server in ``new_view``.

    Args:
        old_view: previous view, forwarded as ``'old_view'``.
        new_view: iterable of ``((addr, port), server_id)`` entries; also
            forwarded as ``'new_view'``.
        epoch_op: forwarded as ``'op'``.
    """
    for endpoint, server_id in new_view:
        addr, port = endpoint
        conn = common_functions.create_connection(addr, port, port, None, False)
        payload = {
            'cmd': 'rebalance',
            'old_view': old_view,
            'new_view': new_view,
            'op': epoch_op,
        }
        common_functions.send_msg(conn, payload, False)
        conn.close()
def start(self):
    """Run one client RPC from the parsed command line, then exit.

    ``query_servers`` / ``lock_get`` / ``lock_release`` are sent to the
    viewleader (ports 39000-39010); every other command goes to the server
    (ports 38000-38010). The reply is printed. A ``retry`` status reply
    makes the client wait 5 seconds and resend on a fresh connection.
    Any send/receive failure is reported and ends the process.
    """
    args = self.parse_cmd_arguments()

    if args.server is not None:
        if args.cmd is None:
            print("RPC command not provided.")
            sys.exit()

        # Pick destination host / port range from the RPC being called.
        if args.cmd in ('query_servers', 'lock_get', 'lock_release'):
            dest_host = str(args.viewleader)
            dest_port_low = 39000
            dest_port_high = 39010
        else:
            dest_host = str(args.server)
            dest_port_low = 38000
            dest_port_high = 38010
        timeout = 1

        args_dict = self.create_dict(args)
        # Accept the retry status both as the literal string and as a dict.
        retry_replies = ("{'status': 'retry'}", {'status': 'retry'})

        while True:
            sock = common_functions.create_connection(
                dest_host, dest_port_low, dest_port_high, timeout, True)
            try:
                print("Sending RPC msg to viewleader...")
                common_functions.send_msg(sock, args_dict)
                try:
                    recvd_msg = common_functions.recv_msg(sock)
                except ConnectionResetError:
                    print("Connection dropped.")
                    sys.exit()
                except AttributeError:
                    print("Cannot decode message.")
                    sys.exit()
            except Exception as e:
                print("Failed send over whole message.", e)
                sys.exit()
            finally:
                # Runs on the sys.exit() paths too; retries used to leak
                # one socket per attempt.
                if sock is not None:
                    sock.close()

            print(str(recvd_msg))
            if recvd_msg not in retry_replies:
                break
            time.sleep(5)  # wait before asking again

        sys.exit()
def process_msg(self, recvd_msg, addr, sock):
    """Handle one decoded RPC message on the viewleader.

    The ``'cmd'`` entry of ``recvd_msg`` selects the action; replies are
    written to ``sock``. Unknown commands are only reported on stdout.

    Args:
        recvd_msg: decoded message dict; must contain ``'cmd'``.
        addr: address of the sender, used for heartbeats.
        sock: socket the reply is written to.
    """
    command = recvd_msg["cmd"]

    if command == 'query_servers':
        view_info = viewleader_rpc.query_servers(self.epoch)
        common_functions.send_msg(sock, view_info, False)
        return

    if command == 'heartbeat':
        new_id = recvd_msg["args"][0]
        port = recvd_msg["args"][1]  # src port
        # The heartbeat handler is given the socket along with the sender details.
        viewleader_rpc.heartbeat(new_id, port, addr, sock)
        return

    if command == 'lock_get':
        lock_name = recvd_msg["lock_name"]
        requester_id = recvd_msg["requester_id"]
        granted = viewleader_rpc.lock_get(lock_name, requester_id) == True
        if granted:
            common_functions.send_msg(sock, "{'status': 'granted'}", False)
        else:
            common_functions.send_msg(sock, "{'status': 'retry'}", False)
            print("Sending retry message to client.")
        return

    if command == 'lock_release':
        lock_name = recvd_msg["lock_name"]
        requester_id = recvd_msg["requester_id"]
        released = viewleader_rpc.lock_release(lock_name, requester_id) == True
        verdict = "{'status': 'ok'}" if released else "{'status': 'not ok'}"
        common_functions.send_msg(sock, verdict, False)
        return

    if command == 'get_buckets':
        key = recvd_msg["key"]
        replica_buckets = DHT.bucket_allocator(key, viewleader_rpc.view)
        common_functions.send_msg(sock, replica_buckets, False)
        return

    if command == 'update_view':
        curr_epoch = self.update_view()
        common_functions.send_msg(sock, {'Current Epoch': curr_epoch}, False)
        return

    if command == 'rebalance':
        print(recvd_msg['msg'])
        return

    print("Rejecting RPC request because function is unknown.")
def rebalance(self, new_view, old_view, epoch_op):
    """Redistribute key/value data after a view change.

    Pass 1 asks every server in ``new_view`` for its data (``get_data``)
    and merges unseen keys into the module-level ``data_in_view``.
    Pass 2 walks ``data_in_view``; for each key it asks the key's replicas
    under ``new_view`` for that key, then either stores the returned pair
    in ``self.bucket`` (if absent) or, for an ``'add'`` epoch operation,
    deletes it when this server appears in ``old_view`` but not in
    ``new_view``.

    Args:
        new_view: iterable of ``[[addr, port], server_id]`` entries.
        old_view: previous view in the same shape, or None.
        epoch_op: operation that caused the epoch change; only ``'add'``
            triggers deletion.
    """
    key_to_delete = ''
    global key_value_replica
    global data_in_view

    # Pass 1: collect the data held by every server in the new view.
    for [[addr, port], server_id] in new_view:
        try:
            sock = common_functions.create_connection(
                addr, port, port, 5, False)
        except Exception as e:
            # NOTE(review): execution continues after a failed connect, so
            # `sock` below is unbound (first pass) or the previous, closed one.
            print("Couldn't establish a connection with replica: ", e)
        common_functions.send_msg(sock, {'cmd': 'get_data'}, False)
        recvd_msg = common_functions.recv_msg(sock, False)
        if (recvd_msg is not None):
            for key, value in recvd_msg.items():
                # First value seen for a key wins.
                if (key not in data_in_view):
                    with self.lock:
                        data_in_view[key] = value
        sock.close()

    # Pass 2: for each known key, consult its replicas under the new view.
    for key, value in data_in_view.items():
        # NOTE(review): old_replicas is computed but never used.
        old_replicas = DHT.bucket_allocator(key, old_view)
        new_replicas = DHT.bucket_allocator(key, new_view)
        for [[addr, port], server_id] in new_replicas:
            try:
                sock = common_functions.create_connection(
                    addr, port, port, 5, False)
            except Exception as e:
                # NOTE(review): same fall-through as in pass 1.
                print("Couldn't establish a connection with replica: ", e)
            common_functions.send_msg(sock, {
                'cmd': 'get_data',
                'key': key
            }, False)
            recvd_msg = common_functions.recv_msg(sock, False)
            # NOTE(review): this condition is always true (`or` of two
            # non-exclusive tests); `and` was probably intended.
            if (recvd_msg is not None) or (recvd_msg != ''):
                key_value_replica = recvd_msg
            sock.close()
            with self.lock:
                # print (key_value_replica)
                try:
                    # Fails (and is reported below) when the reply is not a
                    # two-item sequence, e.g. None.
                    new_key, new_value = key_value_replica
                    if (new_key not in self.bucket):
                        try:
                            self.bucket[new_key] = new_value
                            print("Adding {}:{} to current replica...".format(
                                new_key, new_value))
                        except LookupError as e:
                            print(
                                "Couldn't set the key since there was no such key..."
                            )
                    else:
                        if (epoch_op == 'add'):
                            if (old_view is not None):
                                # print ("Old view: {}".format(old_view))
                                # print ("New view: {}".format(new_view))
                                # print ("unique_id: {}".format(self.unique_id))
                                # This loop rebinds addr/port/server_id from
                                # the enclosing loop.
                                for [[addr, port], server_id] in old_view:
                                    # print ("tuple: {}".format([[addr, port], server_id]))
                                    # This server was in the old view but is
                                    # not in the new one: drop the key locally.
                                    if (self.unique_id == server_id) and ([[
                                            addr, port
                                    ], server_id] not in new_view):
                                        print(
                                            "Deleting {}:{} on old replica...".
                                            format(new_key, new_value))
                                        key_to_delete = new_key
                                        try:
                                            del self.bucket[key_to_delete]
                                        except LookupError:
                                            print(
                                                "Couldn't delete the key since there was no such key..."
                                            )
                except Exception as e:
                    print("No key_value found: ", e)