class Server:
    """A single server node of the replicated service.

    Each server owns a network controller and an acceptor, normally a
    replica, and, while it is the leader, a ``Leader`` object.  A background
    thread pulls messages from the network and dispatches them to those
    roles.  Leader liveness is tracked with periodic heartbeats; a server
    that stops hearing heartbeats nominates itself and becomes leader after
    counting three heartbeats while it is the current candidate.
    """

    # Seconds between two heartbeats sent by the leader or a candidate.
    TIME_HEARTBEAT = 3
    # Seconds to sleep between polls while waiting for recovered replica state.
    _RECOVER_POLL = 0.05

    def __init__(self, node_id, is_leader, num_nodes, num_clients, is_recover):
        """Create the server, start its receive thread and heartbeat timers.

        Args:
            node_id: index of this server, in ``[0 .. num_nodes-1]``.
            is_leader: ``True`` for the server created as the initial leader.
            num_nodes: number of servers; identifies the broadcast range.
            num_clients: number of clients, passed to the network controller.
            is_recover: ``True`` when this server restarts and should first
                ask the other servers for their replica state.
        """
        self.node_id = node_id
        self.uid = "Server#" + str(node_id)  # used for printing messages
        self.num_nodes = num_nodes
        self.num_clients = num_clients

        # Every flag the receive thread reads is initialised BEFORE that
        # thread is started; otherwise an early message hits a missing
        # attribute and the receive loop dies with AttributeError.
        self.is_leader = is_leader
        self.leader_dead = False
        self.count_heartbeat = 4   # greater than 3, the election threshold
        self.current_leader = -1   # updated when receiving a heartbeat
        self.is_replica = False    # flipped once self.replica exists
        self.rev_heartbeat = True  # whether a heartbeat arrived this period
        self.waitfor_replica = False
        self.replica_all_down = False
        self.recv_slot_num = None
        self.recv_decisions = None

        # network controller
        self.nt = Network(self.uid, num_nodes, num_clients)

        # The acceptor must exist before anything can be dispatched to it.
        self.acceptor = Acceptor(self.node_id, self.nt)

        try:
            self.t_recv = Thread(target=self.receive)
            self.t_recv.start()
        except Exception:
            # Narrower than a bare ``except``: KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            print(self.uid, "error: unable to start new thread")

        # Leader
        if is_leader:
            time.sleep(2)  # wait for other servers to start
            self.leader = Leader(node_id, self.num_nodes, self.nt)
            self.leader.init_scout()
            if TERM_LOG:
                print(self.uid, "is leader")

        # Replicas: every server whose id is in range is a replica here.
        if node_id < num_nodes:
            if is_recover:
                self.replica = self._recover_replica()
            else:
                self.replica = Replica(node_id, self.nt)
            self.is_replica = True

        # The leader broadcasts heartbeats; every server watches for them.
        if is_leader:
            self.broadcast_heartbeat()
        self.check_heartbeat()

        # notify the Master that this server has started
        self.nt.send_to_master(str(("serverStarted", self.node_id)))

    def _recover_replica(self):
        """Build the replica of a recovering server.

        Broadcasts ``requestReplicaInfo`` and waits until either a
        ``replicaInfo`` reply has been stored by the receive thread or
        ``2 * TIME_HEARTBEAT`` seconds have passed.

        Returns:
            A ``Replica`` seeded with the received slot number and decisions,
            or a fresh ``Replica`` when no reply arrived in time.
        """
        self.waitfor_replica = True
        self.replica_all_down = False
        self.broadcast_to_server(str(("requestReplicaInfo", self.node_id)))
        threading.Timer(self.TIME_HEARTBEAT * 2,
                        self.set_replica_all_down).start()
        # Sleep between polls instead of spinning, so the receive thread
        # that delivers the reply is not starved of CPU time.
        while self.waitfor_replica and not self.replica_all_down:
            time.sleep(self._RECOVER_POLL)
        # Decide on "did a reply arrive", not on the timeout flag: when both
        # happen close together the received state is still used.
        if self.waitfor_replica:
            return Replica(self.node_id, self.nt)
        return Replica(self.node_id, self.nt,
                       self.recv_slot_num, self.recv_decisions)

    def set_replica_all_down(self):
        """Timer callback: stop waiting for replica state from other servers."""
        self.replica_all_down = True

    def broadcast_to_server(self, message):
        """Send ``message`` to the servers through the network controller."""
        self.nt.broadcast_to_server(message)

    def broadcast_to_client(self, message):
        """Send ``message`` to the clients through the network controller."""
        self.nt.broadcast_to_client(message)

    def send_to_server(self, dest_id, message):
        """Send ``message`` to the server ``dest_id``."""
        self.nt.send_to_server(dest_id, message)

    def send_to_client(self, dest_id, message):
        """Send ``message`` to the client ``dest_id``."""
        self.nt.send_to_client(dest_id, message)

    def receive(self):
        """Receive loop: parse each incoming buffer and dispatch it by tag.

        Runs forever on the receive thread.  Empty buffers are skipped, and a
        buffer that cannot be parsed is reported and dropped so that one bad
        message cannot stop message processing for the whole server.
        """
        while True:
            buf = self.nt.receive()
            if not buf:
                continue
            try:
                message = list(literal_eval(buf))
            except (ValueError, SyntaxError, TypeError) as err:
                print(self.uid, "error: dropping malformed message:", err)
                continue
            if not message:
                continue

            kind = message[0]
            if kind in ('request', 'decision', 'requestReplicaInfo'):
                # to replica
                self.replica_operation(message)
            elif kind in ('propose', 'adopted', 'preempted',
                          'leaderAlive', 'initLeader'):
                # to leader
                self.leader_operation(message)
            elif kind == "p1b":
                # to scout
                if self.is_leader:
                    self.leader.scout_operation(message)
            elif kind == "p2b":
                # to commander
                if self.is_leader:
                    self.leader.commander_operation(message)
            elif kind in ('p1a', 'p2a'):
                # to acceptor
                self.acceptor_operation(message)
            elif kind == "heartbeat":
                self.receive_heartbeat(message)
            elif kind == "election":
                self.rev_heartbeat = True
                # This text parses to the same ('heartbeat', id) tuple as
                # the str((...)) form used by broadcast_heartbeat.
                self.broadcast_to_server("'heartbeat', "+str(self.node_id))
            elif kind == "timeBombLeader":
                if self.is_leader:
                    self.nt.set_remain_message(int(message[1]))
            elif kind == "replicaInfo":
                if self.waitfor_replica:
                    # Store the payload first; clearing the flag is what
                    # releases the waiting constructor.
                    self.recv_slot_num = message[1]
                    self.recv_decisions = set(message[2])
                    self.waitfor_replica = False

    def replica_operation(self, message):
        """Handle a replica-bound message; ignored while not a replica.

        Accepted shapes:
            ['request', (k, cid, message)]          request from a client
            ['decision', (slot_num, proposal)]      decision from the leader
            ['requestReplicaInfo', sender_id]       state request from a
                                                    recovering server
        """
        if not self.is_replica:
            return
        kind = message[0]
        if kind == "request":
            self.replica.propose(message[1])
        elif kind == "decision":
            self.replica.decide(message[1])
        elif kind == "requestReplicaInfo":
            reply = ("replicaInfo", self.replica.slot_num,
                     list(self.replica.decisions))
            self.send_to_server(message[1], str(reply))

    def leader_operation(self, message):
        """Handle a leader-bound message; ignored while not the leader.

        Accepted shapes:
            ['propose', (slot_num, proposal)]       proposal from a replica
            ['adopted', ballot_num, pvalue]         adoption from a scout
            ['preempted', ballot]                   preemption from a commander
            ['leaderAlive', ...]                    answered to the master
            ['initLeader', ...]                     rebuild the Leader object
        """
        if not self.is_leader:
            return
        kind = message[0]
        if kind == 'propose':
            self.leader.process_propose(message)
        elif kind == 'adopted':
            self.leader.process_adopted(message)
        elif kind == 'preempted':
            self.leader.process_preempted(message)
        elif kind == 'leaderAlive':
            self.nt.send_to_master(str(("leaderAlive", self.node_id)))
        elif kind == 'initLeader':
            self.count_heartbeat = 4  # greater than 3
            # is_leader is held False while the Leader object is replaced.
            self.is_leader = False
            self.leader_dead = False
            self.leader = Leader(self.node_id, self.num_nodes, self.nt)
            self.is_leader = True
            self.leader.init_scout()
            if TERM_LOG:
                print(self.uid, "resets leader info")
            self.current_leader = -1

    def acceptor_operation(self, message):
        """Forward a phase-1 or phase-2 request to the acceptor.

        Accepted shapes:
            ['p1a', (sender_id, scout_id), ballot_num]
            ['p2a', (sender_id, commander_id),
                    (ballot_num, slot_num, proposal)]
        """
        kind = message[0]
        if kind == 'p1a':
            self.acceptor.process_p1a(message)
        elif kind == 'p2a':
            self.acceptor.process_p2a(message)

    def broadcast_heartbeat(self):
        """Broadcast a heartbeat and re-arm itself every TIME_HEARTBEAT s.

        Only the leader, or a server that currently considers itself the
        leader candidate, broadcasts; others may have been elected meanwhile.
        """
        # NOTE(review): the re-arm is kept inside the condition, so the
        # chain ends once this server is neither leader nor candidate;
        # check_heartbeat starts a new chain on the next election. Confirm
        # against the original layout.
        if self.is_leader or self.current_leader == self.node_id:
            self.broadcast_to_server(str(("heartbeat", self.node_id)))
            threading.Timer(self.TIME_HEARTBEAT,
                            self.broadcast_heartbeat).start()

    def _adopt_candidate(self, candidate):
        """Record ``candidate`` as the current leader candidate."""
        self.rev_heartbeat = True
        self.current_leader = candidate
        if candidate == self.node_id:
            self.count_heartbeat = 0
        else:
            self.is_leader = False
        # NOTE(review): the replica is updated for every adopted candidate,
        # including this server itself - confirm against original layout.
        if self.is_replica:
            self.replica.set_leader(candidate)
        if TERM_LOG:
            print(self.uid, " updates Server#", candidate,
                  " as leader candidate", sep="")

    def _become_leader(self):
        """Take over as leader and announce it to clients and the master."""
        self.nt.set_remain_message()
        self.is_leader = True
        self.leader = Leader(self.node_id, self.num_nodes, self.nt)
        self.leader.init_scout()
        self.broadcast_to_client(str(("leaderElected", self.node_id)))
        self.nt.send_to_master(str(("leaderElected", self.node_id)))

    def receive_heartbeat(self, message):
        """Process ``['heartbeat', leader_id]``.

        A heartbeat from the known leader refreshes the liveness flag.  A
        heartbeat from another server is adopted when no leader is known yet
        or when the sender has a smaller id than the current candidate.
        While this server is itself the candidate, every heartbeat it
        processes is counted, and on the third it becomes the leader.
        """
        candidate = int(message[1])
        if candidate == self.current_leader:
            self.rev_heartbeat = True
            self.leader_dead = False
        elif self.current_leader < 0 or candidate < self.current_leader:
            self._adopt_candidate(candidate)

        if self.current_leader == self.node_id:
            self.count_heartbeat += 1
            if self.count_heartbeat == 3:
                self._become_leader()

    def check_heartbeat(self):
        """Start a leader election whenever the leader's heartbeat times out.

        Runs every ``TIME_HEARTBEAT + 1`` seconds.  If this server is not the
        leader and no heartbeat was seen during the last period, it nominates
        itself and starts broadcasting heartbeats.
        """
        if (not self.is_leader) and (not self.rev_heartbeat):
            if TERM_LOG:
                print(self.uid, " starts election Server#", self.node_id,
                      sep="")
            self.current_leader = self.node_id
            self.count_heartbeat = 0
            if self.is_replica:
                self.replica.set_leader(self.node_id)
            self.broadcast_heartbeat()
        # Open a new observation period.
        self.rev_heartbeat = False
        threading.Timer(self.TIME_HEARTBEAT + 1, self.check_heartbeat).start()