class Node(object):
    """A Raft-style consensus participant exchanging JSON messages over UDP.

    The node is driven by :meth:`run`, which repeatedly receives at most one
    message, applies the rules shared by every role (:meth:`all_do`) and then
    the rules of its current role (``'follower'``, ``'candidate'`` or
    ``'leader'``).  ``current_term`` and ``voted_for`` are persisted as JSON
    in ``<id>/key.json``.
    """

    def __init__(self, conf):
        """Create the node, restore persisted state and open its sockets.

        Args:
            conf: mapping with the keys ``'id'`` (also used as the name of
                the state directory), ``'addr'`` (address the receive socket
                binds to) and ``'peers'`` (mapping of peer id to the address
                messages for that peer are sent to).
                NOTE(review): ``peers`` presumably does not contain this
                node itself -- the quorum arithmetic relies on that.
        """
        self.role = 'follower'
        self.id = conf['id']
        self.addr = conf['addr']
        self.peers = conf['peers']

        # persistent state
        self.current_term = 0
        self.voted_for = None

        if not os.path.exists(self.id):
            os.mkdir(self.id)

        # init persistent state
        self.load()
        self.log = Log(self.id)

        # volatile state
        # rule 1, 2
        self.commit_index = 0
        self.last_applied = 0

        # volatile state on leaders
        # rule 1, 2
        self.next_index = {
            _id: self.log.last_log_index + 1
            for _id in self.peers
        }
        self.match_index = {_id: -1 for _id in self.peers}

        # append entries
        self.leader_id = None

        # request vote: per-peer vote flag for the current election
        self.vote_ids = {_id: 0 for _id in self.peers}

        # client request
        self.client_addr = None

        # tick
        # Despite the name, these bounds are added to time.time() and are
        # therefore seconds, not milliseconds.
        self.wait_ms = (10, 20)
        self.next_leader_election_time = time.time() + random.randint(
            *self.wait_ms)
        self.next_heartbeat_time = 0

        # msg send and recv
        self.ss = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.ss.bind(self.addr)
        # The receive timeout is what lets run() tick when nothing arrives.
        self.ss.settimeout(2)

        self.cs = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    def _quorum(self):
        """Return how many peers must agree, in addition to this node.

        The cluster has ``len(self.peers) + 1`` members, so a strict
        majority needs ``(len(self.peers) + 1) // 2`` peers besides this
        node.  For odd cluster sizes this equals ``len(self.peers) // 2``.
        """
        return (len(self.peers) + 1) // 2

    def load(self):
        """Restore ``current_term``/``voted_for`` from disk, or create the file."""
        file_path = self.id + '/key.json'
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                data = json.load(f)
            self.current_term = data['current_term']
            self.voted_for = data['voted_for']
        else:
            self.save()

    def save(self):
        """Persist ``current_term`` and ``voted_for`` to ``<id>/key.json``."""
        data = {
            'current_term': self.current_term,
            'voted_for': self.voted_for,
        }
        file_path = self.id + '/key.json'
        with open(file_path, 'w') as f:
            json.dump(data, f)

    def send(self, msg, addr):
        """Serialize ``msg`` as UTF-8 JSON and send it to ``addr``."""
        payload = json.dumps(msg).encode('utf-8')
        self.cs.sendto(payload, addr)

    def recv(self):
        """Receive one datagram and return ``(decoded_json, sender_addr)``."""
        msg, addr = self.ss.recvfrom(65535)
        return json.loads(msg), addr

    def redirect(self, data, addr):
        """Forward messages not meant for this node.

        Returns:
            ``data`` when this node should process it, otherwise ``None``
            (nothing received, or the message was forwarded or dropped).
        """
        if data is None:
            return None

        if data['type'] == 'client_append_entries':
            if self.role != 'leader':
                # Only a leader accepts client entries; pass the request on
                # when a leader is known, drop it otherwise.
                if self.leader_id:
                    logging.info('redirect: client_append_entries to leader')
                    self.send(data, self.peers[self.leader_id])
                return None
            self.client_addr = addr
            return data

        if data['dst_id'] != self.id:
            logging.info('redirect: to ' + data['dst_id'])
            self.send(data, self.peers[data['dst_id']])
            return None

        return data

    def append_entries(self, data):
        '''
        append entries rpc
        only used in follower state

        Replies to the sender with an ``append_entries_response`` unless the
        message is a heartbeat (empty ``entries``), which gets no reply.
        '''
        response = {
            'type': 'append_entries_response',
            'src_id': self.id,
            'dst_id': data['src_id'],
            'term': self.current_term,
            'success': False
        }

        # append_entries: rule 1
        if data['term'] < self.current_term:
            logging.info(' 2. smaller term')
            logging.info(' 3. success = False: smaller term')
            logging.info(
                ' 4. send append_entries_response to leader ' +
                data['src_id'])
            response['success'] = False
            self.send(response, self.peers[data['src_id']])
            return

        self.leader_id = data['leader_id']

        # heartbeat
        if data['entries'] == []:
            logging.info(' 4. heartbeat')
            return

        prev_log_index = data['prev_log_index']
        prev_log_term = data['prev_log_term']

        tmp_prev_log_term = self.log.get_log_term(prev_log_index)

        # append_entries: rule 2, 3
        if tmp_prev_log_term != prev_log_term:
            logging.info(
                ' 4. success = False: index not match or term not match'
            )
            logging.info(
                ' 5. send append_entries_response to leader ' +
                data['src_id'])
            logging.info(' 6. log delete_entries')
            logging.info(' 6. log save')
            response['success'] = False
            self.send(response, self.peers[data['src_id']])
            self.log.delete_entries(prev_log_index)

        # append_entries rule 4
        else:
            logging.info(' 4. success = True')
            logging.info(
                ' 5. send append_entries_response to leader ' +
                data['src_id'])
            logging.info(' 6. log append_entries')
            logging.info(' 7. log save')
            response['success'] = True
            self.send(response, self.peers[data['src_id']])
            self.log.append_entries(prev_log_index, data['entries'])

            # append_entries rule 5
            # NOTE(review): the commit index is only advanced after a
            # successful append -- confirm this matches the intended nesting.
            leader_commit = data['leader_commit']
            if leader_commit > self.commit_index:
                commit_index = min(leader_commit, self.log.last_log_index)
                self.commit_index = commit_index
                logging.info(' 8. commit_index = ' + str(commit_index))

        return

    def request_vote(self, data):
        '''
        request vote rpc
        only used in follower state

        Always replies to the sender with a ``request_vote_response``.
        '''
        response = {
            'type': 'request_vote_response',
            'src_id': self.id,
            'dst_id': data['src_id'],
            'term': self.current_term,
            'vote_granted': False
        }

        # request vote: rule 1
        if data['term'] < self.current_term:
            logging.info(' 2. smaller term')
            logging.info(' 3. success = False')
            logging.info(
                ' 4. send request_vote_response to candidate ' +
                data['src_id'])
            response['vote_granted'] = False
            self.send(response, self.peers[data['src_id']])
            return

        logging.info(' 2. same term')
        candidate_id = data['candidate_id']
        last_log_index = data['last_log_index']
        last_log_term = data['last_log_term']

        if self.voted_for is None or self.voted_for == candidate_id:
            # The candidate's log is at least as up-to-date as ours when its
            # last term is higher, or the terms are equal and its log is at
            # least as long.  Comparing index and term independently would
            # refuse a candidate with a newer term but a shorter log.
            my_index = self.log.last_log_index
            my_term = self.log.last_log_term
            candidate_up_to_date = (
                last_log_term > my_term or
                (last_log_term == my_term and last_log_index >= my_index))

            if candidate_up_to_date:
                self.voted_for = data['src_id']
                self.save()
                response['vote_granted'] = True
                self.send(response, self.peers[data['src_id']])
                logging.info(
                    ' 3. success = True: candidate log is newer')
                logging.info(
                    ' 4. send request_vote_response to candidate ' +
                    data['src_id'])
            else:
                self.voted_for = None
                self.save()
                response['vote_granted'] = False
                self.send(response, self.peers[data['src_id']])
                logging.info(
                    ' 3. success = False: candidate log is older')
                logging.info(
                    ' 4. send request_vote_response to candidate ' +
                    data['src_id'])
        else:
            response['vote_granted'] = False
            self.send(response, self.peers[data['src_id']])
            logging.info(' 3. success = False: has vated for ' +
                         self.voted_for)
            logging.info(
                ' 4. send request_vote_response to candidate ' +
                data['src_id'])

        return

    def all_do(self, data):
        '''
        all servers: rule 1, 2

        Advances ``last_applied`` to ``commit_index`` and falls back to the
        follower role when ``data`` carries a term newer than ours.
        '''
        logging.info(
            '-------------------------------all------------------------------------------'
        )

        if self.commit_index > self.last_applied:
            self.last_applied = self.commit_index
            logging.info('all: 1. last_applied = ' + str(self.last_applied))

        if data is None:
            return

        # Client requests carry no term of their own at this point.
        if data['type'] == 'client_append_entries':
            return

        if data['term'] > self.current_term:
            logging.info('all: 1. bigger term')
            logging.info(' 2. become follower')
            self.role = 'follower'
            self.current_term = data['term']
            self.voted_for = None
            self.save()

        return

    def follower_do(self, data):
        '''
        rules for servers: follower
        '''
        logging.info(
            '-------------------------------follower-------------------------------------'
        )

        t = time.time()
        # follower rules: rule 1
        if data is not None:
            if data['type'] == 'append_entries':
                logging.info('follower: 1. recv append_entries from leader ' +
                             data['src_id'])

                if data['term'] == self.current_term:
                    logging.info(' 2. same term')
                    logging.info(
                        ' 3. reset next_leader_election_time')
                    self.next_leader_election_time = t + random.randint(
                        *self.wait_ms)
                # Stale-term messages are still passed on so that
                # append_entries() can answer them with success = False.
                self.append_entries(data)

            elif data['type'] == 'request_vote':
                logging.info('follower: 1. recv request_vote from candidate ' +
                             data['src_id'])
                self.request_vote(data)

        # follower rules: rule 2
        if t > self.next_leader_election_time:
            logging.info('follower:1. become candidate')
            self.next_leader_election_time = t + random.randint(*self.wait_ms)
            self.role = 'candidate'
            self.current_term += 1
            self.voted_for = self.id
            self.save()
            self.vote_ids = {_id: 0 for _id in self.peers}

        return

    def candidate_do(self, data):
        '''
        rules for servers: candidate
        '''
        logging.info(
            '-------------------------------candidate------------------------------------'
        )

        t = time.time()
        # candidate rules: rule 1
        # (Re)send the vote request to every peer that has not granted yet.
        for dst_id in self.peers:
            if self.vote_ids[dst_id] == 0:
                logging.info('candidate: 1. send request_vote to peer ' +
                             dst_id)
                request = {
                    'type': 'request_vote',
                    'src_id': self.id,
                    'dst_id': dst_id,
                    'term': self.current_term,
                    'candidate_id': self.id,
                    'last_log_index': self.log.last_log_index,
                    'last_log_term': self.log.last_log_term
                }
                self.send(request, self.peers[dst_id])

        if data is not None and data['term'] == self.current_term:
            # candidate rules: rule 2
            if data['type'] == 'request_vote_response':
                logging.info(
                    'candidate: 1. recv request_vote_response from follower ' +
                    data['src_id'])

                self.vote_ids[data['src_id']] = data['vote_granted']
                vote_count = sum(self.vote_ids.values())

                # This node's own vote is implicit, so only peer votes are
                # counted against the quorum.
                if vote_count >= self._quorum():
                    logging.info(' 2. become leader')
                    self.role = 'leader'
                    self.voted_for = None
                    self.save()
                    self.next_heartbeat_time = 0
                    self.next_index = {
                        _id: self.log.last_log_index + 1
                        for _id in self.peers
                    }
                    self.match_index = {_id: 0 for _id in self.peers}
                    return

            # candidate rules: rule 3
            elif data['type'] == 'append_entries':
                logging.info('candidate: 1. recv append_entries from leader ' +
                             data['src_id'])
                logging.info(' 2. become follower')
                self.next_leader_election_time = t + random.randint(
                    *self.wait_ms)
                self.role = 'follower'
                self.voted_for = None
                self.save()
                return

        # candidate rules: rule 4
        if t > self.next_leader_election_time:
            logging.info('candidate: 1. leader_election timeout')
            logging.info(' 2. become candidate')
            self.next_leader_election_time = t + random.randint(*self.wait_ms)
            self.role = 'candidate'
            self.current_term += 1
            self.voted_for = self.id
            self.save()
            self.vote_ids = {_id: 0 for _id in self.peers}

        return

    def leader_do(self, data):
        '''
        rules for servers: leader
        '''
        logging.info(
            '-------------------------------leader---------------------------------------'
        )

        # leader rules: rule 1, 3
        t = time.time()
        if t > self.next_heartbeat_time:
            self.next_heartbeat_time = t + random.randint(0, 5)

            for dst_id in self.peers:
                logging.info('leader:1. send append_entries to peer ' + dst_id)

                peer_next = self.next_index[dst_id]
                request = {
                    'type': 'append_entries',
                    'src_id': self.id,
                    'dst_id': dst_id,
                    'term': self.current_term,
                    'leader_id': self.id,
                    'prev_log_index': peer_next - 1,
                    'prev_log_term': self.log.get_log_term(peer_next - 1),
                    'entries': self.log.get_entries(peer_next),
                    'leader_commit': self.commit_index
                }

                self.send(request, self.peers[dst_id])

        # leader rules: rule 2
        if data is not None and data['type'] == 'client_append_entries':
            data['term'] = self.current_term
            self.log.append_entries(self.log.last_log_index, [data])

            logging.info('leader:1. recv append_entries from client')
            logging.info(' 2. log append_entries')
            logging.info(' 3. log save')

            return

        # leader rules: rule 3.1, 3.2
        if data is not None and data['term'] == self.current_term:
            if data['type'] == 'append_entries_response':
                logging.info(
                    'leader:1. recv append_entries_response from follower ' +
                    data['src_id'])
                src_id = data['src_id']
                if not data['success']:
                    self.next_index[src_id] -= 1
                    logging.info(' 2. success = False')
                    logging.info(' 3. next_index - 1')
                else:
                    self.match_index[src_id] = self.next_index[src_id]
                    self.next_index[src_id] = self.log.last_log_index + 1
                    logging.info(' 2. success = True')
                    logging.info(' 3. match_index = ' +
                                 str(self.match_index[src_id]) +
                                 ' next_index = ' +
                                 str(self.next_index[src_id]))

        # leader rules: rule 4
        # Advance commit_index one step at a time while a quorum of peers
        # has replicated the next index.
        quorum = self._quorum()
        while True:
            N = self.commit_index + 1
            count = sum(1 for idx in self.match_index.values() if idx >= N)

            # The emptiness guard keeps a peer-less node from looping
            # forever (its quorum is 0).
            if self.match_index and count >= quorum:
                self.commit_index = N
                logging.info('leader:1. commit + 1')

                if self.client_addr:
                    response = {'index': self.commit_index}
                    # The reply goes to fixed port 10000 on the client host.
                    self.send(response, (self.client_addr[0], 10000))
            else:
                logging.info('leader:2. commit = ' + str(self.commit_index))
                break

    def run(self):
        """Run the receive/dispatch loop until an uncaught error or interrupt.

        A failed or timed-out receive is treated as "no message" so the
        role handlers still get their periodic tick.  Both sockets are
        closed when the loop is left for any reason.
        """
        try:
            while True:
                try:
                    try:
                        data, addr = self.recv()
                    except Exception:
                        data, addr = None, None

                    data = self.redirect(data, addr)

                    self.all_do(data)

                    # A handler may change self.role, in which case the next
                    # role's handler also runs in this same iteration.
                    if self.role == 'follower':
                        self.follower_do(data)

                    if self.role == 'candidate':
                        self.candidate_do(data)

                    if self.role == 'leader':
                        self.leader_do(data)

                except Exception as e:
                    logging.info(e)
        finally:
            self.ss.close()
            self.cs.close()
class Server(threading.Thread):
    """Raft-style server thread: elections, heartbeats and log replication.

    The thread loop (:meth:`run`) connects to the other known servers,
    dispatches received messages to the ``process_*`` handlers and, when no
    message arrived, runs the timer-driven work in :meth:`check_status`.

    Every ``print`` below takes a single pre-formatted argument so that the
    output is identical whether ``print`` is a statement or a function.
    """

    def __init__(self, queue, port, id):
        """Start the network channel and initialise election/log state.

        Args:
            queue: stored on the instance; not otherwise used in this class.
            port: port handed to ``network.Network``.
            id: this server's id; also used to index the per-server lists
                ``next_index`` and ``latest_index_term``.
        """
        self.port = port
        self.id = id
        self.queue = queue
        self.title = constants.TITLE_FOLLOWER
        self.channel = network.Network(port, id)
        self.channel.start()
        self.leader = None
        self.running = True
        self.connected_servers = []
        self.last_heartbeat = 0
        self.heartbeat_timeout = 0
        self.process_heartbeat()
        self.heartbeat_frequency = 0.5
        self.election_start_time = 0
        # Time to wait for heartbeat or voting for a candidate before
        # calling election
        self.election_timeout = 0
        self.set_election_timeout()

        # Election variables
        self.id_received_votes = set()  # Id of servers who granted you votes
        self.id_refused_votes = set()  # Id of servers who refused to vote for you
        self.num_received_votes = 0  # Number of votes received in current election

        # Persistent state variables
        # TODO: PERSIST; On server boot, retrieve information from disk
        self.current_term = 0  # Latest term server has seen
        self.voted_for = None  # CandidateId that received vote in current term
        self.log = Log()

        self.next_index = None  # For leader: indices for updating follower logs
        # For leader: tuples of latest entry index and term for each
        # follower. Used for commit
        self.latest_index_term = None

        self.load_state()
        threading.Thread.__init__(self)

    def set_election_timeout(self):
        """Pick a fresh random election timeout in [1.5, 3.0) seconds."""
        self.election_timeout = 1.5 * random() + 1.5

    def process_heartbeat(self):
        """Record "heartbeat seen now" and pick a fresh heartbeat timeout."""
        self.last_heartbeat = time.time()
        self.heartbeat_timeout = 1.5 * random() + 1.5

    def _last_log_position(self):
        """Return ``(index, term)`` of the last log entry, ``(-1, -1)`` if empty."""
        if not self.log.data:
            return -1, -1
        last_entry = self.log.get(-1)
        return last_entry.index, last_entry.term

    def request_votes(self):
        """Send a RequestVote message to every connected server."""
        last_log_index, last_log_term = self._last_log_position()
        msg = RequestVoteMessage(self.id, self.current_term, last_log_index,
                                 last_log_term)
        for server in self.connected_servers:
            self.channel.send(msg, id=host_to_id[server[0]])
        print("Vote requests sent to other servers")

    def request_remaining_votes(self, id_all_voters):
        """Send RequestVote to connected servers not in ``id_all_voters``."""
        last_log_index, last_log_term = self._last_log_position()
        msg = RequestVoteMessage(self.id, self.current_term, last_log_index,
                                 last_log_term)
        for server in self.connected_servers:
            server_id = host_to_id[server[0]]
            if server_id not in id_all_voters:
                self.channel.send(msg, id=server_id)
        print("Vote requests sent to remaining servers who have not responded")

    def check_status(self):
        """Run the timer-driven duties of the current role."""
        current_time = time.time()
        if self.title == constants.TITLE_LEADER:
            # Send AppendEntries to update follower logs
            for server in self.connected_servers:
                server_id = host_to_id[server[0]]
                next_index = self.next_index[server_id]

                # Send entries that the server has not received yet, if any
                if self.log.last_log_index() >= next_index:
                    entries = self.construct_entries_list(next_index)
                    if next_index == 0:
                        prev_log_index = -1
                        prev_log_term = -1
                    else:
                        prev_entry = self.log.get(next_index - 1)
                        prev_log_index = prev_entry.index
                        prev_log_term = prev_entry.term
                    msg = AppendEntriesMessage(self.current_term, self.id,
                                               prev_log_index, prev_log_term,
                                               entries,
                                               self.log.last_commit_index)
                    self.channel.send(msg, id=server_id)
                    print("AppendEntries sent to  %s" % (server_id,))

            if current_time - self.last_heartbeat >= self.heartbeat_frequency:
                self.send_heartbeats()
        elif (self.title == constants.TITLE_FOLLOWER and
              current_time - self.last_heartbeat > self.heartbeat_timeout):
            # Heartbeat timeout passed as follower: Start election
            print("Election timeout as follower. No heartbeat. "
                  "Become candidate and start new election")
            self.start_election()
        elif (self.title == constants.TITLE_CANDIDATE and
              current_time - self.election_start_time > self.election_timeout):
            # Election timeout passed as candidate, without conclusion of
            # election: Start new election
            print("Election timeout as candidate. Election has not yet led "
                  "to new leader. Starting new election")
            self.set_election_timeout()
            self.start_election()
        elif (self.title == constants.TITLE_CANDIDATE and
              current_time - self.election_start_time < self.election_timeout):
            # Election timeout has not passed as candidate
            print("As candidate, election timeout has not passed. "
                  "Request votes from servers that have not responded")
            id_all_voters = self.id_received_votes.union(self.id_refused_votes)
            self.request_remaining_votes(id_all_voters)

    def construct_entries_list(self, index):
        """Return the log entries from ``index`` through the end of the log."""
        return [self.log.get(i) for i in range(index, len(self.log))]

    def start_election(self):
        """Become candidate for a new term, vote for self, request votes."""
        self.title = constants.TITLE_CANDIDATE
        self.reset_election_info()
        self.current_term += 1
        self.voted_for = self.id
        # Persist the new term together with the self-vote in one save.
        self.save_state()
        self.update_votes(self.id, True)
        self.election_start_time = time.time()
        self.check_election_status()
        self.request_votes()

    def send_heartbeats(self):
        """Send an empty AppendEntries to every connected server."""
        heartbeat = AppendEntriesMessage(self.current_term, self.id, -1, -1,
                                         [], self.log.last_commit_index)
        for server in self.connected_servers:
            self.channel.send(heartbeat, id=host_to_id[server[0]])
        self.process_heartbeat()

    def step_down(self):
        """Convert a leader or candidate to follower; no-op for a follower."""
        if self.title in (constants.TITLE_LEADER, constants.TITLE_CANDIDATE):
            self.title = constants.TITLE_FOLLOWER
            self.process_heartbeat()
            self.reset_election_info()

    def grant_vote(self, candidate_id):
        """Persist the vote for ``candidate_id`` and send a positive reply."""
        self.voted_for = candidate_id
        self.save_state()
        print("Grant vote to %s" % (candidate_id,))
        self.channel.send(VoteReplyMessage(self.id, self.current_term, True),
                          id=candidate_id)

    def refuse_vote(self, candidate_id):
        """Send a negative vote reply to ``candidate_id``."""
        self.channel.send(VoteReplyMessage(self.id, self.current_term, False),
                          id=candidate_id)
        print("Refuse vote to %s" % (candidate_id,))

    def majority(self):
        """Return the vote count needed to win, based on connected servers.

        Uses floor division explicitly so the result stays an integer
        regardless of the interpreter's ``/`` semantics.
        """
        return (len(self.connected_servers) + 1) // 2 + 1

    def check_election_status(self):
        """Become leader once a majority of votes has been received."""
        if self.num_received_votes >= self.majority():
            self.become_leader()

    def become_leader(self):
        """Switch to leader, initialise per-server bookkeeping, heartbeat."""
        self.title = constants.TITLE_LEADER
        self.leader = self.id
        print("Election won - I am now LEADER")

        # TODO: Implement rest of leader initialization
        server_count = len(addr_to_id)
        self.next_index = [len(self.log)] * server_count

        commit_index = self.log.last_commit_index
        latest_index = None if commit_index == -1 else commit_index
        if latest_index is not None and self.log.contains_at_index(latest_index):
            latest_term = self.log.get(latest_index).term
        else:
            latest_term = 0

        self.latest_index_term = [(latest_index, latest_term)] * server_count
        self.latest_index_term[self.id] = (len(self.log) - 1,
                                           self.current_term)

        self.reset_election_info()
        self.send_heartbeats()

    def reset_election_info(self):
        """Forget collected votes and the vote cast by this server."""
        self.id_received_votes = set()
        self.id_refused_votes = set()
        self.voted_for = None
        self.num_received_votes = 0

    def update_votes(self, server_id, vote_granted):
        """Record a vote reply.

        Args:
            server_id: server that sent the vote reply.
            vote_granted: True if the vote was granted.
        """
        if vote_granted:
            print("Received vote from %s" % (server_id,))
            self.id_received_votes.add(server_id)
            self.num_received_votes = len(self.id_received_votes)
            print("Number of received votes is now %s" %
                  (self.num_received_votes,))
        else:
            print("Denied vote from %s" % (server_id,))
            self.id_refused_votes.add(server_id)

    def update_commits(self):
        """Advance the commit index to the highest majority-replicated index.

        Walks downward from ``max(self.next_index) - 1`` until a majority of
        ``latest_index_term`` entries have reached the index, then commits
        it if a majority of them also carry the current term.
        """
        index = max(self.next_index)
        i_count = 0
        t_count = 0
        while i_count < self.majority() and index >= 0:
            index -= 1
            t_count = 0
            i_count = 0
            for (i, t) in self.latest_index_term:
                if t == self.current_term:
                    t_count += 1
                # ``i`` is None for servers that have not reported yet.
                if i is not None and i >= index:
                    i_count += 1

        if t_count >= self.majority() and i_count >= self.majority():
            if self.log.last_commit_index < index:
                self.log.last_commit_index = index
                self.save_state()
            elif self.log.last_commit_index > index:
                print("Error: Update_commits: new commit index is lower "
                      "than current commit_index")

            # NOTE(review): acks go to the author of every not-yet-acked
            # entry once a commit condition holds -- confirm whether this
            # should be limited to entries at or below the commit index.
            for entry in self.log.data:
                if not entry.client_ack_sent:
                    # TODO: Send client ack
                    ack_message = AcknowledgeMessage(ack=True,
                                                     msg_id=entry.msg_id)
                    self.channel.send(ack_message, id=entry.author)
                    entry.client_ack_sent = True

    def run(self):
        """Thread main loop: connect, receive and dispatch until stopped."""
        print("Server with id= %s  up and running" % (self.id,))
        while self.running:
            self.update_connected_servers()
            for server in list(addr_to_id.keys()):
                is_self = host_to_id[server[0]] == self.id
                if server not in self.channel and not is_self:
                    connected = self.channel.connect(server)
                    if connected:
                        print(str("Server: Connected to " + server[0]))
                        if server not in self.connected_servers:
                            self.connected_servers.append(server)

            data = self.channel.receive(RECEIVE_FREQ)
            if data:
                for server_id, msg in data:
                    self.process_msg(server_id, msg)
            else:
                self.check_status()

    def process_msg(self, sender_id, msg):
        """Dispatch ``msg`` to the handler matching ``msg.type``."""
        if msg.type == constants.MESSAGE_TYPE_REQUEST_VOTE:
            self.process_request_vote(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_VOTE_REPLY:
            self.process_vote_reply(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_REQUEST_LEADER:
            reply = messages.RequestLeaderMessage(leader=self.leader)
            self.channel.send(reply, id=sender_id)
        elif msg.type == constants.MESSAGE_TYPE_LOOKUP:
            self.process_lookup(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_POST:
            self.process_post(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_APPEND_ENTRIES:
            self.process_append_entries(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_ACKNOWLEDGE:
            self.process_acknowledge(sender_id, msg)
        # Used for testing purposes
        elif msg.type == constants.MESSAGE_TYPE_TEXT:
            print("From %s : %s" % (msg.sender_id, msg.msg))
        else:
            print("Error: Invalid message type")

    def process_lookup(self, sender_id, msg):
        """Answer a lookup with committed entries, or point to the leader."""
        if self.title == constants.TITLE_LEADER or msg.override:
            print("-----> Processing Lookup from client")
            posts = self.log.get_committed_entries()
            reply = messages.LookupMessage(msg_id=msg.msg_id, post=posts,
                                           server_id=self.id)
            self.channel.send(msg=reply, id=sender_id)
        else:
            print("Lookup to leader")
            reply = messages.RequestLeaderMessage(leader=self.leader)
            self.channel.send(msg=reply, id=sender_id)

    def process_post(self, sender_id, msg):
        """Append a client post to the log (leader) or point to the leader."""
        if self.title == constants.TITLE_LEADER:
            # TODO: Implement adding entry
            # TODO: PERSIST; implement in log class?
            entry = Entry(msg.post, sender_id, self.current_term,
                          len(self.log), msg_id=msg.msg_id)
            if self.log.append(entry):
                self.save_state()
                self.latest_index_term[self.id] = (len(self.log) - 1,
                                                   self.current_term)
                print("---->Append entry from client to log")
        else:
            reply = messages.RequestLeaderMessage(leader=self.leader)
            self.channel.send(msg=reply, id=sender_id)

    def process_request_vote(self, sender_id, msg):
        """Grant or refuse a vote request; a reply is always sent."""
        last_log_index, last_log_term = self._last_log_position()

        if msg.term < self.current_term:
            # If candidate's term is less than my term then refuse vote
            print("Refuse vote to server %s because I have higher term" %
                  (sender_id,))
            self.refuse_vote(msg.candidate_id)
            return

        if msg.term > self.current_term:
            # Adopt the newer term.  A vote cast in an older term must not
            # block voting in the new one, so it is cleared here as well.
            self.current_term = msg.term
            self.voted_for = None
            self.save_state()
            self.step_down()

        # Ids are compared by value; identity comparison is unreliable for
        # values such as integers.
        may_vote = (self.voted_for is None or
                    self.voted_for == msg.candidate_id)
        # Candidate's log must be at least as complete as mine.
        log_ok = (last_log_term < msg.last_log_term or
                  (last_log_term == msg.last_log_term and
                   last_log_index <= msg.last_log_index))

        if may_vote and log_ok:
            self.grant_vote(msg.candidate_id)
        else:
            self.refuse_vote(msg.candidate_id)

    def process_vote_reply(self, sender_id, msg):
        """Handle a vote reply: step down on a newer term, else tally it."""
        if msg.term > self.current_term and not msg.vote_granted:
            # Step down if reply from someone with higher term
            # Extra condition for security.
            # If responder's term is higher, then vote should not be granted
            # with correct execution
            self.current_term = msg.term
            self.save_state()
            print("Denied vote from %s" % (msg.follower_id,))
            self.step_down()
        else:
            # Take care of grant or refusal of vote
            # NOTE(review): replies are tallied whatever the current title
            # is -- confirm late replies cannot promote a non-candidate.
            self.update_votes(msg.follower_id, msg.vote_granted)
            self.check_election_status()

    def process_acknowledge(self, sender_id, msg):
        """Update per-follower bookkeeping from an AppendEntries ack."""
        if msg.ack:
            print("Process Acknowledge from server. ACK == TRUE")
            self.next_index[sender_id] = msg.next_index
            self.latest_index_term[sender_id] = msg.latest_index_term
            self.update_commits()
        else:
            print("Process Acknowledge from server. ACK == FALSE")
            # Back off by one entry, never below index 0.
            self.next_index[sender_id] = max(0, self.next_index[sender_id] - 1)
            if msg.term > self.current_term:
                self.current_term = msg.term
                self.save_state()
                self.step_down()

    def _append_and_ack(self, sender_id, msg, failure_note):
        """Append ``msg.entries``, commit, persist and ack to ``sender_id``.

        Prints ``failure_note`` instead when the log rejects the entries.
        """
        if self.log.append_entries(msg.entries):
            self.log.last_commit_index = msg.commit_index
            self.save_state()
            i = self.log.last_log_index()
            t = self.log.get(i).term
            self.channel.send(AcknowledgeMessage(
                ack=True, next_index=len(self.log),
                latest_index_term=(i, t)), id=sender_id)
            print("Log after appending entries:")
            self.log.show_data()
        else:
            print(failure_note)

    def process_append_entries(self, sender_id, msg):
        """Handle a heartbeat (no entries) or an AppendEntries with entries."""
        is_leader_or_candidate = self.title in (constants.TITLE_CANDIDATE,
                                                constants.TITLE_LEADER)

        if len(msg.entries) == 0:
            self.process_heartbeat()
            if msg.commit_index < len(self.log):
                self.log.last_commit_index = msg.commit_index
                self.save_state()
            self.leader = sender_id
            # TODO: If a "leader" receives a heartbeat,
            # it might have crashed and joined back in after an election (?)
            # NOTE(review): the heartbeat's term is not checked before
            # stepping down.
            if is_leader_or_candidate:
                self.step_down()
            return

        # TODO: Process AppendEntriesMessage
        print("-->Processing AppendEntriesMessage from leader")
        self.process_heartbeat()
        if msg.term > self.current_term:
            self.current_term = msg.term
            self.save_state()

        if is_leader_or_candidate:
            self.step_down()

        if self.current_term > msg.term:
            # Reject if my term is greater than leader term
            print("Error: Current term greater than leaders term")
            self.channel.send(AcknowledgeMessage(ack=False,
                                                 term=self.current_term),
                              id=sender_id)
        elif self.log.is_empty() and msg.prev_log_index == -1:
            # Accept. Self.log is empty and leader is sending all entries
            print("Appending entries")
            # First entry to append is at index 0
            self._append_and_ack(sender_id, msg, "DET HER SKAL IKKE SKJE 1")
        elif self.log.contains_at_index(msg.prev_log_index):
            # Accept only if the term at prev_log_index matches the leader's
            if self.log.get(msg.prev_log_index).term == msg.prev_log_term:
                self._append_and_ack(sender_id, msg,
                                     "DET HER SKAL IKKE SKJE NUMMER 2")
            else:
                self.log.remove(msg.prev_log_index)
                self.channel.send(AcknowledgeMessage(ack=False), id=sender_id)
        else:
            print("Send ACK-False")
            self.channel.send(AcknowledgeMessage(ack=False), id=sender_id)

    def save_state(self):
        """Persist ``voted_for``, ``current_term`` and the log via ``storage``."""
        storage.save(self.id, self.voted_for, self.current_term, self.log)

    def load_state(self):
        """Load ``voted_for``, ``current_term`` and the log via ``storage``."""
        self.voted_for, self.current_term, self.log = storage.load(self.id)
        print(self.current_term)
        print(self.log)

    def update_connected_servers(self):
        """Sync ``connected_servers`` with the channel's live connections."""
        live = self.channel.address_to_connection
        for addr in list(addr_to_id.keys()):
            if addr in live and addr not in self.connected_servers:
                # Store the address itself; every consumer indexes the
                # stored value as ``server[0]``.
                self.connected_servers.append(addr)
            if addr not in live and addr in self.connected_servers:
                self.connected_servers.remove(addr)
class Server(threading.Thread):
    """One Raft-style server: follower, candidate or leader, run as a thread.

    The thread loop (``run``) keeps connections to the other servers alive,
    dispatches incoming messages and, when idle, drives timeouts through
    ``check_status``.  ``current_term``, ``voted_for`` and ``log`` are
    persisted through ``storage``.

    All ``print`` calls take a single pre-formatted string so the class
    behaves the same under Python 2 and Python 3.
    """

    def __init__(self, queue, port, id):
        """Create a follower listening on *port* with server id *id*.

        ``id`` shadows the builtin but is part of the public signature.
        """
        # Initialise the Thread machinery before setting any attribute.
        threading.Thread.__init__(self)
        self.port = port
        self.id = id
        self.queue = queue
        self.title = constants.TITLE_FOLLOWER
        self.channel = network.Network(port, id)
        self.channel.start()
        self.leader = None
        self.running = True
        self.connected_servers = []

        self.last_heartbeat = 0
        self.heartbeat_timeout = 0
        self.process_heartbeat()
        self.heartbeat_frequency = 0.5
        self.election_start_time = 0
        # Time to wait for heartbeat or voting for a candidate before
        # calling election
        self.election_timeout = 0
        self.set_election_timeout()

        # Election variables
        self.id_received_votes = set()  # Ids of servers who granted a vote
        self.id_refused_votes = set()   # Ids of servers who refused a vote
        self.num_received_votes = 0     # Votes received in current election

        # Persistent state variables (overwritten by load_state below)
        self.current_term = 0    # Latest term server has seen
        self.voted_for = None    # CandidateId that received vote in current term
        self.log = Log()
        self.next_index = None   # For leader: indices for updating follower logs
        # For leader: tuples of latest entry index and term for each
        # follower. Used for commit.
        self.latest_index_term = None
        self.load_state()

    # ------------------------------------------------------------------
    # Timers
    # ------------------------------------------------------------------
    def set_election_timeout(self):
        """Draw a new election timeout in [1.5, 3.0) seconds."""
        self.election_timeout = 1.5 * random() + 1.5

    def process_heartbeat(self):
        """Record a heartbeat now and draw a new timeout in [1.5, 3.0)."""
        self.last_heartbeat = time.time()
        self.heartbeat_timeout = 1.5 * random() + 1.5

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _last_log_position(self):
        """Return (index, term) of the last log entry, or (-1, -1) if empty."""
        if not self.log.data:
            return -1, -1
        last = self.log.get(-1)
        return last.index, last.term

    def _adopt_term(self, term):
        """Move to a newer *term*, drop the old term's vote, persist both."""
        self.current_term = term
        # A vote is only valid for the term it was cast in.
        self.voted_for = None
        self.save_state()

    def _send_vote_requests(self, skip_ids):
        """Send a RequestVote to every connected server not in *skip_ids*."""
        last_log_index, last_log_term = self._last_log_position()
        msg = RequestVoteMessage(self.id, self.current_term,
                                 last_log_index, last_log_term)
        for server in self.connected_servers:
            server_id = host_to_id[server[0]]
            if server_id not in skip_ids:
                self.channel.send(msg, id=server_id)

    # ------------------------------------------------------------------
    # Elections
    # ------------------------------------------------------------------
    def request_votes(self):
        """Ask every connected server for its vote in the current term."""
        self._send_vote_requests(())
        print("Vote requests sent to other servers")

    def request_remaining_votes(self, id_all_voters):
        """Re-ask the servers whose ids are not in *id_all_voters*."""
        self._send_vote_requests(id_all_voters)
        print("Vote requests sent to remaining servers who have not responded")

    def check_status(self):
        """Periodic work, run when no message arrived.

        Leaders replicate missing entries and send heartbeats; followers and
        candidates start (or continue) an election once their timeout passes.
        """
        current_time = time.time()
        if self.title == constants.TITLE_LEADER:
            # Send AppendEntries to update follower logs
            for server in self.connected_servers:
                server_id = host_to_id[server[0]]
                next_index = self.next_index[server_id]
                # Send entries the server has not received yet, if any
                if self.log.last_log_index() < next_index:
                    continue
                entries = self.construct_entries_list(next_index)
                if next_index == 0:
                    prev_log_index = -1
                    prev_log_term = -1
                else:
                    prev_entry = self.log.get(next_index - 1)
                    prev_log_index = prev_entry.index
                    prev_log_term = prev_entry.term
                msg = AppendEntriesMessage(self.current_term, self.id,
                                           prev_log_index, prev_log_term,
                                           entries, self.log.last_commit_index)
                self.channel.send(msg, id=server_id)
                print("AppendEntries sent to  %s" % (server_id,))
            if current_time - self.last_heartbeat >= self.heartbeat_frequency:
                self.send_heartbeats()
        elif self.title == constants.TITLE_FOLLOWER:
            if current_time - self.last_heartbeat > self.heartbeat_timeout:
                # Heartbeat timeout passed as follower: Start election
                print("Election timeout as follower. No heartbeat. "
                      "Become candidate and start new election")
                self.start_election()
        elif self.title == constants.TITLE_CANDIDATE:
            elapsed = current_time - self.election_start_time
            if elapsed > self.election_timeout:
                # Election ended without conclusion: start a new one
                print("Election timeout as candidate. Election has not yet "
                      "led to new leader. Starting new election")
                self.set_election_timeout()
                self.start_election()
            elif elapsed < self.election_timeout:
                print("As candidate, election timeout has not passed. "
                      "Request votes from servers that have not responded")
                id_all_voters = self.id_received_votes.union(
                    self.id_refused_votes)
                self.request_remaining_votes(id_all_voters)

    def construct_entries_list(self, index):
        """Return the log entries from *index* to the end of the log."""
        return [self.log.get(i) for i in range(index, len(self.log))]

    def start_election(self):
        """Become candidate for the next term and vote for ourselves."""
        self.title = constants.TITLE_CANDIDATE
        self.reset_election_info()
        self.current_term += 1
        self.voted_for = self.id
        # Persist the new term and the self-vote together.
        self.save_state()
        self.update_votes(self.id, True)
        self.election_start_time = time.time()
        self.check_election_status()
        self.request_votes()

    def send_heartbeats(self):
        """Send an empty AppendEntries to every connected server."""
        heartbeat = AppendEntriesMessage(self.current_term, self.id, -1, -1,
                                         [], self.log.last_commit_index)
        for server in self.connected_servers:
            self.channel.send(heartbeat, id=host_to_id[server[0]])
        self.process_heartbeat()

    def step_down(self):
        """Convert a leader or candidate to follower; no-op for followers."""
        if self.title in (constants.TITLE_LEADER, constants.TITLE_CANDIDATE):
            self.title = constants.TITLE_FOLLOWER
            self.process_heartbeat()
            self.reset_election_info()

    def grant_vote(self, candidate_id):
        """Record, persist and announce a vote for *candidate_id*."""
        self.voted_for = candidate_id
        self.save_state()
        print("Grant vote to %s" % (candidate_id,))
        self.channel.send(VoteReplyMessage(self.id, self.current_term, True),
                          id=candidate_id)

    def refuse_vote(self, candidate_id):
        """Tell *candidate_id* that its vote request was refused."""
        self.channel.send(VoteReplyMessage(self.id, self.current_term, False),
                          id=candidate_id)
        print("Refuse vote to %s" % (candidate_id,))

    def majority(self):
        """Return the vote count needed to win.

        NOTE(review): this counts currently connected servers plus this one,
        not the configured cluster size, so an isolated server can elect
        itself -- confirm this is intended.
        """
        # Floor division keeps the Python 2 integer result on Python 3.
        return (len(self.connected_servers) + 1) // 2 + 1

    def check_election_status(self):
        """Become leader once a majority of votes has been received."""
        if self.num_received_votes >= self.majority():
            self.become_leader()

    def become_leader(self):
        """Switch to leader and initialise the per-follower bookkeeping."""
        self.title = constants.TITLE_LEADER
        self.leader = self.id
        print("Election won - I am now LEADER")
        # TODO: Implement rest of leader initialization
        cluster_size = len(addr_to_id)
        self.next_index = [len(self.log) for _ in range(cluster_size)]

        if self.log.last_commit_index == -1:
            latest_index = None
            latest_term = 0
        else:
            latest_index = self.log.last_commit_index
            if self.log.contains_at_index(latest_index):
                latest_term = self.log.get(latest_index).term
            else:
                latest_term = 0
        self.latest_index_term = [(latest_index, latest_term)
                                  for _ in range(cluster_size)]
        self.latest_index_term[self.id] = (len(self.log) - 1,
                                           self.current_term)
        self.reset_election_info()
        self.send_heartbeats()

    def reset_election_info(self):
        """Forget the votes collected in the last election.

        ``voted_for`` is deliberately left alone: it changes only when the
        term changes (``_adopt_term``, ``start_election``).  Clearing it here
        let a fresh leader, or a candidate stepping down within the same
        term, vote a second time in that term.
        """
        self.id_received_votes = set()
        self.id_refused_votes = set()
        self.num_received_votes = 0

    def update_votes(self, server_id, vote_granted):
        """Record the reply of *server_id*; *vote_granted* is True if granted."""
        if vote_granted:
            print("Received vote from %s" % (server_id,))
            self.id_received_votes.add(server_id)
            self.num_received_votes = len(self.id_received_votes)
            print("Number of received votes is now %s"
                  % (self.num_received_votes,))
        else:
            print("Denied vote from %s" % (server_id,))
            self.id_refused_votes.add(server_id)

    def update_commits(self):
        """Advance the commit index and acknowledge entries to clients.

        Walks ``index`` down from ``max(next_index)`` until a majority of
        ``latest_index_term`` entries are at or beyond it.

        NOTE(review): the loop at the end acks every un-acked entry in the
        log, not only committed ones -- confirm against the original layout.
        """
        index = max(self.next_index)
        i_count = 0
        t_count = 0
        while i_count < self.majority() and index >= 0:
            index -= 1
            t_count = 0
            i_count = 0
            for (i, t) in self.latest_index_term:
                if t == self.current_term:
                    t_count += 1
                if i >= index:
                    i_count += 1

        if t_count >= self.majority() and i_count >= self.majority():
            if self.log.last_commit_index < index:
                self.log.last_commit_index = index
                self.save_state()
            elif self.log.last_commit_index > index:
                print("Error: Update_commits: new commit index is lower "
                      "than current commit_index")

        for entry in self.log.data:
            if not entry.client_ack_sent:
                ack_message = AcknowledgeMessage(ack=True,
                                                 msg_id=entry.msg_id)
                self.channel.send(ack_message, id=entry.author)
                entry.client_ack_sent = True

    # ------------------------------------------------------------------
    # Main loop and dispatch
    # ------------------------------------------------------------------
    def run(self):
        """Thread body: connect, receive and dispatch until stopped."""
        print("Server with id= %s  up and running" % (self.id,))
        while self.running:
            self.update_connected_servers()
            for server in list(addr_to_id.keys()):
                if server in self.channel:
                    continue
                if host_to_id[server[0]] == self.id:
                    continue
                connected = self.channel.connect(server)
                if connected:
                    print(str("Server: Connected to " + server[0]))
                    if server not in self.connected_servers:
                        self.connected_servers.append(server)

            data = self.channel.receive(RECEIVE_FREQ)
            if data:
                for server_id, msg in data:
                    self.process_msg(server_id, msg)
            else:
                self.check_status()

    def process_msg(self, sender_id, msg):
        """Dispatch *msg* from *sender_id* to the handler for its type."""
        if msg.type == constants.MESSAGE_TYPE_REQUEST_VOTE:
            self.process_request_vote(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_VOTE_REPLY:
            self.process_vote_reply(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_REQUEST_LEADER:
            msg = messages.RequestLeaderMessage(leader=self.leader)
            self.channel.send(msg, id=sender_id)
        elif msg.type == constants.MESSAGE_TYPE_LOOKUP:
            self.process_lookup(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_POST:
            self.process_post(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_APPEND_ENTRIES:
            self.process_append_entries(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_ACKNOWLEDGE:
            self.process_acknowledge(sender_id, msg)
        elif msg.type == constants.MESSAGE_TYPE_TEXT:
            # Used for testing purposes
            print("From %s : %s" % (msg.sender_id, msg.msg))
        else:
            print("Error: Invalid message type")

    # ------------------------------------------------------------------
    # Client requests
    # ------------------------------------------------------------------
    def process_lookup(self, sender_id, msg):
        """Answer a lookup with committed entries, or point to the leader."""
        if self.title == constants.TITLE_LEADER or msg.override:
            print("-----> Processing Lookup from client")
            posts = self.log.get_committed_entries()
            msg = messages.LookupMessage(msg_id=msg.msg_id, post=posts,
                                         server_id=self.id)
        else:
            print("Lookup to leader")
            msg = messages.RequestLeaderMessage(leader=self.leader)
        self.channel.send(msg=msg, id=sender_id)

    def process_post(self, sender_id, msg):
        """Append a client post to the log (leader) or point to the leader."""
        if self.title != constants.TITLE_LEADER:
            msg = messages.RequestLeaderMessage(leader=self.leader)
            self.channel.send(msg=msg, id=sender_id)
            return
        entry = Entry(msg.post, sender_id, self.current_term, len(self.log),
                      msg_id=msg.msg_id)
        if self.log.append(entry):
            self.save_state()
            self.latest_index_term[self.id] = (len(self.log) - 1,
                                               self.current_term)
            print("---->Append entry from client to log")

    # ------------------------------------------------------------------
    # Server-to-server messages
    # ------------------------------------------------------------------
    def process_request_vote(self, sender_id, msg):
        """Grant or refuse a vote; the candidate always gets a reply.

        The vote is granted when the candidate's term is current, this
        server has not voted for someone else in that term, and the
        candidate's log is at least as up to date as this one.
        """
        if msg.term < self.current_term:
            print("Refuse vote to server %s because I have higher term"
                  % (sender_id,))
            self.refuse_vote(msg.candidate_id)
            return

        if msg.term > self.current_term:
            # Newer term: forget the old vote, step down if leader/candidate
            self._adopt_term(msg.term)
            self.step_down()

        last_log_index, last_log_term = self._last_log_position()
        # Ids are compared by value; ``is`` only worked for small ints.
        free_to_vote = (self.voted_for is None
                        or self.voted_for == msg.candidate_id)
        log_up_to_date = (
            last_log_term < msg.last_log_term
            or (last_log_term == msg.last_log_term
                and last_log_index <= msg.last_log_index))
        if free_to_vote and log_up_to_date:
            self.grant_vote(msg.candidate_id)
        else:
            self.refuse_vote(msg.candidate_id)

    def process_vote_reply(self, sender_id, msg):
        """Count a vote reply, or step down if the voter is in a newer term.

        Replies are ignored unless this server is still a candidate and the
        reply is not from an older term; late replies could otherwise run
        ``become_leader`` again and reset the replication state.
        """
        if msg.term > self.current_term:
            self._adopt_term(msg.term)
            print("Denied vote from %s" % (msg.follower_id,))
            self.step_down()
            return
        if self.title != constants.TITLE_CANDIDATE:
            return
        if msg.term < self.current_term:
            return
        self.update_votes(msg.follower_id, msg.vote_granted)
        self.check_election_status()

    def process_acknowledge(self, sender_id, msg):
        """Handle a follower's reply to AppendEntries (leaders only)."""
        if self.title != constants.TITLE_LEADER:
            # next_index / latest_index_term are leader-only state.
            return
        if msg.ack:
            print("Process Acknowledge from server. ACK == TRUE")
            self.next_index[sender_id] = msg.next_index
            self.latest_index_term[sender_id] = msg.latest_index_term
            self.update_commits()
            return

        print("Process Acknowledge from server. ACK == FALSE")
        self.next_index[sender_id] = max(0, self.next_index[sender_id] - 1)
        # Negative acks are also built without a term; comparing None with an
        # int only works on Python 2, so test for it explicitly.
        if msg.term is not None and msg.term > self.current_term:
            self._adopt_term(msg.term)
            self.step_down()

    def process_append_entries(self, sender_id, msg):
        """Handle a heartbeat or an AppendEntries message from a leader.

        Messages from an older term are rejected before they can reset the
        heartbeat timer, change ``self.leader`` or depose this server.  An
        empty ``msg.entries`` is a heartbeat; otherwise the entries are
        appended when the log matches at ``msg.prev_log_index``.
        """
        # Reject if my term is greater than leader term
        if self.current_term > msg.term:
            print("Error: Current term greater than leaders term")
            self.channel.send(
                AcknowledgeMessage(ack=False, term=self.current_term),
                id=sender_id)
            return

        if msg.term > self.current_term:
            self._adopt_term(msg.term)
        self.process_heartbeat()
        self.leader = sender_id
        self.step_down()  # no-op when already a follower

        if len(msg.entries) == 0:
            # Heartbeat: only the commit index may move.
            if msg.commit_index < len(self.log):
                self.log.last_commit_index = msg.commit_index
                self.save_state()
            return

        print("-->Processing AppendEntriesMessage from leader")
        if self.log.is_empty() and msg.prev_log_index == -1:
            # Self.log is empty and leader is sending all entries;
            # the first entry to append is at index 0.
            print("Appending entries")
            consistent = True
            failure_note = "DET HER SKAL IKKE SKJE 1"
        elif self.log.contains_at_index(msg.prev_log_index):
            # Accept only if the term at prev_log_index matches as well.
            prev_term = self.log.get(msg.prev_log_index).term
            consistent = prev_term == msg.prev_log_term
            failure_note = "DET HER SKAL IKKE SKJE NUMMER 2"
            if not consistent:
                self.log.remove(msg.prev_log_index)
        else:
            print("Send ACK-False")
            consistent = False
            failure_note = None

        if not consistent:
            self.channel.send(AcknowledgeMessage(ack=False), id=sender_id)
            return

        if not self.log.append_entries(msg.entries):
            print(failure_note)
            return
        self.log.last_commit_index = msg.commit_index
        self.save_state()
        i = self.log.last_log_index()
        t = self.log.get(i).term
        self.channel.send(
            AcknowledgeMessage(ack=True, next_index=len(self.log),
                               latest_index_term=(i, t)),
            id=sender_id)
        print("Log after appending entries:")
        self.log.show_data()

    # ------------------------------------------------------------------
    # Persistence and connections
    # ------------------------------------------------------------------
    def save_state(self):
        """Persist voted_for, current_term and the log through ``storage``."""
        storage.save(self.id, self.voted_for, self.current_term, self.log)

    def load_state(self):
        """Restore voted_for, current_term and the log from ``storage``."""
        self.voted_for, self.current_term, self.log = storage.load(self.id)
        print(self.current_term)
        print(self.log)

    def update_connected_servers(self):
        """Sync ``connected_servers`` with the channel's live connections."""
        live = self.channel.address_to_connection.keys()
        for addr in list(addr_to_id.keys()):
            if addr in live:
                if addr not in self.connected_servers:
                    # The original appended the builtin ``id`` here.
                    self.connected_servers.append(addr)
            elif addr in self.connected_servers:
                self.connected_servers.remove(addr)
class Node(rpcService_pb2_grpc.RPCServicer): def __init__(self, conf): self.role = 'follower' self.id = conf['id'] self.addr = conf['addr'] self.peers = conf['peers'] # persistent state self.current_term = 0 self.voted_for = None self.kv = {} if not os.path.exists(self.id): os.mkdir(self.id) # init persistent state self.load() self.log = Log(self.id) # volatile state # rule 1, 2 self.commit_index = 0 self.last_applied = 0 # volatile state on leaders # rule 1, 2 self.next_index = {_id: self.log.last_log_index + 1 for _id in self.peers} #log_list最后一个index的下一个,也就是还是空的 #{16+1,16+1,16+1, ...} self.match_index = {_id: -1 for _id in self.peers} # append entries self.leader_id = None # request vote self.vote_ids = {_id: 0 for _id in self.peers} # client request self.client_port = None # tick self.t_heartbeat = 0.15#2 self.next_leader_election = 1.5#3 self.connect_timeout_in_seconds=0.1 self.wait_s = (0,1.5)#(1, 2) ## 心跳或者entry msg计时器/或者叫选举计时器 self.next_leader_election_timer_restart() self.cmd_output = {'msgType':'info', 'msg':'become follower', 'localID':self.id, 'currentTerm':self.current_term} print(str(self.cmd_output)) self.stub_dict={} self.channel_dict={} for dst_id in self.peers: # logging.info('leader:1. 
send append_entries to peer ' + dst_id) peer_addr = str(self.peers[dst_id][0]) + ':' + str(self.peers[dst_id][1]) self.channel_dict[peer_addr] = grpc.insecure_channel(peer_addr) self.stub_dict[peer_addr] = rpcService_pb2_grpc.RPCStub(self.channel_dict[peer_addr]) self.channel_dict['localhost:10005'] = grpc.insecure_channel('localhost:10005') self.stub_dict['localhost:10005'] = rpcService_pb2_grpc.RPCStub(self.channel_dict['localhost:10005']) self.channel_dict['localhost:10000'] = grpc.insecure_channel('localhost:10000') self.stub_dict['localhost:10000'] = rpcService_pb2_grpc.RPCStub(self.channel_dict['localhost:10000']) # tick func def next_leader_election_timer_restart(self): self.next_leader_election_timer = Timer(self.next_leader_election + random.uniform(*self.wait_s), self.election_timeoutStep) self.next_leader_election_timer.start() def next_leader_election_timer_cancel(self): self.next_leader_election_timer.cancel() def next_heartbeat_timer_restart(self): self.next_heartbeat_timer = Timer(self.t_heartbeat, self.next_heartbeat_timeoutStep) #+ random.uniform(*self.wait_s) self.next_heartbeat_timer.start() def next_heartbeat_timer_cancel(self): self.next_heartbeat_timer.cancel() def load(self): file_path = self.id + '/key.json' if os.path.exists(file_path): with open(file_path, 'r') as f: data = json.load(f) self.current_term = data['current_term'] self.voted_for = data['voted_for'] self.kv = data['kv'] else: self.save() def save(self): data = {'current_term': self.current_term, 'voted_for': self.voted_for, 'kv': self.kv, } file_path = self.id + '/key.json' with open(file_path, 'w') as f: json.dump(data, f) def election_timeoutStep(self): ''' 开始选举前的timeout处理(长时间未收到消息), 只有follower会触发 选举中的timeout处理, 只有candidate会触发 ''' # logging.info('ENTER election_timeoutStep') if self.role == 'follower': self.follower_do(task='becomeCandidate') # print("\n") return if self.role == 'candidate': self.candidate_do(task='reElection') # print("\n") return def 
next_heartbeat_timeoutStep(self): ''' 只有leader会触发 ''' # logging.info('ENTER next_heartbeat_timeoutStep') if self.role == 'leader': self.next_heartbeat_timer_cancel() for dst_id in self.peers: # logging.info('leader:1. send append_entries to peer ' + dst_id) peer_addr = str(self.peers[dst_id][0]) + ':' + str(self.peers[dst_id][1]) # with grpc.insecure_channel(peer_addr) as channel: stub = self.stub_dict[peer_addr]#rpcService_pb2_grpc.RPCStub(channel) response = None try: response = stub.AppendEntries(rpcService_pb2.appendEntriesRequest(term=self.current_term, leaderId=self.id, prev_log_index=self.next_index[dst_id] - 1, prev_log_term=self.log.get_log_term(self.next_index[dst_id] - 1), entries=self.log.get_entries(self.next_index[dst_id]), #(16+1,就是我这个leader的log里面的最后一个的下一个|第0个) leader_commit=self.commit_index), self.connect_timeout_in_seconds) except: # print("----------------------------send appendrpc connect error!") self.cmd_output = {} self.cmd_output['msgType']='info' self.cmd_output['msg']='send msg fail' self.cmd_output['localID']=self.id self.cmd_output['to']=dst_id self.cmd_output['send_msg_type']='MsgHeartBeat' if self.log.get_entries(self.next_index[dst_id]) == [] else 'MsgApp' print(str(self.cmd_output)) if response !=None and response.type!='heartbeat_response': self.leader_do(data=response) # 发送完心跳包,重新开始计时 self.next_heartbeat_timer_restart() # print("\n") def AppendEntries(self, request, context): ''' # 不为空,也不是从client发来的信息 # 根据是headbeart还是append,决定要做什么事情 ''' self.all_do() self.next_leader_election_timer_cancel() self.next_leader_election_timer_restart() self.leader_id = request.leaderId # logging.info(self.role + '1. 
recv append_entries from leader ' + self.leader_id) self.cmd_output={} self.cmd_output={ 'msgType':'MsgApp_or_MsgHeartBeat', 'msg':'received from leader', 'localID':self.id, 'leaderID':self.leader_id, 'currentTerm':self.current_term, 'msgTerm':request.term } if request.entries != []: self.cmd_output['msgType']='MsgApp'#不为空,也可能是心跳包,所以定个标准,为空就是心跳包 print(str(self.cmd_output)) if request.term < self.current_term: # logging.info(self.role + ': 2. smaller term') # logging.info(self.role + ': 3. success = False: smaller term') # logging.info(self.role + ': 4. send append_entries_response to leader ' + self.leader_id) # print("\n") return rpcService_pb2.appendEntriesResponse(responserId=self.id, success=False, responserTerm=self.current_term, type='append_entries_response') elif request.term > self.current_term: #针对candidate收到leader的消息的情况(别人先竞选成功了) # logging.info(self.role + ': 2. bigger term') # logging.info(self.role + ': 3. become follower') self.role = 'follower' self.current_term = request.term self.voted_for = None self.save() if self.role == 'candidate': #不管是不是心跳包,收到了就得转为follower self.candidate_do(task='convertToFollower') prev_log_index = request.prev_log_index prev_log_term = request.prev_log_term tmp_prev_log_term = self.log.get_log_term(prev_log_index) # self.cmd_output={} # self.cmd_output={ # 'msgType':'info', # 'msg':'call maybeAppend', # 'localID':self.id, # 'leaderID':self.leader_id, # 'prevLogIndex':request.prev_log_index, # 'prevLogTerm':request.prev_log_term, # 'leaderCommitted':request.leader_commit # } # print(str(self.cmd_output)) # append_entries: rule 2, 3 # append_entries: rule 3 if tmp_prev_log_term != prev_log_term: # logging.info(self.role + ': 5. success = False: index not match or term not match') # logging.info(self.role + ': 6. send append_entries_response to leader ' + self.leader_id) # logging.info(self.role + ': 7. log delete_entries') # logging.info(self.role + ': 8. 
log save') self.cmd_output={} self.cmd_output={ 'msgType':'info', 'msg':'handleAppendEntries append failed and need to be consistent', 'localID':self.id, 'leaderID':self.leader_id, 'prevLogIndex':request.prev_log_index, 'prevLogTerm':request.prev_log_term, 'leaderCommitted':request.leader_commit, 'last_applied':self.last_applied, 'lastLogIndex':self.log.last_log_index, 'lastLogTerm':self.log.last_log_term } print(str(self.cmd_output)) self.log.delete_entries(prev_log_index) # print("\n") return rpcService_pb2.appendEntriesResponse(responserId=self.id, success=False, responserTerm=self.current_term, type='append_entries_response') else: # logging.info(self.role + ': 5. success = True') # logging.info(self.role + ': 6. send append_entries_response to leader ' + self.leader_id) # 先把当前follower从prev_log_index开始的后面的部分(不包括prev_log_index)去掉,也就是强制覆盖的原则,比leader多出的log要全部去掉 self.log.delete_entries(prev_log_index+1) if request.entries != []: #不为心跳 # logging.info(self.role + ': 7. append_entries not None') # logging.info(self.role + ': 8. log append_entries') # logging.info(self.role + ': 9. log save') self.log.append_entries(prev_log_index, list(request.entries)) self.cmd_output={} self.cmd_output={ 'msgType':'info', 'msg':'handleAppendEntries append success', 'localID':self.id, 'leaderID':self.leader_id, 'prevLogIndex':request.prev_log_index, 'prevLogTerm':request.prev_log_term, 'leaderCommitted':request.leader_commit, 'last_applied':self.last_applied, 'lastLogIndex':self.log.last_log_index, 'lastLogTerm':self.log.last_log_term } print(str(self.cmd_output)) # append_entries rule 5 leader_commit = request.leader_commit if leader_commit > self.commit_index: commit_index = min(leader_commit, self.log.last_log_index) self.commit_index = commit_index+1###debug # logging.info(self.role + ': 7. 
commit_index = ' + str(commit_index)) self.all_do() # print("\n") return rpcService_pb2.appendEntriesResponse(responserId=self.id, success=True, responserTerm=self.current_term, type='append_entries_response') def RequestVote(self, request, context): ''' #不为空,也不是从client发来的信息 ''' self.next_leader_election_timer_cancel() self.next_leader_election_timer_restart() self.cmd_output = {} self.cmd_output = { 'msgType':'MsgVote', 'msg':'received MsgVote', 'localID':self.id, 'msgFrom':request.candidateId, 'msgTerm':request.term, 'msgLastLogTerm':request.last_log_term, 'msgLastLogIndex':request.last_log_index, 'LastLogTerm':self.log.last_log_term, 'LastLogIndex':self.log.last_log_index } # logging.info(self.role + ': 1. recv request_vote from candidate ' + request.candidateId) if request.term < self.current_term: # logging.info(self.role + ': 2. smaller term, request.term: '+ str(request.term)) # logging.info(self.role + ': 3. success = False') # logging.info(self.role + ': 4. send request_vote_response to candidate ' + request.candidateId) # logging.info(self.role + ': 5. current term: ' + str(self.current_term)) # print(self.role, ': 6. voted for: ', self.voted_for) self.cmd_output['msg']='received MsgVote and vote reject' self.cmd_output['vote_for']=self.voted_for self.cmd_output['current_term']=self.current_term print(str(self.cmd_output)) # print("\n") return rpcService_pb2.requsetVoteResponse(responserId=self.id, responserTerm=self.current_term, votedGranted=False) elif request.term > self.current_term: #针对follower收到candidate的消息的情况 # logging.info(self.role + ': 2. bigger term') # logging.info(self.role + ': 3. 
become follower') self.role = 'follower' self.current_term = request.term self.voted_for = None self.save() candidate_id = request.candidateId last_log_index = request.last_log_index last_log_term = request.last_log_term # self.current_term = request.term if self.voted_for == None or self.voted_for == candidate_id: if self.role == 'leader' and request.last_log_index == self.log.last_log_index and request.last_log_term == self.log.last_log_term: # logging.info(self.role + ': 4. same log and ' + self.id + ' is leader already.') # logging.info(self.role + ': 5. send request_vote_response to candidate ' + request.candidateId) # logging.info(self.role + ': 6. current term: ' + str(self.current_term)) # print(self.role, ': 7. voted for: ', self.voted_for) self.cmd_output['msg']='received MsgVote and vote reject' self.cmd_output['vote_for']=self.voted_for self.cmd_output['current_term']=self.current_term print(str(self.cmd_output)) # print("\n") return rpcService_pb2.requsetVoteResponse(responserId=self.id, responserTerm=self.current_term, votedGranted=False) elif request.last_log_index >= self.log.last_log_index and request.last_log_term >= self.log.last_log_term: self.voted_for = request.candidateId self.save() # logging.info(self.role + ': 4. success = True: candidate log is newer') # logging.info(self.role + ': 5. send request_vote_response to candidate ' + request.candidateId) # logging.info(self.role + ': 6. current term: ' + str(self.current_term)) # print(self.role, ': 7. voted for: ', self.voted_for) self.cmd_output['msg']='received MsgVote and vote granted' self.cmd_output['vote_for']=self.voted_for self.cmd_output['current_term']=self.current_term print(str(self.cmd_output)) # print("\n") return rpcService_pb2.requsetVoteResponse(responserId=self.id, responserTerm=self.current_term, votedGranted=True) else: self.voted_for = None self.save() # logging.info(self.role + ': 4. success = False: candidate log is older') # logging.info(self.role + ': 5. 
send request_vote_response to candidate ' + request.candidateId) # logging.info(self.role + ': 6. current term: ' + str(self.current_term)) # print(self.role, ': 7. voted for: ', self.voted_for) self.cmd_output['msg']='received MsgVote and vote reject' self.cmd_output['vote_for']=self.voted_for self.cmd_output['current_term']=self.current_term print(str(self.cmd_output)) # print("\n") return rpcService_pb2.requsetVoteResponse(responserId=self.id, responserTerm=self.current_term, votedGranted=False) else: # logging.info(self.role + ': 4. success = False: has voted for ' + self.voted_for) # logging.info(self.role + ': 5. send request_vote_response to candidate ' + request.candidateId) # logging.info(self.role + ': 6. current term: ' + str(self.current_term)) # print(self.role, ': 7. voted for: ', self.voted_for) self.cmd_output['msg']='received MsgVote and vote reject' self.cmd_output['vote_for']=self.voted_for self.cmd_output['current_term']=self.current_term print(str(self.cmd_output)) # print("\n") return rpcService_pb2.requsetVoteResponse(responserId=self.id, responserTerm=self.current_term, votedGranted=False) def Get(self, request, context): # logging.info('ENTER Get') # TODO: 如果是leader,更新自己的entries,通知peers,大部分回复后执行并返回结果 # TODO: 如果不是leader,则转发给leader self.client_port = request.clientport # logging.info(self.role + ': ' + 'recv GET from client ' + self.client_port + '!!!!!!!!!!') if self.role =='follower' and self.leader_id: if request.opera_type == 'get': self.cmd_output = {} self.cmd_output['msgType']='get' self.cmd_output['msg']='received GET command from client' self.cmd_output['localID']=self.id print(str(self.cmd_output)) # logging.info(self.role + ': ' + 'Getredirect: GET client_append_entries to leader ' + self.leader_id) #with grpc.insecure_channel('localhost:'+str(self.peers[self.leader_id][1])) as channel: stub = self.stub_dict['localhost:'+str(self.peers[self.leader_id][1])] # stub = rpcService_pb2_grpc.RPCStub(channel) try: getRedirect_response = 
stub.GetRedirect(rpcService_pb2.getRedirectRequest(key=request.key, \ value=request.value, type=request.type, clientport=request.clientport, opera_type=request.opera_type), self.connect_timeout_in_seconds) # print("\n") self.cmd_output = {} self.cmd_output['msgType']='info' self.cmd_output['msg']='redirect clientRequest to leader success' self.cmd_output['localID']=self.id self.cmd_output['leaderID']=self.leader_id print(str(self.cmd_output)) if getRedirect_response.success: return rpcService_pb2.getResponse(success=True, error_msg=None, value=getRedirect_response.value) else: return rpcService_pb2.getResponse(success=False, error_msg=getRedirect_response.error_msg, value=None) except: self.cmd_output = {} self.cmd_output['msgType']='info' self.cmd_output['msg']=' redirect clientRequest to leader failed, maybe connect error' self.cmd_output['localID']=self.id self.cmd_output['leaderID']=self.leader_id print(str(self.cmd_output)) # print("----------------------------getRedirect connect error!") # print("\n") return rpcService_pb2.getResponse(success=False, error_msg='getRedirect connect error', value=None) elif request.opera_type == 'get_noredirect': self.cmd_output = {} self.cmd_output['msgType']='get_noredirect' self.cmd_output['msg']='received GET command from client' self.cmd_output['localID']=self.id print(str(self.cmd_output)) if request.key not in self.kv: return rpcService_pb2.getResponse(success=False, error_msg="GET Defeated!! 
Key not in leader state machine!!!", value=None) return rpcService_pb2.getResponse(success=True, error_msg=None, value=self.kv[request.key]) elif self.role == 'candidate': # logging.info(self.role + ': ' + 'no leader!!!') # print("\n") if request.opera_type=='get': self.cmd_output = {} self.cmd_output['msgType']='get' self.cmd_output['msg']="received GET command from client, however, there is no leader" self.cmd_output['localID']=self.id print(str(self.cmd_output)) return rpcService_pb2.getResponse(success=False, error_msg='no leader!!!', value=None) elif request.opera_type == 'get_noredirect': self.cmd_output = {} self.cmd_output['msgType']='get_noredirect' self.cmd_output['msg']='received GET command from client' self.cmd_output['localID']=self.id print(str(self.cmd_output)) if request.key not in self.kv: return rpcService_pb2.getResponse(success=False, error_msg="GET Defeated!! Key not in leader state machine!!!", value=None) return rpcService_pb2.getResponse(success=True, error_msg=None, value=self.kv[request.key]) else: key = request.key self.cmd_output = {} self.cmd_output['msgType']='get' self.cmd_output['msg']='received GET command from client' self.cmd_output['localID']=self.id print(str(self.cmd_output)) if key not in self.kv: # print("leader: GET Defeated!! Key not in leader state machine!!!") # print("\n") return rpcService_pb2.getResponse(success=False, error_msg="GET Defeated!! Key not in leader state machine!!!", value=None) # print("\n") return rpcService_pb2.getResponse(success=True, error_msg=None, value=self.kv[key]) def GetRedirect(self, request, context): # logging.info('ENTER GetRedirect') self.client_port = request.clientport self.cmd_output = {} self.cmd_output['msgType']='info' self.cmd_output['msg']="received GetRedirect command from follower's redirect" self.cmd_output['localID']=self.id print(str(self.cmd_output)) key = request.key if key not in self.kv: # print("leader: GET Defeated!! 
def PutDel(self, request, context):
    """Handle a client PUT or DEL request.

    Behaviour depends on this node's role:

    * follower that knows the leader: forward the request to the leader
      through ``PutDelRedirect`` and relay the leader's ``success`` flag;
    * leader: hand the request to ``leader_do`` and report success;
    * anything else (a candidate, or a follower that has not learned who
      the leader is yet): reject the request with ``'no leader!!!'``.

    Previously a follower without a known leader fell through to the leader
    branch and ran ``leader_do`` on the request even though it was not the
    leader.

    Args:
        request: message providing ``key``, ``value``, ``type``,
            ``clientport``, ``opera_type`` and ``start_time``.
        context: RPC context; not used here.

    Returns:
        A ``putDelResponse`` with ``success`` and ``error_msg`` set.
    """
    self.client_port = request.clientport

    is_put = request.opera_type == 'put'
    msg_type = 'put' if is_put else 'del'
    received_msg = ('received PUT command from client' if is_put
                    else 'received DEL command from client')

    # Not the leader, but the leader is known: forward the request to it.
    if self.role == 'follower' and self.leader_id:
        self.cmd_output = {
            'msgType': msg_type,
            'msg': received_msg,
            'localID': self.id,
            'leaderID': self.leader_id,
        }
        print(str(self.cmd_output))

        stub = self.stub_dict['localhost:' + str(self.peers[self.leader_id][1])]
        try:
            redirect_response = stub.PutDelRedirect(
                rpcService_pb2.putDelRedirectRequest(
                    key=request.key,
                    value=request.value,
                    type=request.type,
                    clientport=request.clientport,
                    opera_type=request.opera_type,
                    start_time=request.start_time),
                self.connect_timeout_in_seconds)
        except Exception:
            # Best effort: any failure while forwarding is reported to the
            # client as a connect error.  ``Exception`` (not a bare except)
            # so KeyboardInterrupt/SystemExit still propagate.
            self.cmd_output = {
                'msgType': 'info',
                'msg': 'redirect clientRequest to leader failed, maybe connect error',
                'localID': self.id,
                'leaderID': self.leader_id,
            }
            print(str(self.cmd_output))
            return rpcService_pb2.putDelResponse(
                success=False,
                error_msg='putDelRedirectResponse connect error')

        self.cmd_output = {
            'msgType': 'info',
            'msg': 'redirect clientRequest to leader success',
            'localID': self.id,
            'leaderID': self.leader_id,
        }
        print(str(self.cmd_output))
        return rpcService_pb2.putDelResponse(
            success=redirect_response.success, error_msg=None)

    if self.role == 'leader':
        self.cmd_output = {
            'msgType': msg_type,
            'msg': received_msg,
            'localID': self.id,
        }
        print(str(self.cmd_output))
        self.leader_do(request)
        return rpcService_pb2.putDelResponse(success=True, error_msg=None)

    # Candidate, or follower with no known leader: nobody can accept writes.
    self.cmd_output = {
        'msgType': msg_type,
        'msg': 'received PUT/DEL command from client, however, there is no leader',
        'localID': self.id,
        'leaderID': self.leader_id,
    }
    print(str(self.cmd_output))
    return rpcService_pb2.putDelResponse(success=False, error_msg='no leader!!!')
def PutDelRedirect(self, request, context):
    """Handle a PUT/DEL request forwarded by a follower.

    According to the original author's note, this RPC is only ever invoked
    on the leader.  It prints a status record, remembers the originating
    client's port, and passes the request to ``leader_do``.

    Args:
        request: forwarded message; ``clientport`` is read here and the
            whole message is handed to ``leader_do``.
        context: RPC context; not used here.

    Returns:
        A ``putDelRedirectResponse`` with ``success=True``.
    """
    self.cmd_output = {
        'msgType': 'PutDelRedirect',
        'msg': "received PutDelRedirect command from follower's redirect",
        'localID': self.id,
    }
    print(str(self.cmd_output))

    self.client_port = request.clientport
    self.leader_do(request)

    return rpcService_pb2.putDelRedirectResponse(success=True)
def all_do(self):
    """Apply newly committed log entries to the key/value state machine.

    Does nothing unless ``commit_index`` is ahead of ``last_applied``.
    Otherwise every entry between the two is applied in log order,
    ``last_applied`` is advanced entry by entry up to ``commit_index``, the
    node is saved once if the store changed, and an "advance applied log"
    status record is printed.

    Log entries are space-separated strings: field 1 is the key, field 2 the
    value, and the last field the operation (``'put'`` or ``'del'``).

    Previously only the *last* entry of the log was applied, whatever
    ``commit_index`` was, so intermediate committed entries were skipped and
    an entry that was not committed yet could be applied early.
    """
    if self.commit_index <= self.last_applied:
        return

    changed = False
    # Commit number N is stored at log position N - 1; this is the same
    # mapping leader_do uses when it reads the entry at commit_index - 1.
    for position in range(self.last_applied, self.commit_index):
        fields = self.log.get_one_entries(position).split(' ')
        opera_type = fields[-1]
        key = fields[1]
        value = fields[2]

        if opera_type == 'put':
            self.kv[key] = value
            changed = True
        elif opera_type == 'del' and key in self.kv:
            del self.kv[key]
            changed = True

        # Advance one entry at a time so a failure part-way through does
        # not mark unapplied entries as applied.
        self.last_applied = position + 1

    if changed:
        self.save()

    self.cmd_output = {
        'msgType': 'info',
        'msg': "advance applied log",
        'localID': self.id,
        'committedIndex': self.commit_index,
        'last_applied': self.last_applied,
        'lastLogIndex': self.log.last_log_index,
        'lastLogTerm': self.log.last_log_term,
    }
    print(str(self.cmd_output))
def follower_do(self, task='becomeCandidate'):
    """Follower step: announce the lost leader and, by default, stand for election.

    Prints a "lost leader" status record, applies any committed entries via
    ``all_do`` and, when ``task`` is ``'becomeCandidate'``, turns this node
    into a candidate: the election timer is cancelled, the term is
    incremented, the node votes for itself, state is saved, the per-peer vote
    table is reset, the election timer is restarted and vote requests are
    sent through ``candidate_do``.

    The first status record is now built from a fresh dict.  It used to be
    written into whatever ``self.cmd_output`` held from an earlier message,
    which leaked unrelated keys into the record and failed if the attribute
    had not been set yet.

    Args:
        task: ``'becomeCandidate'`` (the default) starts an election; any
            other value only prints the record and runs ``all_do``.
    """
    self.cmd_output = {
        'msgType': 'info',
        'msg': 'lost leader and start a new election',
        'leaderID': self.leader_id,
    }
    print(str(self.cmd_output))

    self.all_do()

    # Nothing was heard from a leader (or candidate), so become a candidate.
    if task == 'becomeCandidate':
        self.next_leader_election_timer_cancel()

        self.role = 'candidate'
        self.current_term += 1
        self.voted_for = self.id
        self.save()
        self.vote_ids = {_id: 0 for _id in self.peers}

        self.cmd_output = {
            'msgType': 'info',
            'msg': 'become candidate',
            'localID': self.id,
            'currrentTerm': self.current_term,
            'vote_for': self.voted_for,
        }
        print(str(self.cmd_output))

        # Restart the election timer so that, per the original author's
        # note, a timeout triggers a new election round in candidate_do.
        self.next_leader_election_timer_restart()

        # Start asking all other servers for their vote.
        self.candidate_do(task='sendVoteToPeers')

    # Messages from a leader or candidate are handled by the append path
    # (original author's note), not here.
    return
def candidate_do(self, task='sendVoteToPeers'):
    """Candidate step: solicit votes, step down, or restart the election.

    ``all_do`` is always run first.  Then, depending on ``task``:

    * ``'sendVoteToPeers'``: send ``RequestVote`` to every peer whose vote
      is not recorded as granted, record the answers in ``vote_ids``, and
      become leader when the granted votes reach
      ``(len(self.peers) + 1) // 2``.
    * ``'convertToFollower'``: return to the follower role and clear
      ``voted_for``.
    * ``'reElection'``: restart the election timer, bump the term, vote for
      self again, reset ``vote_ids`` and send vote requests again.

    Fixes relative to the previous version: the ``'lastLogIndex'`` field of
    the "send RequestVote" record now reports ``last_log_index`` (it used to
    repeat ``last_log_term``), and failures while requesting a vote are
    caught with ``except Exception`` instead of a bare ``except``.

    NOTE(review): votes are tallied once, after every peer has been polled;
    confirm this matches the intended nesting of the tally.

    Args:
        task: one of ``'sendVoteToPeers'``, ``'reElection'``,
            ``'convertToFollower'``.
    """
    self.all_do()

    if task == 'sendVoteToPeers':
        for dst_id in self.peers:
            if self.vote_ids[dst_id] != 0:
                continue  # vote already granted by this peer

            self.cmd_output = {
                'msgType': 'info',
                'msg': 'send RequestVote',
                'localID': self.id,
                'to': dst_id,
                'currrentTerm': self.current_term,
                'lastLogTerm': self.log.last_log_term,
                'lastLogIndex': self.log.last_log_index,
            }
            print(str(self.cmd_output))

            peer_addr = str(self.peers[dst_id][0]) + ':' + str(self.peers[dst_id][1])
            stub = self.stub_dict[peer_addr]
            try:
                response = stub.RequestVote(
                    rpcService_pb2.requestVoteRequest(
                        term=self.current_term,
                        candidateId=self.id,
                        last_log_index=self.log.last_log_index,
                        last_log_term=self.log.last_log_term),
                    self.connect_timeout_in_seconds)
            except Exception:
                # Best effort: a peer that cannot be reached simply does
                # not contribute a vote in this round.
                self.cmd_output = {
                    'msgType': 'info',
                    'msg': 'send RequestVote error',
                    'localID': self.id,
                    'to': dst_id,
                }
                print(str(self.cmd_output))
                continue

            self.vote_ids[response.responserId] = response.votedGranted
            self.cmd_output = {
                'msgType': 'MsgVoteResp',
                'msg': 'received MsgVoteResp',
                'localID': self.id,
                'currentTerm': self.current_term,
                'msgFrom': response.responserId,
                'msgReject': not response.votedGranted,
            }
            print(str(self.cmd_output))

        vote_count = sum(self.vote_ids.values())
        if vote_count >= (len(self.peers) + 1) // 2:  # e.g. 2 of 3, 3 of 4
            self.role = 'leader'
            self.voted_for = None
            self.save()
            self.next_index = {_id: self.log.last_log_index + 1 for _id in self.peers}
            self.match_index = {_id: 0 for _id in self.peers}

            self.cmd_output = {
                'msgType': 'info',
                'msg': 'become Leader',
                'localID': self.id,
            }
            print(str(self.cmd_output))

            # A leader does not need the election timeout.
            self.next_leader_election_timer_cancel()
            # Start heartbeats so the other nodes stop holding elections.
            # Per the original author's note this only needs to be called
            # once because next_heartbeat_timer_restart re-arms itself.
            self.next_heartbeat_timer_restart()

            self.cmd_output = {
                'msgType': 'info',
                'msg': 'bcastAppend',
                'localID': self.id,
                'committedIndex': self.commit_index,
                'lastApplied': self.last_applied,
                'lastLogTerm': self.log.last_log_term,
                'lastLogIndex': self.log.last_log_index,
            }
            print(str(self.cmd_output))
            self.leader_do(data=None)
        return

    elif task == 'convertToFollower':
        self.cmd_output = {
            'msgType': 'MsgHeartBeat',
            'msg': 'candidate received MsgHeartBeat, become follower',
            'localID': self.id,
            'currentTerm': self.current_term,
        }
        print(str(self.cmd_output))

        self.role = 'follower'
        self.voted_for = None
        self.save()
        return

    elif task == 'reElection':
        self.cmd_output = {
            'msgType': 'info',
            'msg': 'election timeout and start a new election',
            'localID': self.id,
        }
        print(str(self.cmd_output))

        self.next_leader_election_timer_cancel()
        self.next_leader_election_timer_restart()

        self.role = 'candidate'
        self.current_term += 1
        self.voted_for = self.id
        self.save()
        self.vote_ids = {_id: 0 for _id in self.peers}

        # Start asking all other servers for their vote again.
        self.candidate_do(task='sendVoteToPeers')
        return
candidate convertToFollower') self.cmd_output = {} self.cmd_output = {'msgType':'MsgHeartBeat', 'msg':'candidate received MsgHeartBeat, become follower', 'localID':self.id, 'currentTerm':self.current_term} print(str(self.cmd_output)) self.role = 'follower' self.voted_for = None self.save() # print("\n") return elif task=='reElection': self.cmd_output = {} self.cmd_output = {'msgType':'info', 'msg':'election timeout and start a new election', 'localID':self.id} print(str(self.cmd_output)) self.next_leader_election_timer_cancel() # logging.info('candidate: 1. leader_election timeout') # logging.info('candidate: 2. reElection') # logging.info('candidate: 3. become candidate') self.next_leader_election_timer_restart() self.role = 'candidate' self.current_term += 1 self.voted_for = self.id self.save() self.vote_ids = {_id: 0 for _id in self.peers} # candidate开始发vote给all other servers self.candidate_do(task='sendVoteToPeers') # print("\n") return def leader_do(self, data): # logging.info('ENTER leader_do') self.all_do() # 接收来自client的数据:client_append_entries if data!=None and data.type=='client_append_entries': entry_list=[] entry_list.append(str(self.current_term)) entry_list.append(data.key) entry_list.append(data.value) entry_list.append(data.type) entry_list.append(data.start_time) entry_list.append(data.opera_type) entry_str = ' '.join(entry_list) self.log.append_entries(self.log.last_log_index, [entry_str]) # logging.info('leader:1. recv append_entries from client ' + data.clientport) # logging.info('leader:2. log append_entries') # logging.info('leader:3. log save') # print("\n") return if data !=None and data.type == 'append_entries_response': if data.responserTerm==self.current_term:#不管是heartbeat or not都要处理 # logging.info('leader:1. recv append_entries_response from follower ' + data.responserId) if data.success == False: self.next_index[data.responserId] -= 1 # logging.info('leader:2. success = False') # logging.info('leader:3. 
next_index - 1') else: self.match_index[data.responserId] = self.next_index[data.responserId] self.next_index[data.responserId] = self.log.last_log_index + 1 # logging.info('leader:2. success = True') # logging.info('leader:3. match_index = ' + str(self.match_index[data.responserId]) + ' next_index = ' + str(self.next_index[data.responserId])) # 下面这段是leader把大部分server已经复制到log的数据,提交反馈到client while True: N = self.commit_index + 1 count = 0 for _id in self.match_index: #这里仿佛是next id if self.match_index[_id] >= N: count += 1 if count >= (len(self.peers)+1)//2: #2/3 3/4 self.commit_index = N start_time = self.log.get_one_entries(self.commit_index-1).split(' ')[-2] if self.client_port: # with grpc.insecure_channel('localhost:'+self.client_port) as channel: stub = self.stub_dict['localhost:'+self.client_port]#rpcService_pb2_grpc.RPCStub(channel) try: response = stub.Apply(rpcService_pb2.applyRequest(commit_index=self.commit_index, start_time=start_time), self.connect_timeout_in_seconds) except: pass break else: break