def execute(self):
    """Extend the executed log over consecutively decided slots and publish it.

    Walks ``decided_log`` upward from the highest slot already present in
    ``executed_log`` (slot 0 when nothing has been executed) and copies each
    value across until the first slot that has not been decided.  The
    executed log and a hash of its items are then reported through
    ``MyLogging`` and ``self.file_logger`` and written as YAML to
    ``../result/<learner_id>.yml``.
    """
    # The scan restarts at the last executed slot rather than the one after
    # it; re-copying that slot just rewrites the value from decided_log.
    cursor = 0 if len(self.executed_log) == 0 else max(self.executed_log.keys())
    while cursor in self.decided_log:
        self.executed_log[cursor] = self.decided_log[cursor]
        cursor += 1

    learner = str(self.learner_id)

    values_line = "learner id %s executed values: %s" % (
        learner, str(self.executed_log))
    MyLogging.info(values_line)
    self.file_logger.info(values_line)

    # Hash of the (slot, value) pairs, reported so logs can be compared.
    log_hash = hash(tuple(self.executed_log.items()))
    hash_line = "learner id %s executed hash: %s" % (learner, str(log_hash))
    MyLogging.info(hash_line)
    self.file_logger.info(hash_line)

    # Write to a temporary file, force it to disk, then rename it into place
    # so a reader never sees a half-written result file.
    result_file = '../result/{}.yml'.format(self.learner_id)
    tmp = result_file + '.tmp'
    payload = {
        'learner_id': self.learner_id,
        'executed_log': self.executed_log,
        'executed_hash': log_hash,
    }
    with open(tmp, 'w') as f:
        yaml.dump(payload, f)
        f.flush()
        os.fsync(f.fileno())
    os.rename(tmp, result_file)
def testcase3(server_id, msg, view):
    """Test case 3: the primary dies, then the new primary dies again.

    The first crash is delegated to ``server_crash_on_msg``; the second one
    is triggered unconditionally once ``view`` equals 1.
    """
    MyLogging.debug("This is test case 3")

    # First primary: crash on the trigger message.
    server_crash_on_msg(server_id, msg)

    # Second primary: only relevant in view 1.
    if view != 1:
        return
    MyLogging.error("server id %s crashes" % (str(server_id)))
    exit()
def send_msg(self, host, port, msg):
    """Send ``msg`` (pickled) to ``(host, port)`` over a fresh TCP connection.

    The call first sleeps for a random fraction of a second, then consults
    ``self.message_loss()`` and silently drops the message when that returns
    ``True``.  A refused/failed connection is logged and ignored.  If the
    first ``sendall`` fails with a socket error the send is retried once
    after 0.1 s; a failure of the retry propagates to the caller.

    The socket is always closed, including on the connection-failure and
    retry-failure paths.
    """
    # Random delay in [0, 1) seconds before every send.
    delay = np.random.rand()
    time.sleep(1 * delay)

    if self.message_loss() is True:
        MyLogging.debug("DROP: " + str(msg))
        return

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        try:
            s.connect((host, port))
        except socket.error:
            MyLogging.debug("listening port closed, ignore this msg")
            return

        MyLogging.debug("SEND: " + str(msg))
        # Serialize once; the same bytes are reused if a retry is needed.
        payload = pickle.dumps(msg)
        try:
            s.sendall(payload)
        except socket.error:
            MyLogging.debug("try to resend due to socket error")
            time.sleep(0.1)
            s.sendall(payload)
    finally:
        # Release the descriptor on every exit path (early return or error).
        s.close()
def getProposalPack(self, decided_log):
    """Build the proposals needed to cover every slot below the next free one.

    Reads the messages collected for ``self.proposal_id``.  For each slot
    mentioned in a message's ``'accepted_id'`` map, the value and client
    info reported with the largest accepted id are kept (on a tie the later
    message wins).  ``self.next_slot`` is then reset to one past the highest
    slot seen in either ``decided_log`` or the accepted values.

    Returns a dict mapping slot index to ``{'val', 'client_info'}``:
    previously accepted values are re-proposed for all such slots (not only
    holes), and slots that are neither accepted nor decided get a
    ``'no-op'`` entry.
    """
    best_id = {}      # slot index -> largest accepted proposal id seen
    best_val = {}     # slot index -> value accepted under that id
    best_client = {}  # slot index -> client info accepted under that id

    for promise in self.msg_collection[self.proposal_id]:
        for slot, acc_id in promise['accepted_id'].items():
            # Skip only when a strictly larger id is already recorded.
            if slot in best_id and not (acc_id >= best_id[slot]):
                continue
            best_id[slot] = acc_id
            best_val[slot] = promise['accepted_val'][slot]
            best_client[slot] = promise['accepted_client_info'][slot]

    last_decided = -1 if len(decided_log) == 0 else max(decided_log.keys())
    last_accepted = -1 if len(best_val) == 0 else max(best_val.keys())

    self.next_slot = max(last_decided, last_accepted) + 1
    MyLogging.debug("reset next slot to be %s" % (str(self.next_slot)))

    pack = {}
    # Walk from the highest slot down to 0, as the entries are inserted in
    # that order.
    for slot in reversed(range(self.next_slot)):
        if slot in best_val:
            pack[slot] = {
                'val': best_val[slot],
                'client_info': best_client[slot]
            }
        elif slot not in decided_log:
            # Undecided slot nobody has accepted anything for: fill with no-op.
            pack[slot] = {'val': 'no-op', 'client_info': None}
    return pack
def decide(self, slot_idx):
    """Record the value of ``slot_idx`` as decided and acknowledge the client.

    Called once accept messages from a quorum have been collected for the
    slot; "decided" is not the same as "executed".  The decided value is
    taken from the first collected message for the slot's current proposal
    id, persisted to the state backup, stored in ``self.decided_log`` and,
    unless it is a ``'no-op'``, acknowledged to the originating client.
    ``self.execute()`` is invoked at the end.

    Note: a slot that was already decided is not short-circuited; it is
    processed again.
    """
    slot = self.slots[slot_idx]
    slot.decided_id = slot.proposal_id

    first_vote = slot.msg_collection[slot.proposal_id][0]
    decided_val = first_vote['val']
    client_info = first_vote['client_info']

    # Persist to the backup before touching the in-memory log.
    state = load_state(self.state_backup)
    state['decided_log'][slot_idx] = decided_val
    save_state(self.state_backup, state)
    self.decided_log[slot_idx] = decided_val

    if decided_val != 'no-op':
        # Read every field up front, before any state is modified.
        client_host, client_port, client_id, client_seq = [
            client_info[key]
            for key in ('client_host', 'client_port', 'client_id',
                        'clt_seq_num')
        ]
        self.decided_clt_seq[client_id] = client_seq
        ack = {'type': 'ack', 'val': decided_val, 'client_info': client_info}
        self.messenger.send_msg(client_host, client_port, ack)

    learner = str(self.learner_id)
    MyLogging.debug(
        "==========================learner id %s decide the value: %s" %
        (learner, str(decided_val)))
    MyLogging.debug(
        "++++++++++++++++++++++++++learner id %s decide values:" % (learner))
    MyLogging.debug(self.decided_log)

    self.execute()
def server_crash(server_id, crash_rate):
    """Exit the process when a fresh random draw falls below ``crash_rate``.

    One value is drawn from ``np.random.rand()`` on every call; if it is
    not below ``crash_rate`` the function returns without doing anything.
    """
    crashes_now = np.random.rand() < crash_rate
    if not crashes_now:
        return
    MyLogging.error("server id %s crashes" % (str(server_id)))
    exit()
def server_crash_on_msg(server_id, msg):
    """Exit the process when ``msg`` is the designated trigger request.

    The trigger is the first transmission (``resend_idx == 0``) of the
    request with ``clt_seq_num == 2`` from ``client_id == 0``.  Any other
    message leaves the process running.
    """
    info = msg['client_info']
    # The checks are made one at a time, in this order, so later keys are
    # only looked up when the earlier conditions already matched.
    if info['client_id'] != 0:
        return
    if info['clt_seq_num'] != 2:
        return
    if msg['resend_idx'] != 0:
        return
    MyLogging.error("server id %s crashes" % (str(server_id)))
    exit()
def testcase4(msg, proposer):
    """Test case 4: make the proposer skip a slot when ``skipSlot(msg)`` holds.

    Skipping is done by advancing ``proposer.next_slot`` by one; nothing is
    changed when ``skipSlot(msg)`` is falsy.
    """
    MyLogging.debug("This is test case 4")
    if not skipSlot(msg):
        return
    skipped = proposer.next_slot
    MyLogging.debug("skip slot %s" % (str(skipped)))
    proposer.next_slot = skipped + 1
def testcase2(server_id, msg):
    """Test case 2: crash this server if ``msg`` is the trigger request.

    Logs the test-case banner and hands the decision to
    ``server_crash_on_msg``.
    """
    banner = "This is test case 2"
    MyLogging.debug(banner)
    server_crash_on_msg(server_id, msg)
def server(server_id, config_file='../config/servers.yaml'):
    """Run one replica (proposer + acceptor + learner) and serve messages forever.

    Args:
        server_id: Index of this server in the configured ``servers_list``
            (converted with ``int``).
        config_file: Path of the YAML configuration.  Keys read here:
            ``f``, ``state_backup_folder``, ``servers_list``,
            ``msg_drop_rate`` and the optional test knobs ``x`` and
            ``num_failed_primary``.

    The server restores (or creates) its persisted state, binds its
    listening socket and then handles one pickled message per accepted
    connection.  Message types handled: ``request``, ``promise``,
    ``prepare``, ``propose`` and ``accept``.  The server whose id equals
    ``view % num_server`` acts as leader.  The function only ends through
    ``exit()`` (simulated crashes) or an unhandled exception.
    """
    server_id = int(server_id)

    # Load the config file.  safe_load is used instead of a bare yaml.load:
    # calling yaml.load without a Loader is deprecated and rejected by
    # recent PyYAML, and a plain-data config needs nothing beyond safe_load.
    with open(config_file, 'r') as config_handler:
        config = yaml.safe_load(config_handler)

    f = int(config['f'])  # the number of failures that can be tolerated
    state_backup_folder = config['state_backup_folder']
    if not os.path.exists(state_backup_folder):
        call(['mkdir', '-p', state_backup_folder])

    num_server = 2 * f + 1
    servers_list = {
        server_idx: config['servers_list'][server_idx]
        for server_idx in range(num_server)
    }
    # Floor division keeps the quorum an integer (f + 1); true division
    # would yield a float such as 2.5 under Python 3.
    quorum = num_server // 2 + 1

    # Load the persisted state, creating a fresh one on first start.
    state_backup = get_state_backup(server_id, state_backup_folder)
    if not os.path.exists(state_backup):
        state = dict(view=0,
                     decided_log={},
                     promised_proposal_id=None,
                     accepted_proposal_id={},
                     accepted_proposal_val={},
                     accepted_client_info={})
        save_state(state_backup, state)
    else:
        MyLogging.info("Recovering server")
        state = load_state(state_backup)

    loss_rate = config['msg_drop_rate']
    proposer = Proposer(server_id, servers_list, loss_rate)
    acceptor = Acceptor(server_id, servers_list,
                        state['promised_proposal_id'],
                        state['accepted_proposal_id'],
                        state['accepted_proposal_val'],
                        state['accepted_client_info'], state_backup,
                        loss_rate)
    learner = Learner(server_id, quorum, state['decided_log'], state_backup,
                      loss_rate)

    # Initialize view.  The view will be used as the proposal_id of the
    # elected leader.
    view = state['view']
    num_acceptors = num_server

    HOST = servers_list[server_id]['host']
    PORT = servers_list[server_id]['port']
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.bind((HOST, PORT))
    s.listen(100)

    # For test case 4: slot x is the slot to skip.
    if 'x' in config and config['x'] >= 0:
        x = int(config['x'])
    else:
        x = None
    if 'num_failed_primary' in config and config['num_failed_primary'] >= 0:
        num_failed_primary = int(config['num_failed_primary'])
    else:
        num_failed_primary = None
    # For test case 4: the server on which the skipped slot occurs.
    server_skip = 0

    request_val_queue = collections.deque()
    client_info_queue = collections.deque()

    def fill_proposal_pack(proposal_pack):
        """Move all queued requests into proposal_pack, then apply test case 4."""
        for _ in range(len(request_val_queue)):
            request_val = request_val_queue.popleft()
            client_info = client_info_queue.popleft()
            proposal_pack = proposer.addNewRequest(proposal_pack,
                                                   request_val, client_info)
        # Test case 4: drop slot x from the proposal on the chosen server.
        if x is not None and x in proposal_pack and server_skip == server_id:
            MyLogging.debug('At slot %s: %s' %
                            (str(x), str(proposal_pack[x])))
            MyLogging.debug(
                'proposer %s skips slot %s for server_skip %s' %
                (str(server_id), str(x), str(server_skip)))
            del proposal_pack[x]
        return proposal_pack

    while True:
        # Try to crash (leader only).
        # NOTE(review): crash_rate is not defined in this function;
        # presumably a module-level setting - confirm.
        if view % num_acceptors == server_id:
            server_crash(server_id, crash_rate)

        MyLogging.debug("wait for connection")
        conn, addr = s.accept()
        try:
            MyLogging.debug('Connection by ' + str(addr))
            # NOTE(review): a single recv of 8192 bytes is assumed to hold
            # the whole message; larger messages would be truncated.
            data = conn.recv(4096 * 2)
            # NOTE(review): pickle.loads on network data can execute
            # arbitrary code; acceptable only between trusted peers.
            msg = pickle.loads(data)
            MyLogging.debug('RCVD: ' + str(msg))

            if msg['type'] == 'request':
                if msg['resend_idx'] != 0:
                    # A resent message triggers a view change.
                    # Save the updated state first.
                    state = load_state(state_backup)
                    state['view'] = view + 1
                    save_state(state_backup, state)
                    view += 1
                    # New leader clears the request queue.
                    request_val_queue.clear()
                    client_info_queue.clear()
                    proposer.need_prepare = True
                    MyLogging.debug("change to view %s" % (str(view)))

                if view % num_acceptors == server_id:
                    # This server is the leader.
                    # Test cases 2 and 3: force the first primaries to crash.
                    if num_failed_primary is not None and server_id < num_failed_primary:
                        MyLogging.info("force the primary %s to crash" %
                                       (str(server_id)))
                        MyLogging.error("server id %s crashes" %
                                        (str(server_id)))
                        exit()
                    # Test case 4: crash once slot x + 1 has been learned.
                    if x is not None and x + 1 in learner.decided_log and server_skip == server_id:
                        MyLogging.info('server id %s has learned slot %s' %
                                       (str(server_id), str(x + 1)))
                        MyLogging.error("server id %s crashes" %
                                        (str(server_id)))
                        exit()

                    request_val_queue.append(msg['request_val'])
                    client_info_queue.append(msg['client_info'])
                    if proposer.need_prepare is True:
                        proposer.prepare(view)
                    else:
                        # Directly propose without the prepare stage.
                        MyLogging.debug("no need to prepare")
                        MyLogging.debug(request_val_queue)
                        proposal_pack = fill_proposal_pack({})
                        proposer.propose(proposal_pack, without_prepare=True)

            elif msg['type'] == 'promise':
                proposer.addVote(msg)
                if proposer.checkQuorumSatisfied() is True:
                    if proposer.need_prepare is True:
                        proposal_pack = proposer.getProposalPack(
                            learner.getDecidedLog())
                        MyLogging.debug("proposal pack for holes: %s" %
                                        (str(proposal_pack)))
                        proposal_pack = fill_proposal_pack(proposal_pack)
                        proposer.propose(proposal_pack)
                        proposer.need_prepare = False

            elif msg['type'] == 'prepare':
                # Save the updated state first, then catch up with the most
                # recent view.
                state = load_state(state_backup)
                state['view'] = max(view, msg['proposal_id'])
                save_state(state_backup, state)
                view = max(view, msg['proposal_id'])
                MyLogging.debug("change to max view %s" % (str(view)))
                acceptor.promise(msg)

            elif msg['type'] == 'propose':
                acceptor.accept(msg)

            elif msg['type'] == 'accept':
                slot_idx = msg['slot_idx']
                learner.addVote(msg, slot_idx)
                if learner.checkQuorumSatisfied(slot_idx) is True:
                    learner.decide(slot_idx)
        finally:
            # Close the connection even if handling raised or exit() fired.
            conn.close()
def waitForAck(client_host, client_port, timeout, clt_seq_num):
    """Listen on ``(client_host, client_port)`` for the ack of one request.

    Args:
        client_host: Address to bind the listening socket to.
        client_port: Port to bind the listening socket to.
        timeout: Seconds to wait for each incoming connection.
        clt_seq_num: Client sequence number the ack must carry.

    Returns:
        ``True`` once a message of type ``'ack'`` whose
        ``client_info['clt_seq_num']`` equals ``clt_seq_num`` arrives;
        ``False`` when an ``accept`` times out first.  Other messages are
        ignored and the wait restarts with a fresh timeout.

    The listening socket and every accepted connection are closed before
    returning, so the same port can be bound again by the next call.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        s.bind((client_host, client_port))
        s.listen(100)
        while True:
            MyLogging.debug('set timeout for %s s' % str(timeout))
            s.settimeout(timeout)
            MyLogging.debug("wwwwwwwwwwwwwwwait for ack")
            try:
                conn, addr = s.accept()
            except socket.timeout:
                MyLogging.debug("timeout on ack")
                return False

            try:
                MyLogging.debug('Connected by ' + str(addr))
                data = conn.recv(4096 * 2)
                # NOTE(review): pickle.loads on network data can execute
                # arbitrary code; acceptable only between trusted peers.
                msg = pickle.loads(data)
                MyLogging.debug('RCVD: ' + str(msg))
            finally:
                conn.close()

            # Wait for the right clt_seq_num; anything else is skipped.
            if msg['type'] == 'ack' and msg['client_info'][
                    'clt_seq_num'] == clt_seq_num:
                MyLogging.debug(
                    'client %s received ack for request (clt seq num) %s' %
                    (str(msg['client_info']['client_id']),
                     str(msg['client_info']['clt_seq_num'])))
                return True
    finally:
        # Always release the listening socket (both return paths and errors).
        s.close()
def client(client_idx, config_file_server='../config/servers.yaml'):
    """Send a fixed batch of requests to every server, one request at a time.

    Args:
        client_idx: Index of this client (converted with ``int``); it
            selects the local listening port ``40000 + client_idx`` and must
            be in ``range(10)``.
        config_file_server: Path of the servers' YAML configuration.  Keys
            read here: ``f``, ``servers_list`` and ``msg_drop_rate``.

    Five requests are generated.  Each one is broadcast to all ``2 * f + 1``
    servers and the client then waits for the matching ack; when
    ``waitForAck`` does not return ``True`` the request is broadcast again
    with an incremented ``resend_idx``.
    """
    host_name = 'localhost'
    clients_list = {
        idx: {
            'host': host_name,
            'port': 40000 + idx
        }
        for idx in range(10)
    }

    # safe_load is used instead of a bare yaml.load: calling yaml.load
    # without a Loader is deprecated and rejected by recent PyYAML, and a
    # plain-data config needs nothing beyond safe_load.
    with open(config_file_server, 'r') as config_handler:
        config = yaml.safe_load(config_handler)

    f = int(config['f'])  # the number of failures that can be tolerated
    num_server = 2 * f + 1
    servers_list = {
        server_idx: config['servers_list'][server_idx]
        for server_idx in range(num_server)
    }
    loss_rate = config['msg_drop_rate']
    my_messenger = Messenger(loss_rate)

    client_idx = int(client_idx)
    client_host = clients_list[client_idx]['host']
    client_port = clients_list[client_idx]['port']

    request_size = 5
    request_list = [
        'client, seq: (%s, %s)' % (str(client_idx), str(request_idx))
        for request_idx in range(request_size)
    ]

    for clt_seq_num, val in enumerate(request_list):
        # Identical for every retransmission of this request.
        client_info = {
            'clt_seq_num': clt_seq_num,
            'client_id': client_idx,
            'client_host': client_host,
            'client_port': client_port
        }
        resend_idx = 0
        while True:
            msg = {
                'type': 'request',
                'request_val': val,
                'resend_idx': resend_idx,
                'client_info': client_info
            }
            # Broadcast the request to every server.
            for server_id in servers_list:
                host = servers_list[server_id]['host']
                port = servers_list[server_id]['port']
                my_messenger.send_msg(host, port, msg)

            # NOTE(review): timeout is not defined in this function;
            # presumably a module-level setting - confirm.
            if waitForAck(client_host, client_port, timeout,
                          clt_seq_num) is True:
                break
            resend_idx += 1

    MyLogging.info('client %s finished sending all %s requests' %
                   (str(client_idx), str(len(request_list))))