def get_write_req(self, client_id, write_size, downstream_dn_list):
    """Build the request chain for a pipelined replica write.

    Note: this simplifies the real HDFS behavior. In real HDFS a write is
    divided into multiple packets and a datanode flushes each packet to the
    downstream datanodes before writing it locally; here the whole write is a
    single request (acked once) and the local write happens before pushing
    downstream.

    Returns (xceive_req, ack_done) where xceive_req is the head request to
    submit and ack_done is the event fired when the final ack completes.
    """
    xceive_req = StageReq(
        self.env, self.xceive_stage, client_id,
        {self.phy_node.io_res: write_size,
         self.phy_node.net_res: write_size,
         self.phy_node.cpu_res: DataNode.DN_PER_REQ_CPU_TIME},
        [], [])
    packet_ack_req = StageReq(
        self.env, self.packet_ack_stage, client_id,
        {self.phy_node.net_res: write_size * DataNode.ACK_SIZE_RATIO,
         self.phy_node.cpu_res: write_size * DataNode.ACK_CPU_RATIO},
        [], [])
    xceive_req.downstream_reqs.append(packet_ack_req)

    if downstream_dn_list:
        # Recursively build the pipeline for the rest of the replica chain;
        # our ack blocks until the downstream ack has completed.
        next_dn = downstream_dn_list[0]
        d_xceive_req, d_ack_done = next_dn.get_write_req(
            client_id, write_size, downstream_dn_list[1:])
        xceive_req.downstream_reqs.append(d_xceive_req)
        packet_ack_req.blocking_evts.append(d_ack_done)

    return xceive_req, packet_ack_req.done
def get_two_stage_req(self, client_id, r1_amt, r2_amt):
    """Build a request that consumes r1_amt of res1 at stage1 and, when
    r2_amt is non-zero, flows downstream to stage2 consuming r2_amt of res2.

    Returns (head_req, done_evt): head_req is the request to submit and
    done_evt fires when the last stage of the chain completes.
    """
    if r2_amt == 0:
        # Single-stage request: done as soon as stage1 finishes.
        s1_req = StageReq(self.env, self.stage1, client_id,
                          {self.res1: r1_amt, self.res2: 0}, [], [])
        return s1_req, s1_req.done

    s2_req = StageReq(self.env, self.stage2, client_id,
                      {self.res2: r2_amt, self.res1: 0}, [], [])
    s1_req = StageReq(self.env, self.stage1, client_id,
                      {self.res1: r1_amt, self.res2: 0}, [], [])
    s1_req.downstream_reqs.append(s2_req)
    return s1_req, s2_req.done
def get_metadata_req(self, client_id, lock_time):
    """Build a namespace-metadata operation request.

    :param client_id: id of the client issuing the operation
    :param lock_time: time spent holding the namespace lock
    :return: (req, done), where req is a StageReq that represents looking up
        (or modifying) the namespace metadata, and done is a simpy event
        indicating the completion of the metadata operation
    """
    req = StageReq(self.env, self.rpc_process_stage, client_id,
                   {self.lock_res: lock_time}, [], [])
    return req, req.done
def run_sim():
    """Minimal end-to-end example: wire up one resource, one stage, and one
    client, then run the simulation for 100 time units."""
    # Step 1: initialize the simpy simulation environment.
    env = simpy.Environment()

    # Step 2: create the resource of the system.
    resource = Resource(env, "resource", 1, 10, WFQScheduler(env, float('inf')))

    # Step 3: define stages that take requests.
    # handle_req_cost_func maps a request to a cost; the stage's scheduler
    # uses that cost to make scheduling decisions.
    def handle_req_cost_func(req):
        return req.resource_profile[resource]

    stage = Stage(env, "stage", 10, WFQScheduler(env, float('inf')),
                  handle_req_cost_func)

    # Step 4: define clients that issue requests.
    def new_req_func():
        return StageReq(env, stage, 1, {resource: 1}, [], [])

    # Client takes a new_req_func, continuously generates new requests with
    # it, and waits for their completion.
    client = Client(env, "client", 1, 10, new_req_func, 0)

    # Finally, run the simulation.
    env.run(until=100)
def get_short_circuit_req(self, client_id, read_size):
    """Build a short-circuit read request: charges local disk I/O only
    (zero network and CPU cost). Returns (req, done_evt)."""
    req = StageReq(self.env, self.xceive_stage, client_id,
                   {self.phy_node.io_res: read_size,
                    self.phy_node.net_res: 0,
                    self.phy_node.cpu_res: 0},
                   [], [])
    return req, req.done
def get_read_req(self, client_id, read_size):
    """Build a regular read request: charges disk I/O and network
    proportional to read_size plus a fixed per-request CPU cost.
    Returns (req, done_evt)."""
    req = StageReq(self.env, self.xceive_stage, client_id,
                   {self.phy_node.io_res: read_size,
                    self.phy_node.net_res: read_size,
                    self.phy_node.cpu_res: DataNode.DN_PER_REQ_CPU_TIME},
                   [], [])
    return req, req.done
def submit_req_impl(self, cassandra_req, vn_id, is_async_sys=False):
    """Build and submit the full request graph for one Cassandra operation.

    The coordinator (self) handles the client request (c_req_handle), fans
    out to `consistency_access_sum` virtual-node replicas (local processing
    or msg_out/msg_in message passing to remote hosts), collects replies in a
    conditional request_response stage, and finally responds (c_respond).

    :param cassandra_req: the operation being simulated; supplies per-stage
        CPU times, request/reply sizes, and the consistency level
    :param vn_id: the virtual node the operation targets
    :param is_async_sys: if False (default), c_req_handle blocks on the
        request_response stage before responding; if True, request_response
        sends directly downstream to c_respond
    :return: (submit_result, done_evt) where submit_result is the value of
        c_req_handle_req.submit() and done_evt fires when c_respond completes
    """
    submit_time = self.env.now
    # Phase 1 of c_req_handle: CPU to parse/handle plus network to receive
    # the client request.
    c_req_handle_res_cost_list = [
        {self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('c_req_handle')),
         self.get_net_res(): cassandra_req.req_sz}]
    c_req_handle_res_profile = ResBlockingProfile(c_req_handle_res_cost_list)
    c_req_handle_req = BlkStageReq(self.env, self.get_stage(guess_stage_name('c_req_handle')),
                                   cassandra_req.client_id)
    c_req_handle_req.append_res_blk_profile(c_req_handle_res_profile)

    # Final response back to the client: CPU plus reply-sized network cost.
    c_respond_res_dict = {self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('c_respond')),
                          self.get_net_res(): cassandra_req.reply_sz}
    c_respond_req = StageReq(self.env, self.get_stage(guess_stage_name('c_respond')),
                             cassandra_req.client_id, c_respond_res_dict, [], [])

    # Determine which replicas (virtual nodes) must be accessed to satisfy
    # the request's consistency level.
    access_vn_sum = cassandra_req.consistency_access_sum
    access_vn_id_list = self.cassandra_sys.compute_vn_id_list(vn_id, access_vn_sum)

    # Conditional stage: completes only once all replica replies arrive
    # (condition_func is attached below).
    request_response_req = ConditionalStageReq(self.env, self.get_stage(guess_stage_name('request_response')),
                                               cassandra_req.client_id, {}, [], [],
                                               condition_func=None)
    if not is_async_sys:
        # Original (synchronous) version: the c_req_handle stage blocks on
        # the request_response stage.
        c_req_handle_res_profile.add_blocking_event(request_response_req.done)
        # The 2nd phase of the c_req_handle stage; its downstream stage is
        # c_respond.
        c_req_handle_callback_res_profile = ResBlockingProfile([])
        c_req_handle_req.append_res_blk_profile(c_req_handle_callback_res_profile)
        c_req_handle_callback_res_profile.add_downstream_req(c_respond_req)
    else:
        # Async version: request_response sends directly downstream to
        # c_respond instead of unblocking c_req_handle.
        request_response_req.add_downstream_req(c_respond_req)

    ############################################################################################
    # For the conditional stage, set the completion condition: it is done
    # only after all `access_vn_sum` replica replies have been counted.
    request_response_req.total_vn_sum = access_vn_sum
    request_response_req.current_vn_sum = 0
    # CPU charged for a non-final reply (condition not yet satisfied) ...
    request_response_req.unsatisfy_res_profile = {
        self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('request_response'))}
    # ... and for the final reply (same cost here, kept as two attributes so
    # they could differ).
    request_response_req.satisfy_res_profile = {
        self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('request_response'))}
    request_response_req.can_be_done = False
    request_response_req.cur_client = cassandra_req.client
    request_response_req.client_submit_time = submit_time

    def get_conditional_res_profile(tmp_request_response_req):
        # Callback to get the resource consumption of each submission of
        # this request. Invoked once per arriving replica reply; returns
        # (request-to-run, resource-profile-to-charge).
        tmp_request_response_req.current_vn_sum += 1
        assert tmp_request_response_req.current_vn_sum <= tmp_request_response_req.total_vn_sum
        if tmp_request_response_req.current_vn_sum == tmp_request_response_req.total_vn_sum:
            # Last reply: mark done and run with downstream/blocking intact.
            tmp_request_response_req.can_be_done = True
            return tmp_request_response_req, tmp_request_response_req.satisfy_res_profile
        else:
            # Intermediate reply: run a shallow copy stripped of downstream
            # requests and blocking events, so only the final reply triggers
            # the response path.
            no_downstream_tmp_req = copy.copy(tmp_request_response_req)
            no_downstream_tmp_req.reset_downstream_req()
            no_downstream_tmp_req.reset_blocking_event()
            return no_downstream_tmp_req, tmp_request_response_req.unsatisfy_res_profile

    request_response_req.condition_func = get_conditional_res_profile
    #############################################################################################

    for i in range(access_vn_sum):
        # Access all the replicas required by the consistency level.
        cur_vn_id = access_vn_id_list[i]
        # Total CPU across all processing stages of this request type.
        proc_cpu_usage = 0
        for stg_name in cassandra_req.proc_stg_name_list:
            proc_cpu_usage += cassandra_req.get_stg_cpu_time(guess_stage_name(stg_name))
        if self.is_vnode_local(cur_vn_id):
            # Local vnode: no network usage, just CPU + disk I/O.
            proc_res_dict = {self.get_cpu_res(): proc_cpu_usage,
                             self.get_io_res(): (cassandra_req.read_sz + cassandra_req.write_sz)}
            # For now just use the first proc stage to charge all resources.
            proc_req = StageReq(self.env,
                                self.get_stage(guess_stage_name(cassandra_req.proc_stg_name_list[0])),
                                cassandra_req.client_id, proc_res_dict, [], [])
            c_req_handle_res_profile.add_downstream_req(proc_req)
            proc_req.add_downstream_req(request_response_req)
        else:
            # Remote vnode: coordinator -> remote host -> coordinator via
            # msg_out/msg_in. msg_out/msg_in are named from the view of the
            # first access (coordinator) node.
            cur_pn = self.cassandra_sys.get_host_by_vn(cur_vn_id)
            # Coordinator sends the request message.
            msg_out_send_res_cost_dict = {
                self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('msg_out')),
                self.get_net_res(): cassandra_req.msg_pass_req_sz}
            msg_out_send_req = StageReq(self.env, self.get_stage(guess_stage_name('msg_out')),
                                        cassandra_req.client_id, msg_out_send_res_cost_dict, [], [])
            # Remote host receives it. NOTE(review): this charges the
            # 'msg_out' stage's CPU time but is submitted to the remote
            # 'msg_in' stage — possibly intentional (cost of the outbound
            # message, paid on arrival), but worth confirming.
            msg_out_recv_res_cost_dict = {
                cur_pn.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('msg_out')),
                cur_pn.get_net_res(): cassandra_req.msg_pass_req_sz}
            msg_out_recv_req = StageReq(cur_pn.env, cur_pn.get_stage(guess_stage_name('msg_in')),
                                        cassandra_req.client_id, msg_out_recv_res_cost_dict, [], [])
            msg_out_send_req.add_downstream_req(msg_out_recv_req)
            c_req_handle_res_profile.add_downstream_req(msg_out_send_req)
            # Remote host processes the operation (CPU + disk I/O).
            proc_res_dict = {cur_pn.get_cpu_res(): proc_cpu_usage,
                             cur_pn.get_io_res(): (cassandra_req.read_sz + cassandra_req.write_sz)}
            proc_req = StageReq(cur_pn.env,
                                cur_pn.get_stage(guess_stage_name(cassandra_req.proc_stg_name_list[0])),
                                cassandra_req.client_id, proc_res_dict, [], [])
            msg_out_recv_req.add_downstream_req(proc_req)
            # Remote host sends the reply back.
            msg_in_send_res_cost_dict = {
                cur_pn.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('msg_in')),
                cur_pn.get_net_res(): cassandra_req.msg_pass_reply_sz}
            msg_in_send_req = StageReq(cur_pn.env, cur_pn.get_stage(guess_stage_name('msg_out')),
                                       cassandra_req.client_id, msg_in_send_res_cost_dict, [], [])
            proc_req.add_downstream_req(msg_in_send_req)
            # Coordinator receives the reply and feeds request_response.
            # NOTE(review): the send side above charges msg_pass_reply_sz for
            # the reply, but this receive side charges msg_pass_req_sz —
            # asymmetric; looks like it may be a typo for msg_pass_reply_sz.
            # Confirm before changing.
            msg_in_recv_res_cost_dict = {
                self.get_cpu_res(): cassandra_req.get_stg_cpu_time(guess_stage_name('msg_in')),
                self.get_net_res(): cassandra_req.msg_pass_req_sz}
            msg_in_recv_req = StageReq(self.env, self.get_stage(guess_stage_name('msg_in')),
                                       cassandra_req.client_id, msg_in_recv_res_cost_dict, [], [])
            msg_in_send_req.add_downstream_req(msg_in_recv_req)
            msg_in_recv_req.add_downstream_req(request_response_req)
    return c_req_handle_req.submit(), c_respond_req.done