def account_stack_ops(exec_file, mem_instr_addr_l):
    """Print how many addresses of a trace are tagged as stack operations.

    Every address in ``mem_instr_addr_l`` is looked up in the tag table of
    the current function disassembly (``cfg.ins_tags_dict``).  Addresses
    whose tag contains the substring ``"Stack"`` are counted.  When an
    address is not known to the current disassembly (and is not 0), the
    disassembly is re-fetched for that address via
    ``disassm.get_func_disassm``.

    Two lines are written to stdout: ``"<count> <trace length>"`` and
    ``"Stack operation ratio: <ratio>"``.

    Args:
        exec_file: Forwarded unchanged to ``disassm.get_func_disassm``.
        mem_instr_addr_l: Sequence of instruction addresses.  An empty
            sequence is reported as a ratio of 0.0 instead of raising.

    Returns:
        None.
    """
    total = len(mem_instr_addr_l)
    stack_op_count = 0

    # Guard: the original indexed element 0 unconditionally and divided by
    # the length, so an empty trace crashed before printing anything.
    if total:
        cfg = disassm.get_func_disassm(exec_file, mem_instr_addr_l[0])
        for pc in mem_instr_addr_l:
            if pc not in cfg.ins_tags_dict:
                if pc == 0:
                    # Address 0 never triggers a new disassembly.
                    continue
                cfg = disassm.get_func_disassm(exec_file, pc)
            # Checked after a possible re-disassembly so that the address
            # that caused the switch is classified too; previously it was
            # part of the denominator but could never be counted.
            if "Stack" in cfg.ins_tags_dict.get(pc, ""):
                stack_op_count += 1

    # Single-argument print(...) parses identically on Python 2 and 3 and
    # emits the same text as the original print statements.
    print("%d %d" % (stack_op_count, total))
    stack_op_ratio = float(stack_op_count) / float(total) if total else 0.0
    print("Stack operation ratio: %lf" % (stack_op_ratio))
def record_time_to_update(delinq_load_addr, update_addr, trace_q, cfg,
                          time_to_update_dict, delinq_loads_till_update,
                          BBs_in_loop, delinq_loads_update_addr,
                          prefetch_decisions, conf):
    """Count trace entries between an update point and the delinquent load.

    Entries are popped from the left of ``trace_q`` until
    ``delinq_load_addr`` is popped.  The number of popped entries that are
    present in ``cfg.ins_tags_dict`` (and how many of those qualify as
    delinquent loads by the miss-ratio thresholds below) is then handed to
    ``record_update_time`` together with ``time_to_update_dict`` and
    ``delinq_loads_till_update``.

    Args:
        delinq_load_addr: Address that terminates the scan.
        update_addr: Address whose basic block seeds ``BBs_in_loop``.
        trace_q: Deque-like container (needs ``in`` and ``popleft``); it is
            consumed in place.
        cfg: Function disassembly; replaced locally by a fresh one from
            ``disassm.get_func_disassm`` when an address has no basic
            block in the current one.
        time_to_update_dict, delinq_loads_till_update: Forwarded to
            ``record_update_time``.
        BBs_in_loop: List extended in place with the basic-block addresses
            visited.
        delinq_loads_update_addr: Accepted for interface compatibility;
            not used by this function.
        prefetch_decisions: Mapping address -> object exposing ``l1_mr``,
            ``l2_mr`` and ``l3_mr``; a falsy value disables the
            delinquent-load counting.
        conf: Object exposing ``all_delinq_loads_list``.

    Returns:
        None.  Nothing is recorded when ``delinq_load_addr`` is not in
        ``trace_q``, when a 0 entry is popped, or when an address cannot be
        mapped to a basic block even after re-disassembly.
    """
    if delinq_load_addr not in trace_q:
        return

    fwd_score = 0
    fwd_delinq_loads = 0
    BB_addr = static_BB_cfg.discover_BB_for_address(update_addr, cfg.BB_dict)

    while trace_q:
        # BB_addr can only be None on the first pass (update_addr unknown to
        # cfg); the original appended that None to the caller's list.
        if BB_addr is not None and BB_addr not in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        pc_in_trace = trace_q.popleft()
        if pc_in_trace == 0:
            return
        if pc_in_trace == delinq_load_addr:
            break

        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; the score increment is assumed to sit inside this test.
        if pc_in_trace in cfg.ins_tags_dict:
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                decision = prefetch_decisions[pc_in_trace]
                # The three thresholds used to be three branches with the
                # same body; evaluation order is unchanged.
                if (decision.l3_mr >= 0.01 or decision.l2_mr >= 0.05
                        or decision.l1_mr >= 0.2):
                    fwd_delinq_loads += 1
            fwd_score += 1

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr is None:
            # The address lies outside the current function: switch to the
            # disassembly that contains it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr is None:
                return

    record_update_time(delinq_load_addr, fwd_score, time_to_update_dict,
                       fwd_delinq_loads, delinq_loads_till_update)
def pointer_analysis_with_trace_hints(track_reg, is_track_idx_reg, delinq_load_addr, BB_addr, trace_q, cfg, pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, delinq_loads_update_addr, update_trace_dict, prefetch_decisions, conf):
    """Follow the register feeding a delinquent load through a trace.

    Entries are popped from the left of ``trace_q``.  Instructions that
    write the currently tracked register are pushed on ``update_trace_q``
    as ``(pc, score)`` tuples and the tracked register is switched to the
    instruction's source (or index) register.  When an instruction that
    writes the tracked register is met again (or an index register is
    being tracked) it is treated as the update point: its counter in
    ``pointer_update_addr_dict`` is bumped, the chain of pcs is copied to
    ``precomp_q`` and the ``record_*`` helpers are called.

    Returns:
        ``BBs_in_loop`` (list of basic-block addresses visited) when an
        update point was recorded or when ``update_trace_q`` filled up;
        ``None`` when the first trace entry is not ``delinq_load_addr``,
        when a 0 entry is popped, when an address has no basic block even
        after re-disassembly, or when the trace runs out.

    Side effects: consumes ``trace_q``; mutates
    ``pointer_update_addr_dict``; may set ``cfg.ins_tags_dict[pc]`` to
    ``"MoveConst"``; writes diagnostics to stderr.

    NOTE(review): the file defines another function with this exact name
    (13 parameters) further down; if both are module-level, that later
    definition replaces this one at import time -- confirm which one
    callers expect.
    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed copy of the source; the nesting of the
    ``score += 1`` and of the ``StackW`` enqueue should be confirmed
    against the original file.
    """
    BBs_in_loop = []
    # Bounded queues: 10 chain entries, plus one slot for the delinquent
    # load itself in the pre-computation queue.
    update_trace_q = Queue.Queue(10)
    precomp_q = Queue.Queue(11)

    # The trace is expected to start at the delinquent load.
    last_trace_pc = trace_q.popleft()
    if last_trace_pc != delinq_load_addr:
        return

    inter_delinq_loads = 0
    score = 0
    is_reg_pushed_on_stack = False
    # Assigned in the "Move" branch below but never read in this function.
    equal_reg = None

    while trace_q:
        pc_in_trace = trace_q.popleft()
        if pc_in_trace == 0:
            return

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr == None:
            # Address is outside the current function: fetch the
            # disassembly containing it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr == None:
                return

        if not BB_addr in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        tag = None
        if pc_in_trace in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc_in_trace]

            # Count intervening loads that miss often enough at any cache
            # level (thresholds per level are hard-coded).
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                if prefetch_decisions[pc_in_trace].l3_mr >= 0.01:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l2_mr >= 0.05:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l1_mr >= 0.2:
                    inter_delinq_loads += 1

            score += 1

        # Checked before any put() below so the blocking Queue.put never
        # waits on a full queue.
        if update_trace_q.full():
            print >> sys.stderr, "Too deep object nesting encountered: NestTrace @ \n"
            # NOTE(review): this prints the Queue object itself, not its
            # contents.
            print >> sys.stderr, update_trace_q
            return BBs_in_loop

        # Ignore instructions without a destination register (writes).
        if not pc_in_trace in cfg.ins_dst_regs_dict.keys():
            continue

        reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_trace][0]

        # Update point: the tracked register is written either while an
        # index register is being tracked, or by a non-"Move" instruction
        # that is already on the chain.
        # NOTE(review): ``pc_in_trace in x`` tests membership in the
        # (pc, score) tuple, so it also matches when the *score* equals the
        # address.
        if reg_updated_curr_pc == track_reg and (is_track_idx_reg or ( any(pc_in_trace in x for x in update_trace_q.queue) and tag != "Move" )):

            if not pc_in_trace in pointer_update_addr_dict:
                pointer_update_addr_dict[pc_in_trace] = 1
            else:
                pointer_update_addr_dict[pc_in_trace] += 1

            # The pre-computation chain always starts with the load itself.
            precomp_q.put(delinq_load_addr)

            up_score = 0

            if not is_track_idx_reg:
                # Drain the chain up to and including the entry for this
                # pc; up_score ends as the score stored with the last
                # drained entry.
                while any(pc_in_trace in x for x in update_trace_q.queue):
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    up_score = pc_score_t[1]
                    precomp_q.put(pc)

            elif is_track_idx_reg:
                # Index-register case: append this pc and copy the whole
                # chain; up_score stays 0.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

                while not update_trace_q.empty():
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    precomp_q.put(pc)

            record_update_time(delinq_load_addr, up_score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
            record_update_addr(delinq_load_addr, pc_in_trace, delinq_loads_update_addr)

            # Remaining distance: everything counted so far minus the part
            # attributed to the chain above.
            loop_score = score
            fwd_score = loop_score - up_score

            record_update_time(delinq_load_addr, fwd_score, time_to_update_dict, 0, delinq_loads_till_update)

            precomp_q_t = tuple(list(precomp_q.queue))
            record_update_trace(precomp_q_t, update_trace_dict)

            return BBs_in_loop

        if reg_updated_curr_pc == track_reg and not is_reg_pushed_on_stack:
            # The register value should not be on the stack during this.
            if tag == "Read":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

                # A non-zero index register takes over as the tracked
                # register.
                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True

            # move r1, r2 -- not a memory operation
            elif tag == "Move":
                if pc_in_trace in cfg.ins_src_regs_dict:
                    reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                    equal_reg = track_reg
                    track_reg = reg_read
                else:
                    # No source register recorded: re-tag the instruction
                    # in the shared disassembly.
                    cfg.ins_tags_dict[pc_in_trace] = "MoveConst"

                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

            # The register being tracked is read from the stack.
            elif tag == "StackR":
                print >> sys.stderr,"Register pushed on stack @ %lx"%(pc_in_trace)
                is_reg_pushed_on_stack = True  # beyond this instruction the value is on the stack
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

            # LEA is followed like "Read": through its source register, or
            # its index register when that is non-zero.
            elif tag == "Lea":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True

            else:
                # Any other instruction that changes track_reg: not traced
                # further, only remembered on the chain.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

        elif tag == "StackW":
            reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
            if track_reg == reg_read:
                is_reg_pushed_on_stack = False  # beyond this the value is in **some** register
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

    # Reaching this point means no update instruction was found.
    return
def pointer_analysis_with_trace_hints(track_reg, delinq_load_addr, BB_addr, trace_q, cfg,
                                      pointer_update_addr_dict, pointer_update_time_dict,
                                      time_to_update_dict, delinq_loads_till_update,
                                      delinq_loads_till_use, delinq_loads_update_addr,
                                      prefetch_decisions, conf):
    """Locate the instruction that updates the register used by a load.

    Entries are popped from the left of ``trace_q``.  The first entry must
    be ``delinq_load_addr``.  For every later entry that writes the tracked
    register (while the value is not marked as living on the stack):

    * tag ``"Move"``   -- tracking continues with the source register;
    * tag ``"StackR"`` -- the value is marked as being on the stack;
    * any other tag (including ``"Read"``) -- the entry is the update
      point: its counter in ``pointer_update_addr_dict`` is incremented,
      ``record_update_time`` / ``record_update_addr`` /
      ``record_time_to_update`` are called, and the scan stops.

    A ``"StackW"`` entry whose source register is the tracked register
    clears the on-stack mark.

    Args:
        track_reg: Register initially tracked.
        delinq_load_addr: Address of the load being analysed.
        BB_addr: Ignored on entry (overwritten before first use); kept for
            interface compatibility.
        trace_q: Deque-like trace, consumed in place.
        cfg: Function disassembly; replaced locally via
            ``disassm.get_func_disassm`` when an address has no basic
            block in the current one.
        pointer_update_addr_dict: Mapping address -> hit count, updated in
            place.
        pointer_update_time_dict, time_to_update_dict,
        delinq_loads_till_update, delinq_loads_till_use,
        delinq_loads_update_addr: Forwarded to the ``record_*`` helpers.
        prefetch_decisions: Mapping address -> object with ``l1_mr``,
            ``l2_mr``, ``l3_mr``; a falsy value disables delinquent-load
            counting.
        conf: Object exposing ``all_delinq_loads_list``.

    Returns:
        The list of basic-block addresses visited when an update point was
        recorded; ``None`` when the trace is empty, does not start with
        ``delinq_load_addr``, contains a 0 entry, contains an address that
        cannot be mapped to a basic block, or is exhausted.

    NOTE(review): an earlier definition with the same name but 15
    parameters exists in this file; if both are module-level this one is
    the definition that stays bound.
    """
    # An empty trace used to raise IndexError from popleft(); treat it like
    # every other "nothing found" case.
    if not trace_q:
        return

    if trace_q.popleft() != delinq_load_addr:
        return

    BBs_in_loop = []
    inter_delinq_loads = 0
    score = 0
    is_reg_pushed_on_stack = False

    while trace_q:
        pc_in_trace = trace_q.popleft()
        if pc_in_trace == 0:
            return

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr is None:
            # Outside the current function: switch disassembly, retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr is None:
                return

        if BB_addr not in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        # NOTE(review): nesting reconstructed from whitespace-collapsed
        # source; the delinquent-load test and the score increment are
        # assumed to apply only to addresses present in ins_tags_dict.
        tag = None
        if pc_in_trace in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc_in_trace]
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                decision = prefetch_decisions[pc_in_trace]
                # Same thresholds and evaluation order as the former
                # three-way if/elif with identical bodies.
                if (decision.l3_mr >= 0.01 or decision.l2_mr >= 0.05
                        or decision.l1_mr >= 0.2):
                    inter_delinq_loads += 1
            score += 1

        # Instructions without a destination register cannot update the
        # tracked register.
        if pc_in_trace not in cfg.ins_dst_regs_dict:
            continue

        reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_trace][0]

        if reg_updated_curr_pc == track_reg and not is_reg_pushed_on_stack:
            if tag == "Move":
                # move r1, r2 -- keep following the source register.
                track_reg = cfg.ins_src_regs_dict[pc_in_trace][0]
            elif tag == "StackR":
                # Beyond this instruction the value lives on the stack.
                is_reg_pushed_on_stack = True
            else:
                # "Read" and every other tag were handled by two identical
                # branches; both mean "this is the update point".
                pointer_update_addr_dict[pc_in_trace] = (
                    pointer_update_addr_dict.get(pc_in_trace, 0) + 1)
                record_update_time(delinq_load_addr, score, pointer_update_time_dict,
                                   inter_delinq_loads, delinq_loads_till_use)
                record_update_addr(delinq_load_addr, pc_in_trace, delinq_loads_update_addr)
                record_time_to_update(delinq_load_addr, pc_in_trace, trace_q, cfg,
                                      time_to_update_dict, delinq_loads_till_update,
                                      BBs_in_loop, delinq_loads_update_addr,
                                      prefetch_decisions, conf)
                return BBs_in_loop
        elif tag == "StackW":
            # Beyond this the value is in some register again.
            if track_reg == cfg.ins_src_regs_dict[pc_in_trace][0]:
                is_reg_pushed_on_stack = False

    # Trace exhausted without finding an update instruction.
    return
def analyze_non_strided_delinq_loads(global_pc_smptrace_hist, global_pc_stride_hist,
                                     prefetch_decisions, exec_file, num_samples,
                                     avg_mem_latency):
    """Run the pointer-chasing analysis on the highest-ranked "ptr" loads.

    Steps:

    1. Collect every address in ``prefetch_decisions`` whose ``pf_type``
       contains ``"ptr"`` and sort them.
    2. Rank them by ``sample frequency * l3_mr`` (sample frequency is the
       number of entries in ``global_pc_smptrace_hist[pc]`` divided by
       ``num_samples``) and log each one to stderr.
    3. For the 15 highest-ranked loads tagged ``'Read'`` or ``'Write'``,
       call ``ins_trace_ptr_nobj_analysis.detect_pointer_chasing`` and
       ``analyze_pointer_prefetch``, then ``do_cost_benefit_analysis``
       for loads present in ``conf.indirect_pref_decisions``.
    4. Call ``decide_prefetch_schedules`` with the last disassembly used
       and ``print_indirect_prefetch_decisions``.

    Args:
        global_pc_smptrace_hist: Mapping address -> sized container of
            sampled traces.
        global_pc_stride_hist: Forwarded to ``detect_pointer_chasing``.
        prefetch_decisions: Mapping address -> object with ``pf_type``,
            ``l1_mr``, ``l2_mr`` and ``l3_mr``.
        exec_file, num_samples, avg_mem_latency: Forwarded to ``Conf1``;
            ``num_samples`` is also the divisor of the sample frequency.

    Returns:
        None.  When no decision has a "ptr" ``pf_type`` the function
        returns immediately; the original went on to reference ``cfg``
        before assignment and raised ``UnboundLocalError``.
    """
    delinq_load_address_list = sorted(
        pc for pc in prefetch_decisions
        if "ptr" in prefetch_decisions[pc].pf_type)

    if not delinq_load_address_list:
        # Nothing to analyse and no disassembly to hand to the scheduler.
        return

    conf = Conf1(exec_file, delinq_load_address_list, num_samples, avg_mem_latency)

    # sys.stderr.write(text + "\n") emits the same bytes as the former
    # ``print >> sys.stderr, text`` and parses on Python 2 and 3 alike.
    sys.stderr.write("\nSample freq irregular accesses!\n" + "\n")

    irr_list = []
    for pc in delinq_load_address_list:
        decision = prefetch_decisions[pc]
        sample_freq = float(len(global_pc_smptrace_hist[pc])) / float(num_samples)
        score = float(sample_freq) * float(decision.l3_mr)
        irr_list.append((pc, sample_freq, decision.l3_mr, decision.l2_mr,
                         decision.l1_mr, score))

    # Highest score first; sorted() is stable, so ties keep address order.
    sorted_irr_list = sorted(irr_list, key=operator.itemgetter(5), reverse=True)

    # NOTE(review): nesting reconstructed from whitespace-collapsed source;
    # every ranked load is assumed to be logged, not only the retained ones.
    # The per-load stride statistics the original computed here were never
    # used and have been dropped.
    for entry in sorted_irr_list:
        sys.stderr.write(
            "\npc:%lx freq:%lf l3mr:%lf l2mr:%lf l1mr:%lf score:%lf" % entry + "\n")

    max_ranked_loads = 15
    trimmed_delinq_load_addr_list = [
        entry[0] for entry in sorted_irr_list[:max_ranked_loads]]

    for delinq_load_addr in trimmed_delinq_load_addr_list:
        cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr)

        # Only plain memory reads/writes are analysed.
        if cfg.ins_tags_dict[delinq_load_addr] not in ('Read', 'Write'):
            continue

        (pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict,
         delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop,
         is_ind, stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(
             global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr,
             None, cfg, conf)

        analyze_pointer_prefetch(pointer_update_addr_dict, prefetch_decisions,
                                 pointer_update_time_dict, time_to_update_dict,
                                 delinq_load_addr, delinq_loads_till_update,
                                 delinq_loads_till_use, all_BBs_in_loop, cfg, conf,
                                 is_ind, stride)

        if delinq_load_addr in conf.indirect_pref_decisions:
            do_cost_benefit_analysis(cfg, conf, delinq_load_addr, prefetch_decisions)

    # cfg is always bound here: the early return above guarantees at least
    # one loop iteration.
    decide_prefetch_schedules(cfg, conf)
    print_indirect_prefetch_decisions(conf)
except IOError, e: continue usf_file.close() for (pc_rdist_hist, pc_stride_hist, pc_freq_hist, pc_time_hist, pc_corr_hist, pc_fwd_rdist_hist, pc_smptrace_hist) in burst_hists: continue ins_trace_ptr_nobj_analysis.add_trace_to_global_pc_smptrace_hist(global_pc_smptrace_hist, pc_smptrace_hist) ins_trace_ptr_nobj_analysis.add_to_pc_stride_hist(pc_stride_hist, global_pc_stride_hist) print >> sys.stderr, "Starting trace analysis..." for delinq_load_addr in delinq_load_address_list: cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr) if not (cfg.ins_tags_dict[delinq_load_addr] == 'Read' or cfg.ins_tags_dict[delinq_load_addr] == 'Write'): continue print >> sys.stderr, "Sample frequency %lx: %lf"%(delinq_load_addr, float(len(pc_smptrace_hist.keys()))/float(conf.num_samples)) (pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, is_ind, stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr, None, cfg, conf) # analyze_pointer_prefetch(pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride) analyze_pointer_prefetch(pointer_update_addr_dict, [], pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride) decide_prefetch_schedules(cfg, conf) print_indirect_prefetch_decisions(conf)