def detect_pointer_chasing(global_pc_smptrace_hist, delinq_load_addr, prefetch_decisions, cfg, conf):
    """Run the pointer-update analysis over every sampled trace of one load.

    The base register of the instruction at ``delinq_load_addr`` (taken from
    ``cfg.ins_base_reg_dict``) is the register whose update is searched for.
    Every trace stored under ``global_pc_smptrace_hist[delinq_load_addr]`` is
    wrapped in a ``deque`` and passed to ``pointer_analysis_with_trace_hints``,
    which fills the bookkeeping dictionaries created here.

    Returns a 6-tuple:
    ``(pointer_update_addr_dict, pointer_update_time_dict,
    time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use,
    all_BBs_in_loop)`` where ``all_BBs_in_loop`` is the list of basic-block
    addresses reported by the per-trace analyses, without repeats across
    traces.
    """
    samples = global_pc_smptrace_hist[delinq_load_addr]
    base_reg = cfg.ins_base_reg_dict[delinq_load_addr]

    # Result containers; the per-trace analysis mutates them in place.
    pointer_update_addr_dict = {}
    pointer_update_time_dict = {}
    time_to_update_dict = {}
    delinq_loads_till_update = {}
    delinq_loads_till_use = {}
    delinq_loads_update_addr = {}
    all_BBs_in_loop = []

    for trace in samples.values():
        trace_q = deque(trace)
        start_bb = static_BB_cfg.discover_BB_for_address(delinq_load_addr, cfg.BB_dict)
        visited_bbs = pointer_analysis_with_trace_hints(
            base_reg, delinq_load_addr, start_bb, trace_q, cfg,
            pointer_update_addr_dict, pointer_update_time_dict,
            time_to_update_dict, delinq_loads_till_update,
            delinq_loads_till_use, delinq_loads_update_addr,
            prefetch_decisions, conf)
        if not visited_bbs:
            # The analysis returns None (or nothing useful) on failure.
            continue
        # Decide what is new before extending, so the membership test is
        # made against the list as it was before this trace.
        unseen = [bb for bb in visited_bbs if bb not in all_BBs_in_loop]
        all_BBs_in_loop.extend(unseen)

    return (pointer_update_addr_dict, pointer_update_time_dict,
            time_to_update_dict, delinq_loads_till_update,
            delinq_loads_till_use, all_BBs_in_loop)
def detect_pointer_chasing(global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr, prefetch_decisions, cfg, conf):
    """Trace the update point of one delinquent load and classify it.

    The tracked register starts as the load's base register
    (``cfg.ins_base_reg_dict``); when the load has a non-zero entry in
    ``cfg.ins_idx_reg_dict`` that index register is tracked instead. Every
    sampled trace stored under ``global_pc_smptrace_hist[delinq_load_addr]``
    is analysed with ``pointer_analysis_with_trace_hints``. Afterwards the
    result of ``dominant_update_trace`` over the collected update traces, if
    truthy, is passed to ``is_indirect_addr`` together with
    ``global_pc_stride_hist``.

    Returns an 8-tuple:
    ``(pointer_update_addr_dict, pointer_update_time_dict,
    time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use,
    all_BBs_in_loop, is_ind, stride)``; ``is_ind``/``stride`` stay
    ``False``/``0`` when no dominant update trace was found.
    """
    samples = global_pc_smptrace_hist[delinq_load_addr]

    # Result containers; the per-trace analysis mutates them in place.
    pointer_update_addr_dict = {}
    pointer_update_time_dict = {}
    time_to_update_dict = {}
    delinq_loads_till_update = {}
    delinq_loads_till_use = {}
    delinq_loads_update_addr = {}
    update_trace_dict = {}
    all_BBs_in_loop = []

    track_reg = cfg.ins_base_reg_dict[delinq_load_addr]
    print >> sys.stderr, "Tracing update point for pc %lx"%(delinq_load_addr)

    # A non-zero index register takes precedence over the base register.
    idx_reg = cfg.ins_idx_reg_dict[delinq_load_addr]
    is_track_idx_reg = idx_reg != 0
    if is_track_idx_reg:
        track_reg = idx_reg

    start_bb = static_BB_cfg.discover_BB_for_address(delinq_load_addr, cfg.BB_dict)

    for trace in samples.values():
        trace_q = deque(trace)
        visited_bbs = pointer_analysis_with_trace_hints(
            track_reg, is_track_idx_reg, delinq_load_addr, start_bb, trace_q,
            cfg, pointer_update_addr_dict, pointer_update_time_dict,
            time_to_update_dict, delinq_loads_till_update,
            delinq_loads_till_use, delinq_loads_update_addr,
            update_trace_dict, prefetch_decisions, conf)
        if not visited_bbs:
            continue
        # Decide what is new before extending, so the membership test is
        # made against the list as it was before this trace.
        unseen = [bb for bb in visited_bbs if bb not in all_BBs_in_loop]
        all_BBs_in_loop.extend(unseen)

    is_ind = False
    stride = 0
    freq_update_trace_t = dominant_update_trace(update_trace_dict)
    if freq_update_trace_t:
        print >> sys.stderr, "Traceable delinq load: %lx"%(delinq_load_addr)
        (is_ind, stride) = is_indirect_addr(freq_update_trace_t, global_pc_stride_hist, cfg)

    return (pointer_update_addr_dict, pointer_update_time_dict,
            time_to_update_dict, delinq_loads_till_update,
            delinq_loads_till_use, all_BBs_in_loop, is_ind, stride)
def record_time_to_update(delinq_load_addr, update_addr, trace_q, cfg,
                          time_to_update_dict, delinq_loads_till_update,
                          BBs_in_loop, delinq_loads_update_addr,
                          prefetch_decisions, conf):
    """Consume ``trace_q`` up to the next occurrence of the delinquent load.

    Does nothing unless ``delinq_load_addr`` still appears in ``trace_q``.
    Otherwise entries are popped from the left until the load is met; on the
    way, instructions known to ``cfg.ins_tags_dict`` are counted and the
    basic block of each visited instruction is appended to ``BBs_in_loop``
    (no duplicates). The counts are then handed to ``record_update_time``
    with ``time_to_update_dict`` / ``delinq_loads_till_update``.

    The walk is abandoned (nothing recorded, returns ``None``) when a ``0``
    entry is popped or when an instruction cannot be mapped to a basic block
    even after re-disassembling via ``disassm.get_func_disassm``.
    """
    if delinq_load_addr not in trace_q:
        return

    fwd_score = 0
    fwd_delinq_loads = 0
    BB_addr = static_BB_cfg.discover_BB_for_address(update_addr, cfg.BB_dict)

    while trace_q:
        if BB_addr not in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        pc = trace_q.popleft()
        if pc == 0:
            return
        if pc == delinq_load_addr:
            break

        # NOTE(review): nesting rebuilt from flattened source; the score
        # increment is assumed to sit inside the ins_tags_dict check.
        if pc in cfg.ins_tags_dict:
            if pc in conf.all_delinq_loads_list and prefetch_decisions:
                # Count the load once if any cache level crosses its threshold.
                if (prefetch_decisions[pc].l3_mr >= 0.01
                        or prefetch_decisions[pc].l2_mr >= 0.05
                        or prefetch_decisions[pc].l1_mr >= 0.2):
                    fwd_delinq_loads += 1
            fwd_score += 1

        BB_addr = static_BB_cfg.discover_BB_for_address(pc, cfg.BB_dict)
        if BB_addr is None:
            # Unknown address: disassemble the function containing it and retry.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc, cfg.BB_dict)
            if BB_addr is None:
                return

    record_update_time(delinq_load_addr, fwd_score, time_to_update_dict,
                       fwd_delinq_loads, delinq_loads_till_update)
def add_to_BB_indirect_pref_decisions(delinq_load_addr, schedule_addr, cfg, conf):
    """Register a prefetch entry for a delinquent load.

    Appends ``(schedule_addr, mem_dis, delinq_load_addr)`` to
    ``conf.BB_reg_prefetch_dict[BB_addr][base_reg_id]``, creating the inner
    dictionary and list on first use. ``BB_addr`` is the basic block that
    contains ``schedule_addr``; ``base_reg_id`` and ``mem_dis`` are read from
    ``cfg.ins_base_reg_dict`` and ``cfg.ins_mem_dis_dict`` for the load.
    """
    BB_addr = static_BB_cfg.discover_BB_for_address(schedule_addr, cfg.BB_dict)
    base_reg_id = cfg.ins_base_reg_dict[delinq_load_addr]
    entry = (schedule_addr, cfg.ins_mem_dis_dict[delinq_load_addr], delinq_load_addr)

    per_register = conf.BB_reg_prefetch_dict.setdefault(BB_addr, {})
    per_register.setdefault(base_reg_id, []).append(entry)
def BB_prefetch_status(delinq_load_addr, cfg, conf):
    """Classify a load against prefetches already registered for its block.

    Looks up ``conf.BB_reg_prefetch_dict`` for the load's basic block and
    base register. Returns:

    * ``"ptr"`` when nothing is registered for that block/register, or when
      the largest registered displacement equals the load's displacement;
    * ``"ptradj"`` when the difference (largest registered minus the load's)
      is below ``conf.line_size``;
    * ``"ptradj2"`` otherwise.
    """
    BB_addr = static_BB_cfg.discover_BB_for_address(delinq_load_addr, cfg.BB_dict)
    base_reg_id = cfg.ins_base_reg_dict[delinq_load_addr]

    registered = conf.BB_reg_prefetch_dict
    if BB_addr not in registered:
        return "ptr"
    if base_reg_id not in registered[BB_addr]:
        return "ptr"

    # Second tuple field of each entry is the memory displacement.
    displacements = [entry[1] for entry in registered[BB_addr][base_reg_id]]
    own_displacement = cfg.ins_mem_dis_dict[delinq_load_addr]
    gap = max(displacements) - own_displacement

    if gap == 0:
        return "ptr"
    if gap < conf.line_size:
        return "ptradj"
    return "ptradj2"
def pointer_analysis_with_trace_hints(track_reg, is_track_idx_reg, delinq_load_addr, BB_addr, trace_q, cfg, pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, delinq_loads_update_addr, update_trace_dict, prefetch_decisions, conf):
    """Follow one sampled trace to find the instruction that updates track_reg.

    ``trace_q`` must support ``popleft()`` and its first entry must equal
    ``delinq_load_addr``; otherwise the function returns ``None`` at once.
    The remaining entries are consumed one by one (presumably from most
    recent to oldest -- TODO confirm with the trace producer). Instructions
    that write the tracked register are queued in ``update_trace_q`` as
    ``(pc, score)`` pairs and the tracked register is switched to the
    register they read from.

    On success the update is recorded through ``record_update_time``,
    ``record_update_addr`` and ``record_update_trace`` and the list of
    visited basic-block addresses is returned. The same list is also
    returned when ``update_trace_q`` fills up. ``None`` is returned when a
    ``0`` entry is met, when an address cannot be mapped to a basic block,
    or when the trace is exhausted without finding an update.

    NOTE(review): block nesting was rebuilt from a flattened source; the
    spots where more than one nesting is syntactically possible are marked
    below and should be checked against the original file.
    """
    BBs_in_loop = []
    # Chain of register-writing instructions seen so far, as (pc, score).
    update_trace_q = Queue.Queue(10)
    # Address sequence handed to record_update_trace on success; one slot
    # larger than update_trace_q because delinq_load_addr is put in first.
    precomp_q = Queue.Queue(11)
    last_trace_pc = trace_q.popleft()
    if last_trace_pc != delinq_load_addr:
        return
    inter_delinq_loads = 0
    score = 0
    is_reg_pushed_on_stack = False
    equal_reg = None
    while trace_q:
        pc_in_trace = trace_q.popleft()
        if pc_in_trace == 0:
            return
        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr == None:
            # Address not covered by the current cfg: disassemble the
            # function containing it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr == None:
                return
        if not BB_addr in BBs_in_loop:
            BBs_in_loop.append(BB_addr)
        tag = None
        # NOTE(review): the delinquent-load check and the score increment are
        # assumed to be nested inside this check -- confirm.
        if pc_in_trace in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc_in_trace]
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                # Count the load once if any of the three thresholds is met.
                if prefetch_decisions[pc_in_trace].l3_mr >= 0.01:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l2_mr >= 0.05:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l1_mr >= 0.2:
                    inter_delinq_loads += 1
            score += 1
        # Checked before any put() below, so put() never blocks on a full queue.
        if update_trace_q.full():
            print >> sys.stderr, "Too deep object nesting encountered: NestTrace @ \n"
            print >> sys.stderr, update_trace_q
            return BBs_in_loop
        # Skip instructions that have no destination-register entry.
        if not pc_in_trace in cfg.ins_dst_regs_dict.keys():
            continue
        reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_trace][0]
        # Update point found: the instruction writes the tracked register and
        # either an index register is being tracked, or this pc is already
        # queued in update_trace_q and is not tagged "Move".
        if reg_updated_curr_pc == track_reg and (is_track_idx_reg or ( any(pc_in_trace in x for x in update_trace_q.queue) and tag != "Move" )):
            if not pc_in_trace in pointer_update_addr_dict:
                pointer_update_addr_dict[pc_in_trace] = 1
            else:
                pointer_update_addr_dict[pc_in_trace] += 1
            precomp_q.put(delinq_load_addr)
            up_score = 0
            if not is_track_idx_reg:
                # Drain queued entries up to and including the one for this
                # pc; up_score ends as the score stored with the last entry
                # removed.
                while any(pc_in_trace in x for x in update_trace_q.queue):
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    up_score = pc_score_t[1]
                    precomp_q.put(pc)
            elif is_track_idx_reg:
                # Queue the current instruction, then drain everything;
                # up_score stays 0 on this path.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                while not update_trace_q.empty():
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    precomp_q.put(pc)
            record_update_time(delinq_load_addr, up_score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
            record_update_addr(delinq_load_addr, pc_in_trace, delinq_loads_update_addr)
            loop_score = score
            fwd_score = loop_score - up_score
            record_update_time(delinq_load_addr, fwd_score, time_to_update_dict, 0, delinq_loads_till_update)
            precomp_q_t = tuple(list(precomp_q.queue))
            record_update_trace(precomp_q_t, update_trace_dict)
            return BBs_in_loop
        # The tracked value must be in a register (not on the stack) for the
        # register chain to be followed.
        if reg_updated_curr_pc == track_reg and not is_reg_pushed_on_stack:
            if tag == "Read":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                # A non-zero index register replaces the source register as
                # the one to track.
                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True
            # move r1, r2 -- not a memory operation
            elif tag == "Move":
                if pc_in_trace in cfg.ins_src_regs_dict:
                    reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                    equal_reg = track_reg
                    track_reg = reg_read
                else:
                    # No source register recorded: re-tag the instruction.
                    cfg.ins_tags_dict[pc_in_trace] = "MoveConst"
                # NOTE(review): assumed to run for both Move cases -- confirm.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
            # The tracked register is read from the stack.
            elif tag == "StackR":
                print >> sys.stderr,"Register pushed on stack @ %lx"%(pc_in_trace)
                is_reg_pushed_on_stack = True  # beyond this instruction the value is on the stack
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
            elif tag == "Lea":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True
            else:
                # Some other instruction that changes track_reg and is not
                # traced further (for now); it is still queued.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
        elif tag == "StackW":
            reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
            if track_reg == reg_read:
                is_reg_pushed_on_stack = False  # beyond this the value is in **some** register
                # NOTE(review): assumed to be inside the register match -- confirm.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
    # Reaching this point means no update instruction was found.
    return
def pointer_analysis_with_trace_hints(track_reg, delinq_load_addr, BB_addr, trace_q, cfg,
                                      pointer_update_addr_dict, pointer_update_time_dict,
                                      time_to_update_dict, delinq_loads_till_update,
                                      delinq_loads_till_use, delinq_loads_update_addr,
                                      prefetch_decisions, conf):
    """Follow one sampled trace to the instruction that updates ``track_reg``.

    ``trace_q`` must support ``popleft()`` and start with
    ``delinq_load_addr``; otherwise ``None`` is returned immediately. The
    remaining entries are consumed one at a time. A "Move" that writes the
    tracked register switches tracking to its source register; a "StackR"
    marks the value as living on the stack until a matching "StackW" is
    seen. Any other instruction that writes the tracked register is treated
    as the update point: it is counted in ``pointer_update_addr_dict``,
    reported through ``record_update_time`` / ``record_update_addr`` /
    ``record_time_to_update``, and the list of visited basic-block addresses
    is returned.

    ``None`` is returned when a ``0`` entry is met, when an address cannot
    be mapped to a basic block, or when the trace runs out.
    """
    visited_bbs = []
    if trace_q.popleft() != delinq_load_addr:
        return

    inter_delinq_loads = 0
    score = 0
    value_on_stack = False

    while trace_q:
        pc = trace_q.popleft()
        if pc == 0:
            return

        BB_addr = static_BB_cfg.discover_BB_for_address(pc, cfg.BB_dict)
        if BB_addr is None:
            # Unknown address: disassemble the function containing it and retry.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc, cfg.BB_dict)
            if BB_addr is None:
                return
        if BB_addr not in visited_bbs:
            visited_bbs.append(BB_addr)

        # NOTE(review): nesting rebuilt from flattened source; the
        # delinquent-load check and the score increment are assumed to sit
        # inside the ins_tags_dict check.
        tag = None
        if pc in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc]
            if pc in conf.all_delinq_loads_list and prefetch_decisions:
                # Count the load once if any cache level crosses its threshold.
                if (prefetch_decisions[pc].l3_mr >= 0.01
                        or prefetch_decisions[pc].l2_mr >= 0.05
                        or prefetch_decisions[pc].l1_mr >= 0.2):
                    inter_delinq_loads += 1
            score += 1

        if pc not in cfg.ins_dst_regs_dict:
            continue
        writes_tracked_reg = cfg.ins_dst_regs_dict[pc][0] == track_reg

        if writes_tracked_reg and not value_on_stack:
            if tag == "Move":
                # move r1, r2 -- keep following the source register.
                track_reg = cfg.ins_src_regs_dict[pc][0]
            elif tag == "StackR":
                # Beyond this instruction the value is on the stack.
                value_on_stack = True
            else:
                # "Read", or any instruction that is not traced further:
                # this is the update point.
                pointer_update_addr_dict[pc] = pointer_update_addr_dict.get(pc, 0) + 1
                record_update_time(delinq_load_addr, score, pointer_update_time_dict,
                                   inter_delinq_loads, delinq_loads_till_use)
                record_update_addr(delinq_load_addr, pc, delinq_loads_update_addr)
                record_time_to_update(delinq_load_addr, pc, trace_q, cfg,
                                      time_to_update_dict, delinq_loads_till_update,
                                      visited_bbs, delinq_loads_update_addr,
                                      prefetch_decisions, conf)
                return visited_bbs
        elif tag == "StackW":
            source_reg = cfg.ins_src_regs_dict[pc][0]
            if track_reg == source_reg:
                # Beyond this the value is in some register again.
                value_on_stack = False

    # Trace exhausted without finding an update instruction.
    return
def record_time_to_update(delinq_load_addr, update_addr, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf):
    """Count instructions between an update point and the delinquent load.

    Does nothing unless ``delinq_load_addr`` still appears in ``trace_q``.
    Starting with the basic block that contains ``update_addr``, the
    addresses of each block are walked in descending order while matching
    entries at the head of ``trace_q`` are consumed; the next block is the
    one containing the next entry popped from ``trace_q``. Instructions
    known to ``cfg.ins_tags_dict`` are counted until ``delinq_load_addr`` is
    reached, and the counts are passed to ``record_update_time`` with
    ``time_to_update_dict`` / ``delinq_loads_till_update``.

    Returns ``None`` in all cases. Nothing is recorded when the trace runs
    out between blocks, when a ``0`` entry is popped, or when an address
    cannot be mapped to a basic block.

    NOTE(review): block nesting was rebuilt from a flattened source --
    confirm against the original file.
    """
    if not delinq_load_addr in trace_q:
        return
    fwd_score = 0
    fwd_delinq_loads = 0
    BB_addr = static_BB_cfg.discover_BB_for_address(update_addr, cfg.BB_dict)
    reversed_BB_addr_range = sorted(cfg.BB_dict[BB_addr], reverse=True)
    # In the first block only addresses below the update instruction count.
    reversed_BB_addr_range = filter(lambda x: x < update_addr, reversed_BB_addr_range)
    pc_in_trace = trace_q.popleft()
    pc_in_BB = None
    while trace_q:
        if not BB_addr in BBs_in_loop:
            BBs_in_loop.append(BB_addr)
        for pc_in_BB in reversed_BB_addr_range:
            if pc_in_BB == delinq_load_addr:
                break
            # Keep the trace in step with the block walk.
            if trace_q and pc_in_BB == trace_q[0]:
                trace_q.popleft()
            # NOTE(review): the score increment is assumed to sit inside
            # this check -- confirm.
            if pc_in_BB in cfg.ins_tags_dict:
                if pc_in_BB in conf.all_delinq_loads_list and prefetch_decisions:
                    # Count the load once if any of the three thresholds is met.
                    if prefetch_decisions[pc_in_BB].l3_mr >= 0.01:
                        fwd_delinq_loads += 1
                    elif prefetch_decisions[pc_in_BB].l2_mr >= 0.05:
                        fwd_delinq_loads += 1
                    elif prefetch_decisions[pc_in_BB].l1_mr >= 0.2:
                        fwd_delinq_loads += 1
                fwd_score += 1
        # The inner loop stopped on the delinquent load: the walk is done.
        if pc_in_BB == delinq_load_addr:
            break
        if trace_q:
            pc_in_trace = trace_q.popleft()
        else:
            return
        if pc_in_trace == 0:
            return
        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr == None:
            return
        # Later blocks are walked in full, highest address first.
        reversed_BB_addr_range = sorted(cfg.BB_dict[BB_addr], reverse=True)
    record_update_time(delinq_load_addr, fwd_score, time_to_update_dict, fwd_delinq_loads, delinq_loads_till_update)
def pointer_analysis_with_trace_hints(track_reg, delinq_load_addr, BB_addr, trace_q, cfg, pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, delinq_loads_update_addr, prefetch_decisions, conf):
    """Walk basic blocks, guided by a sampled trace, to find the update of track_reg.

    ``trace_q`` must support ``popleft()`` and start with
    ``delinq_load_addr``; otherwise ``None`` is returned immediately. The
    addresses of the block ``BB_addr`` that lie below the load are visited
    in descending order; subsequent blocks are chosen from the next entry
    popped from ``trace_q`` and visited in full. A "Move" that writes the
    tracked register switches tracking to its source register. Any other
    instruction that writes it is taken as the update point: it is counted
    in ``pointer_update_addr_dict``, reported through ``record_update_time``,
    ``record_update_addr`` and ``record_time_to_update``, and the list of
    visited basic-block addresses is returned.

    ``None`` is returned when the trace runs out, when the next trace entry
    is ``0`` or lies in the block just walked, or when it cannot be mapped
    to a basic block.

    NOTE(review): block nesting was rebuilt from a flattened source --
    confirm against the original file.
    """
    BBs_in_loop = []
    pc_in_trace = last_trace_pc = trace_q.popleft()
    if last_trace_pc != delinq_load_addr:
        return
    inter_delinq_loads = 0
    reversed_BB_addr_range = sorted(cfg.BB_dict[BB_addr], reverse=True)
    # x <= last_trace_pc would include the load itself in the update
    # analysis; that is deliberately avoided.
    reversed_BB_addr_range = filter(lambda x: x < last_trace_pc, reversed_BB_addr_range)
    score = 0
    while True:
        if not BB_addr in BBs_in_loop:
            BBs_in_loop.append(BB_addr)
        for pc_in_BB in reversed_BB_addr_range:
            tag = None
            # NOTE(review): the delinquent-load check and the score increment
            # are assumed to be nested inside this check -- confirm.
            if pc_in_BB in cfg.ins_tags_dict:
                tag = cfg.ins_tags_dict[pc_in_BB]
                if pc_in_BB in conf.all_delinq_loads_list and prefetch_decisions:
                    # Count the load once if any of the three thresholds is met.
                    if prefetch_decisions[pc_in_BB].l3_mr >= 0.01:
                        inter_delinq_loads += 1
                    elif prefetch_decisions[pc_in_BB].l2_mr >= 0.05:
                        inter_delinq_loads += 1
                    elif prefetch_decisions[pc_in_BB].l1_mr >= 0.2:
                        inter_delinq_loads += 1
                score += 1
            # Keep the trace in step with the block walk.
            if trace_q and pc_in_BB == trace_q[0]:
                trace_q.popleft()
            # Skip instructions that have no destination-register entry.
            if not pc_in_BB in cfg.ins_dst_regs_dict.keys():
                continue
            reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_BB][0]
            if reg_updated_curr_pc == track_reg:
                if tag == "Read":
                    if not pc_in_BB in pointer_update_addr_dict:
                        pointer_update_addr_dict[pc_in_BB] = 1
                    else:
                        pointer_update_addr_dict[pc_in_BB] += 1
                    track_reg = None
                    # The pointer update instruction itself should not be
                    # included; its latency should not be counted.
                    score -= 1
                    record_update_time(delinq_load_addr, score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
                    record_update_addr(delinq_load_addr, pc_in_BB, delinq_loads_update_addr)
                    record_time_to_update(delinq_load_addr, pc_in_BB, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf)
                    return BBs_in_loop
                # move r1, r2 -- not a memory operation
                elif tag == "Move":
                    reg_read = cfg.ins_src_regs_dict[pc_in_BB][0]
                    track_reg = reg_read
                # Not a move, but some instruction that changes track_reg and
                # is not traceable further; handled exactly like "Read".
                else:
                    if not pc_in_BB in pointer_update_addr_dict:
                        pointer_update_addr_dict[pc_in_BB] = 1
                    else:
                        pointer_update_addr_dict[pc_in_BB] += 1
                    track_reg = None
                    score -= 1
                    record_update_time(delinq_load_addr, score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
                    record_update_addr(delinq_load_addr, pc_in_BB, delinq_loads_update_addr)
                    record_time_to_update(delinq_load_addr, pc_in_BB, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf)
                    return BBs_in_loop
        # Block exhausted without an update: move to the block of the next
        # trace entry.
        if trace_q:
            pc_in_trace = trace_q.popleft()
        else:
            return
        # Stop on the 0 entry or when the trace points back into the block
        # that was just walked.
        if pc_in_trace == 0 or pc_in_trace in reversed_BB_addr_range:
            return
        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
        if BB_addr == None:
            return
        reversed_BB_addr_range = sorted(cfg.BB_dict[BB_addr], reverse=True)
    # Reaching this point means no update instruction was found.
    return