Beispiel #1
0
def account_stack_ops(exec_file, mem_instr_addr_l):
    """Print how many of the given instruction addresses are stack operations.

    An address counts as a stack operation when its tag in the disassembled
    function's ``ins_tags_dict`` contains the substring "Stack".

    Args:
        exec_file: executable handed to ``disassm.get_func_disassm``.
        mem_instr_addr_l: sequence of instruction addresses (pcs) to classify.

    Prints two lines to stdout: "<stack ops> <total>" and the ratio of the
    two. An empty address list reports a ratio of 0 instead of raising.
    """
    total = len(mem_instr_addr_l)
    stack_op_count = 0

    if total:
        cfg = disassm.get_func_disassm(exec_file, mem_instr_addr_l[0])

        for pc in mem_instr_addr_l:
            if pc not in cfg.ins_tags_dict:
                # Address 0 is never looked up in another function.
                if pc == 0:
                    continue
                # The pc lies outside the function currently loaded: switch to
                # the function containing it, then classify the pc below so
                # the instruction that triggered the switch is not lost.
                cfg = disassm.get_func_disassm(exec_file, pc)

            if pc in cfg.ins_tags_dict and "Stack" in cfg.ins_tags_dict[pc]:
                stack_op_count += 1

    # Single-argument print(...) yields the same output on Python 2 and 3.
    print("%d %d" % (stack_op_count, total))

    if total:
        stack_op_ratio = float(stack_op_count) / float(total)
    else:
        stack_op_ratio = 0.0

    print("Stack operation ratio: %lf" % (stack_op_ratio))
def record_time_to_update(delinq_load_addr, update_addr, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf):
    """Count trace entries between an update point and the next delinquent load.

    Entries are popped from the left of ``trace_q`` until ``delinq_load_addr``
    is popped. Each entry popped before it adds one to the forward score; the
    entries that are delinquent loads with a high enough miss ratio are
    counted separately. Both counts are then handed to ``record_update_time``
    together with ``time_to_update_dict`` and ``delinq_loads_till_update``.

    Args:
        delinq_load_addr: pc of the delinquent load being analysed.
        update_addr: pc of the update instruction; only used to find the
            first basic block.
        trace_q: queue of pcs supporting ``in``, truthiness and ``popleft``.
            It is consumed in place.
        cfg: disassembly of the current function. It is replaced locally when
            a pc belongs to no basic block of the current function.
        BBs_in_loop: list extended in place with the basic blocks visited.
        delinq_loads_update_addr: unused here; kept for interface stability.
        prefetch_decisions: mapping pc -> object with ``l1_mr``/``l2_mr``/
            ``l3_mr``. When falsy, no delinquent loads are counted.
        conf: object providing ``all_delinq_loads_list``.

    Returns:
        None. Nothing is recorded when the load does not occur in ``trace_q``,
        when a pc of 0 is popped, or when a pc cannot be mapped to a basic
        block even after re-disassembling.
    """
    # Without a later occurrence of the load there is no distance to measure.
    if delinq_load_addr not in trace_q:
        return

    fwd_score = 0
    fwd_delinq_loads = 0

    BB_addr = static_BB_cfg.discover_BB_for_address(update_addr, cfg.BB_dict)

    while trace_q:

        # Registers the block found for the previous entry (or, on the first
        # pass, the block of the update instruction).
        if BB_addr not in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        pc_in_trace = trace_q.popleft()

        if pc_in_trace == 0:
            return

        if pc_in_trace == delinq_load_addr:
            break

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)

        if BB_addr is None:
            # The pc is outside the current function: load the function that
            # contains it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr is None:
                return

        # cfg is refreshed above *before* this check so that a delinquent
        # load in a newly entered function is tested against that function's
        # tags, as the pointer_analysis_with_trace_hints tracers do.
        if pc_in_trace in cfg.ins_tags_dict:
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                miss_ratios = prefetch_decisions[pc_in_trace]
                if (miss_ratios.l3_mr >= 0.01
                        or miss_ratios.l2_mr >= 0.05
                        or miss_ratios.l1_mr >= 0.2):
                    fwd_delinq_loads += 1

        fwd_score += 1

    record_update_time(delinq_load_addr, fwd_score, time_to_update_dict, fwd_delinq_loads, delinq_loads_till_update)
def pointer_analysis_with_trace_hints(track_reg, is_track_idx_reg, delinq_load_addr, BB_addr, trace_q, cfg, pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, delinq_loads_update_addr, update_trace_dict, prefetch_decisions, conf):
    """Follow `track_reg` through an instruction trace to find its update point.

    The first entry popped from `trace_q` must equal `delinq_load_addr`;
    otherwise the function returns None immediately. The remaining entries
    are popped one by one. Whenever an instruction's first destination
    register equals `track_reg` (and the value is not flagged as living on
    the stack), the instruction is queued as a (pc, score) pair in a bounded
    queue and tracking moves to the register it reads ("Read", "Move", "Lea")
    or to its index register.

    An instruction writing `track_reg` is treated as the update point when
    `is_track_idx_reg` is set, or when its pc is already queued and its tag
    is not "Move". At that point the update address, the scores and the pc
    sequence are recorded through record_update_time / record_update_addr /
    record_update_trace.

    Side effects: consumes `trace_q`, increments `pointer_update_addr_dict`,
    and may set cfg.ins_tags_dict[pc] to "MoveConst" for a "Move" with no
    source-register entry.

    Returns:
        The list of basic-block addresses visited when an update point was
        found or when the bounded queue filled up (10 entries); None on every
        other exit (wrong first entry, pc 0, unmappable pc, trace exhausted).

    NOTE(review): presumably the trace is ordered backwards in time from the
    delinquent load, since tracking moves from destination to source -- confirm.
    NOTE(review): a second definition with the same name but a shorter
    parameter list appears later in this source; if both live in one module
    the later one replaces this one -- confirm.
    """

    BBs_in_loop = []

    # Instructions that wrote the tracked value so far, as (pc, score) pairs.
    update_trace_q = Queue.Queue(10)
    
    # pc sequence reported for the update: the delinquent load plus up to 10 queued pcs.
    precomp_q = Queue.Queue(11)

    last_trace_pc = trace_q.popleft()


    if last_trace_pc != delinq_load_addr:
        return

    # Delinquent loads (by miss-ratio threshold) seen since the start of the walk.
    inter_delinq_loads = 0

    # Number of trace entries processed so far.
    score = 0

    is_reg_pushed_on_stack = False

    # NOTE(review): assigned in the "Move" case below but never read in this function.
    equal_reg = None
    

    while trace_q:

        pc_in_trace = trace_q.popleft()

        if pc_in_trace == 0:
            return

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)

        if BB_addr == None:
            # pc is not in the current function: disassemble the function containing it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr == None:
                return

        if not BB_addr in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        tag = None
        if pc_in_trace in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc_in_trace]
            # Count the pc as an intervening delinquent load if any cache level's miss ratio reaches its threshold.
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                if prefetch_decisions[pc_in_trace].l3_mr >= 0.01:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l2_mr >= 0.05:
                    inter_delinq_loads += 1
                elif prefetch_decisions[pc_in_trace].l1_mr >= 0.2:
                    inter_delinq_loads += 1

        score += 1


        # Checked before any put() below, so a put() never blocks on a full queue.
        if update_trace_q.full():
            print >> sys.stderr, "Too deep object nesting encountered: NestTrace @ \n"
            print >> sys.stderr, update_trace_q
            return BBs_in_loop

        # skip instructions that have no destination-register entry
        if not pc_in_trace in cfg.ins_dst_regs_dict.keys():
            continue
        reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_trace][0]

#        if pc_in_trace in update_trace_q.queue and tag != "Move":
        # Update point: the tracked register is written and either an index register is being
        # tracked or this pc was already queued (seen before) with a tag other than "Move".
        # NOTE(review): `pc_in_trace in x` tests membership in the (pc, score) tuple, so it
        # would also match a score equal to the pc -- confirm this cannot happen.
        if reg_updated_curr_pc == track_reg and (is_track_idx_reg or ( any(pc_in_trace in x for x in update_trace_q.queue) and tag != "Move" )):

            if not pc_in_trace in pointer_update_addr_dict:
                pointer_update_addr_dict[pc_in_trace] = 1
            else:
                pointer_update_addr_dict[pc_in_trace] += 1
              
#            record_time_to_update(delinq_load_addr, pc_in_trace, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf)

#            print >> sys.stderr, ''.join('0x%02x ' % b for b in update_trace_q.queue )

#            print >> sys.stderr, "%lx"%(pc_in_trace)

            precomp_q.put(delinq_load_addr)

            # Score stored with the last entry drained below; stays 0 in the index-register case.
            up_score = 0

            if not is_track_idx_reg:
                # Drain queued entries (oldest first) until the entry holding pc_in_trace has been removed.
                while any(pc_in_trace in x for x in update_trace_q.queue): #pc_in_trace in update_trace_q.queue:
                    #            while update_trace_q.queue:
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    up_score = pc_score_t[1]
                    precomp_q.put(pc)

            elif is_track_idx_reg:
                # Queue the current pc as well, then move every queued pc to precomp_q.
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                while not update_trace_q.empty(): 
                    pc_score_t = update_trace_q.get()
                    pc = pc_score_t[0]
                    precomp_q.put(pc)

#                precomp_q.put(pc_in_trace)

#            print >> sys.stderr, ''.join('0x%02x ' % b for b in precomp_q.queue )
#            while update_trace_q.queue:
#                pc_score_t = update_trace_q.get()
#                pc = pc_score_t[0]
#                print >> sys.stderr,"%lx"%(pc)

#            print >> sys.stderr, "processed precomp queue!"

            record_update_time(delinq_load_addr, up_score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
            record_update_addr(delinq_load_addr, pc_in_trace, delinq_loads_update_addr)

#            while update_trace_q.queue:
#                loop_score = update_trace_q.get()[1]

            loop_score = score

            # Entries processed after the one whose score became up_score.
            fwd_score = loop_score - up_score

            record_update_time(delinq_load_addr, fwd_score, time_to_update_dict, 0, delinq_loads_till_update)

            # Tuple snapshot of the pc sequence, passed to record_update_trace.
            precomp_q_t = tuple(list(precomp_q.queue))

            record_update_trace(precomp_q_t, update_trace_dict)

            return BBs_in_loop


        if reg_updated_curr_pc == track_reg and not is_reg_pushed_on_stack: #the register value should not be on the stack during this

            # Every case below queues (pc, score); most also retarget track_reg to the value's source.
            if tag == "Read":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                # A non-zero index register takes precedence over the source register.
                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True
                    #print >> sys.stderr, "Tracking index register %s from pc @ %lx"%(cfg.regs_dict[track_reg], pc_in_trace)

            # move r1, r2  -- not mem op
            elif tag == "Move":
                if pc_in_trace in cfg.ins_src_regs_dict:
                    reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                    equal_reg = track_reg
                    track_reg = reg_read
                else:
                    # No source-register entry: the instruction is re-tagged in the shared cfg.
                    cfg.ins_tags_dict[pc_in_trace] = "MoveConst"
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
            
            #if the register being tracked is read from the stack
            elif tag == "StackR":
                print >> sys.stderr,"Register pushed on stack @ %lx"%(pc_in_trace)
                is_reg_pushed_on_stack = True #beyond this instruction the value is on the stack
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

            # LEA is handled like "Read": follow the source register, or the index register when present
            elif tag == "Lea":
                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)
                if cfg.ins_idx_reg_dict[pc_in_trace] != 0:
                    track_reg = cfg.ins_idx_reg_dict[pc_in_trace]
                    is_track_idx_reg = True
                    #print >> sys.stderr, "Tracking index register %s from pc @ %lx"%(cfg.regs_dict[track_reg], pc_in_trace)

            # Any other instruction that writes track_reg: queued, but track_reg is left unchanged
            else:
#                return
#                reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
#                track_reg = reg_read
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

        elif tag == "StackW":
            reg_read = cfg.ins_src_regs_dict[pc_in_trace][0]
            if track_reg == reg_read:
                is_reg_pushed_on_stack = False #beyond this the value is in **some** register
                pc_score_t = (pc_in_trace, score)
                update_trace_q.put(pc_score_t)

    # reaching this point is failure to find update instruction

#    print >> sys.stderr, "Failed to locate update point for %lx" % (delinq_load_addr)
#    print >> sys.stderr, update_trace_q

    return
def pointer_analysis_with_trace_hints(track_reg, delinq_load_addr, BB_addr, trace_q, cfg, pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, delinq_loads_update_addr, prefetch_decisions, conf):
    """Follow `track_reg` through an instruction trace to find its update point.

    The first entry popped from ``trace_q`` must be ``delinq_load_addr``. The
    remaining entries are popped one at a time. For an instruction whose
    first destination register is ``track_reg`` (while the value is not
    flagged as living on the stack):

    * tag "Move": tracking continues with the instruction's first source
      register;
    * tag "StackR": the value is flagged as being on the stack;
    * any other tag (including "Read" and untagged): the instruction is
      taken as the update point. Its hit count in
      ``pointer_update_addr_dict`` is incremented, the scores and addresses
      are recorded via ``record_update_time`` / ``record_update_addr``, and
      ``record_time_to_update`` consumes the rest of the trace.

    A "StackW" instruction whose first source register is ``track_reg``
    clears the on-stack flag.

    Args:
        track_reg: register to follow.
        delinq_load_addr: pc of the delinquent load the trace starts at.
        BB_addr: ignored on entry; overwritten for every trace entry.
        trace_q: queue of pcs supporting truthiness and ``popleft``. It is
            consumed in place.
        cfg: disassembly of the current function. It is replaced locally when
            a pc belongs to no basic block of the current function.
        prefetch_decisions: mapping pc -> object with ``l1_mr``/``l2_mr``/
            ``l3_mr``. When falsy, no delinquent loads are counted.
        conf: object providing ``all_delinq_loads_list``.
        (remaining dict arguments are updated in place or passed through to
        the ``record_*`` helpers.)

    Returns:
        The list of basic-block addresses visited when an update point was
        found; None otherwise (empty trace, wrong first entry, pc 0,
        unmappable pc, or trace exhausted).
    """
    BBs_in_loop = []

    # An empty trace is a failed lookup like any other, not an IndexError.
    if not trace_q:
        return

    if trace_q.popleft() != delinq_load_addr:
        return

    inter_delinq_loads = 0
    score = 0
    is_reg_pushed_on_stack = False

    while trace_q:

        pc_in_trace = trace_q.popleft()

        if pc_in_trace == 0:
            return

        BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)

        if BB_addr is None:
            # The pc is outside the current function: load the function that
            # contains it and retry once.
            cfg = disassm.get_func_disassm(cfg.exec_file, pc_in_trace)
            BB_addr = static_BB_cfg.discover_BB_for_address(pc_in_trace, cfg.BB_dict)
            if BB_addr is None:
                return

        if BB_addr not in BBs_in_loop:
            BBs_in_loop.append(BB_addr)

        tag = None
        if pc_in_trace in cfg.ins_tags_dict:
            tag = cfg.ins_tags_dict[pc_in_trace]
            # An intervening delinquent load counts once if any cache level's
            # miss ratio reaches its threshold.
            if pc_in_trace in conf.all_delinq_loads_list and prefetch_decisions:
                miss_ratios = prefetch_decisions[pc_in_trace]
                if (miss_ratios.l3_mr >= 0.01
                        or miss_ratios.l2_mr >= 0.05
                        or miss_ratios.l1_mr >= 0.2):
                    inter_delinq_loads += 1

        score += 1

        # Instructions with no destination-register entry cannot update track_reg.
        if pc_in_trace not in cfg.ins_dst_regs_dict:
            continue

        reg_updated_curr_pc = cfg.ins_dst_regs_dict[pc_in_trace][0]

        # The register value must not be on the stack while matching writes.
        if reg_updated_curr_pc == track_reg and not is_reg_pushed_on_stack:

            if tag == "Move":
                # move r1, r2 -- not a memory op: keep following the source.
                track_reg = cfg.ins_src_regs_dict[pc_in_trace][0]

            elif tag == "StackR":
                # Beyond this instruction the value is on the stack.
                is_reg_pushed_on_stack = True

            else:
                # "Read", or any other instruction that changes track_reg and
                # is not traceable further: this is the update point.
                pointer_update_addr_dict[pc_in_trace] = pointer_update_addr_dict.get(pc_in_trace, 0) + 1

                # The score includes the update instruction itself.
                record_update_time(delinq_load_addr, score, pointer_update_time_dict, inter_delinq_loads, delinq_loads_till_use)
                record_update_addr(delinq_load_addr, pc_in_trace, delinq_loads_update_addr)
                record_time_to_update(delinq_load_addr, pc_in_trace, trace_q, cfg, time_to_update_dict, delinq_loads_till_update, BBs_in_loop, delinq_loads_update_addr, prefetch_decisions, conf)
                return BBs_in_loop

        elif tag == "StackW":
            if track_reg == cfg.ins_src_regs_dict[pc_in_trace][0]:
                # Beyond this the value is in **some** register again.
                is_reg_pushed_on_stack = False

    # Reaching this point is a failure to find the update instruction.
    return
def analyze_non_strided_delinq_loads(global_pc_smptrace_hist, global_pc_stride_hist, prefetch_decisions, exec_file, num_samples, avg_mem_latency):
    """Rank pointer-type delinquent loads and analyse the top candidates.

    Steps:
      1. Select the pcs in ``prefetch_decisions`` whose ``pf_type`` contains
         "ptr".
      2. Score each as (number of sample traces / ``num_samples``) times its
         L3 miss ratio, and print the ranking to stderr.
      3. For the highest-scoring pcs (at most 15) tagged 'Read' or 'Write',
         run ``detect_pointer_chasing`` and ``analyze_pointer_prefetch``,
         plus ``do_cost_benefit_analysis`` for pcs that ended up in
         ``conf.indirect_pref_decisions``.
      4. Decide the prefetch schedules and print the indirect decisions.

    Args:
        global_pc_smptrace_hist: mapping pc -> per-pc sample-trace histogram.
        global_pc_stride_hist: passed through to ``detect_pointer_chasing``.
        prefetch_decisions: mapping pc -> object with ``pf_type``, ``l1_mr``,
            ``l2_mr`` and ``l3_mr``.
        exec_file, num_samples, avg_mem_latency: forwarded to ``Conf1``.
            ``num_samples`` is also the divisor of the sample frequency.
    """
    # Number of top-ranked loads that get the full analysis.
    max_candidates = 15

    delinq_load_address_list = sorted(
        pc for pc in prefetch_decisions
        if "ptr" in prefetch_decisions[pc].pf_type)

    conf = Conf1(exec_file, delinq_load_address_list, num_samples, avg_mem_latency)

    print >> sys.stderr, "\nSample freq irregular accesses!\n"

    irr_list = []
    for pc in delinq_load_address_list:
        pc_smptrace_hist = global_pc_smptrace_hist[pc]
        decision = prefetch_decisions[pc]
        sample_freq = float(len(pc_smptrace_hist.keys()))/float(num_samples)
        score = float(sample_freq)*float(decision.l3_mr)
        irr_list.append((pc, sample_freq, decision.l3_mr, decision.l2_mr, decision.l1_mr, score))

    # Highest score first.
    sorted_irr_list = sorted(irr_list, key=operator.itemgetter(5), reverse=True)

    for (pc, sample_freq, l3mr, l2mr, l1mr, score) in sorted_irr_list:
        print >> sys.stderr, "\npc:%lx  freq:%lf  l3mr:%lf  l2mr:%lf  l1mr:%lf  score:%lf"%(pc, sample_freq, l3mr, l2mr, l1mr, score)

    trimmed_delinq_load_addr_list = [entry[0] for entry in sorted_irr_list[:max_candidates]]

    # Stays None when there is no candidate, so the scheduling step below can
    # be skipped instead of failing on an unbound name.
    cfg = None

    for delinq_load_addr in trimmed_delinq_load_addr_list:

        cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr)

        if cfg.ins_tags_dict[delinq_load_addr] not in ('Read', 'Write'):
            continue

        (pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, is_ind, stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr, None, cfg, conf)

        analyze_pointer_prefetch(pointer_update_addr_dict, prefetch_decisions, pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride)

        if delinq_load_addr in conf.indirect_pref_decisions:
            do_cost_benefit_analysis(cfg, conf, delinq_load_addr, prefetch_decisions)

    # The scheduler receives the cfg of the last candidate visited, whether or
    # not that candidate was analysed.
    # NOTE(review): with no candidates there is no cfg to pass, so scheduling
    # is skipped -- confirm decide_prefetch_schedules has nothing to do then.
    if cfg is not None:
        decide_prefetch_schedules(cfg, conf)
    print_indirect_prefetch_decisions(conf)
        except IOError, e:
            continue

        usf_file.close()

        for (pc_rdist_hist, pc_stride_hist, pc_freq_hist, pc_time_hist, pc_corr_hist, pc_fwd_rdist_hist, pc_smptrace_hist) in burst_hists:
            continue

        ins_trace_ptr_nobj_analysis.add_trace_to_global_pc_smptrace_hist(global_pc_smptrace_hist, pc_smptrace_hist)
        ins_trace_ptr_nobj_analysis.add_to_pc_stride_hist(pc_stride_hist, global_pc_stride_hist)

    print >> sys.stderr, "Starting trace analysis..."

    for delinq_load_addr in delinq_load_address_list:

        cfg = disassm.get_func_disassm(conf.exec_file, delinq_load_addr)
    
        if not (cfg.ins_tags_dict[delinq_load_addr] == 'Read' or cfg.ins_tags_dict[delinq_load_addr] == 'Write'):
            continue

        print >> sys.stderr, "Sample frequency %lx: %lf"%(delinq_load_addr, float(len(pc_smptrace_hist.keys()))/float(conf.num_samples))

        (pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, is_ind, stride) = ins_trace_ptr_nobj_analysis.detect_pointer_chasing(global_pc_smptrace_hist, global_pc_stride_hist, delinq_load_addr, None, cfg, conf)

#        analyze_pointer_prefetch(pointer_update_addr_dict, pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride)

        analyze_pointer_prefetch(pointer_update_addr_dict, [], pointer_update_time_dict, time_to_update_dict, delinq_load_addr, delinq_loads_till_update, delinq_loads_till_use, all_BBs_in_loop, cfg, conf, is_ind, stride)

    decide_prefetch_schedules(cfg, conf)
    print_indirect_prefetch_decisions(conf)