def get_lookup(agi):
    """Build an empty 2D lookup: dict[map][opcode] -> list.

    @param agi: all generators info; handed to ild_info.get_maps
    @return: a dict with one entry per map returned by
             ild_info.get_maps(agi); each entry is a fresh
             collections.defaultdict(list), so any opcode key starts
             out as an empty list.
    """
    return {map_key: collections.defaultdict(list)
            for map_key in ild_info.get_maps(agi)}
def get_constraints_lu_table(agi, ptrns_by_map_opcode, state_space,
                             vexvalid, all_ops_widths):
    """
    Build the constraint dictionaries for every (map, opcode) bin of
    one vexvalid space.

    @param agi: all generators info; used to obtain the list of maps
    @param ptrns_by_map_opcode: 2D lookup [map][hex opcode] -> patterns
    @param state_space: forwarded unchanged to _get_united_cdict
    @param vexvalid: the vexvalid space the dictionaries are built for
    @param all_ops_widths: forwarded unchanged to _get_united_cdict

    @return: a tuple (cdict_by_map_opcode, cnames).
    cnames is a set of all constraint names used in patterns.
    cdict_by_map_opcode is a traditional 2D lookup dict from map,opcode
    to constraint_dict_t objects that represent the mapping from
    constraint values to the different patterns of the corresponding
    (map,opcode,vexvalid) bin. These cdict objects can later be used
    for generating hash functions from constraint values to patterns
    (inums).
    """
    cnames = set()
    cdict_by_map_opcode = collections.defaultdict(dict)
    for insn_map in ild_info.get_maps(agi):
        map_ptrns = ptrns_by_map_opcode[insn_map]
        map_cdicts = cdict_by_map_opcode[insn_map]
        for opcode_num in range(256):
            #opcodes are keyed by their hex string, e.g. '0x78'
            opcode_key = hex(opcode_num)
            cdict = _get_united_cdict(map_ptrns[opcode_key], state_space,
                                      vexvalid, all_ops_widths)
            map_cdicts[opcode_key] = cdict
            #falsy cdicts contribute no constraint names
            if cdict:
                cnames = cnames | set(cdict.cnames)
    return cdict_by_map_opcode, cnames
def __init__(self, is_amd=True, info_lookup=None):
    """Initialise the storage, creating an empty lookup when none is given.

    @param is_amd: stored as self.is_amd and passed to ild_info.get_maps
                   to decide which maps get an entry
    @param info_lookup: an existing 2D lookup to adopt as-is; when it is
                        None a new lookup is created with one
                        collections.defaultdict(list) per map
    """
    self.is_amd = is_amd
    #identity test: only a missing argument triggers the default lookup,
    #regardless of how the supplied object implements equality
    if info_lookup is None:
        self.lookup = {}
        for insn_map in ild_info.get_maps(is_amd):
            self.lookup[insn_map] = collections.defaultdict(list)
    else:
        self.lookup = info_lookup
def gen_xed3(agi,ild_info,is_3dnow,ild_patterns,
            all_state_space,ild_gendir,all_ops_widths):
    """Generate the xed3 hash functions and lookup tables.

    For every VEXVALID space the patterns are binned by map-opcode,
    turned into constraint dictionaries and then into hash functions.
    The results are dumped through ild_codegen.dump_vv_map_lookup and
    finally xed3_nt.work is invoked for the dynamic decoding part.

    @param agi: all generators info
    @param ild_info: object providing get_maps(is_3dnow)
    @param is_3dnow: forwarded to get_maps and to the ild_cdict /
                     ild_codegen helpers
    @param ild_patterns: iterable of patterns, each with insn_map and
                         opcode attributes
    @param all_state_space: dict; its 'VEXVALID' entry supplies the
                            vexvalid values to iterate over
    @param ild_gendir: directory for the per-vv constraints log files
    @param all_ops_widths: forwarded to the helpers unchanged
    """
    #ptrn_dict[map][opcode] = list of patterns of that map-opcode
    ptrn_dict = {insn_map: collections.defaultdict(list)
                 for insn_map in ild_info.get_maps(is_3dnow)}
    for ptrn in ild_patterns:
        ptrn_dict[ptrn.insn_map][ptrn.opcode].append(ptrn)

    all_cnames = set()
    #FIXME:bad name
    vv_lu = {}
    #mapping between operands lookup function names and the functions
    op_lu_map = {}
    for vv in sorted(all_state_space['VEXVALID'].keys()):
        #cdict is a 2D dictionary:
        #cdict[map][opcode] = ild_cdict.constraint_dict_t
        #each constraint_dict_t describes all the patterns that fall
        #into the corresponding map-opcode.
        #cnames is the set of all constraint names from the patterns
        #in the given vv space
        cdict, cnames = ild_cdict.get_constraints_lu_table(
            ptrn_dict, is_3dnow, all_state_space, vv, all_ops_widths)
        all_cnames = all_cnames.union(cnames)
        _msg("vv%s cnames: %s" % (vv,cnames))

        #now generate the C hash functions for the constraint
        #dictionaries
        log_path = mbuild.join(ild_gendir, 'all_constraints_vv%s.txt' % vv)
        ph_lu, lu_fo_list, operands_lu_list = ild_cdict.gen_ph_fos(
            agi, cdict, is_3dnow, log_path, ptrn_dict, vv)

        #hold only one instance of each function: first one wins
        for op_fo in operands_lu_list:
            op_lu_map.setdefault(op_fo.function_name, op_fo)
        vv_lu[str(vv)] = (ph_lu, lu_fo_list)

    _msg("all cnames: %s" % all_cnames)

    #dump the hash functions and lookup tables for obtaining these
    #hash functions in the decode time
    ild_codegen.dump_vv_map_lookup(agi, vv_lu, is_3dnow,
                                   list(op_lu_map.values()),
                                   h_fn='xed3-phash.h')

    #xed3_nt.work generates all the functions and lookup tables for
    #dynamic decoding
    xed3_nt.work(agi, all_state_space, all_ops_widths, ild_patterns)
def _get_united_lookup(ptrn_list, is_3dnow):
    """
    Merge the info objects derived from the grammar with the ones kept
    in the python-based ILD storage.

    @param ptrn_list: patterns handed to get_info_storage
    @param is_3dnow: selects the maps and is passed on as is_amd to the
                     new storage object
    @return: ild_storage.ild_storage_t object whose list for each
             map-opcode is the grammar list followed by the storage list
    """
    #ild_info_storage_t object built from the grammar
    grammar_infos = get_info_storage(ptrn_list, 0, is_3dnow)

    #ild_info_storage_t object from the ild python-based storage
    stored_infos = ild_storage_data.gen_ild_info()

    #FIXME: should we make is_amd=(is_3dnow or from_storage.is_3dnow)?
    united_lookup = ild_storage.ild_storage_t(is_amd=is_3dnow)

    #unite the lookups, conflicts will be resolved by priority
    for insn_map in ild_info.get_maps(is_3dnow):
        for op in range(256):
            opcode_key = hex(op)
            grammar_part = grammar_infos.get_info_list(insn_map, opcode_key)
            storage_part = stored_infos.get_info_list(insn_map, opcode_key)
            united_lookup.set_info_list(insn_map, opcode_key,
                                        grammar_part + storage_part)
    return united_lookup
def gen_xed3(agi, ild_info, ild_patterns,
             all_state_space, ild_gendir, all_ops_widths):
    """Generate the static and dynamic decoder pieces of xed3.

    Patterns are binned by map-opcode; for every VEXVALID space the
    bins become constraint dictionaries and then perfect-hash function
    objects. ild_codegen.gen_static_decode dumps those, and
    dec_dyn.work produces the dynamic decode functions and tables.

    @param agi: all generators info
    @param ild_info: object providing get_maps(agi)
    @param ild_patterns: iterable of patterns, each with insn_map and
                         opcode attributes
    @param all_state_space: dict; its 'VEXVALID' entry supplies the
                            vexvalid values to iterate over
    @param ild_gendir: directory for the per-vv constraints log files
    @param all_ops_widths: forwarded to the helpers unchanged
    """
    # map,opcode -> pattern
    ptrn_dict = {insn_map: collections.defaultdict(list)
                 for insn_map in ild_info.get_maps(agi)}
    for ptrn in ild_patterns:
        ptrn_dict[ptrn.insn_map][ptrn.opcode].append(ptrn)

    all_cnames = set()
    #FIXME:bad name
    vv_lu = {}      # vexvalid-space -> ( ph_lu, lu_fo_list)
    #mapping between operands and their look up function
    op_lu_map = {}  # func name -> function (for unique-ifying)

    for vv in sorted(all_state_space['VEXVALID'].keys()):
        #cdict is a 2D dictionary:
        #cdict[map][opcode] = ild_cdict.constraint_dict_t
        #Each constraint_dict_t describes all the patterns that fall
        #into the corresponding map-opcode.
        #cnames is a set of all constraint names from the patterns
        #in the given vv space
        cdict, cnames = ild_cdict.get_constraints_lu_table(
            agi, ptrn_dict, all_state_space, vv, all_ops_widths)
        all_cnames = all_cnames.union(cnames)
        _msg("vv%s cnames: %s" % (vv,cnames))

        #now generate the C hash functions for the constraint
        #dictionaries.
        #
        # ph_lu            map from map,opcode -> hash fn name
        # lu_fo_list       list of all phash fn objects
        # operands_lu_list list of operands lookup fns
        ph_lu, lu_fo_list, operands_lu_list = ild_cdict.gen_ph_fos(
            agi,
            cdict,
            mbuild.join(ild_gendir, 'all_constraints_vv%s.txt' % vv),
            ptrn_dict,
            vv)

        #hold only one instance of each function: first one wins
        for op_fo in operands_lu_list:
            op_lu_map.setdefault(op_fo.function_name, op_fo)
        vv_lu[str(vv)] = (ph_lu, lu_fo_list)

    _msg("all cnames: %s" % all_cnames)

    #dump the (a) hash functions and (b) lookup tables for obtaining
    #these hash functions (at decode time). ** Static decode **
    ild_codegen.gen_static_decode(agi,
                                  vv_lu,
                                  list(op_lu_map.values()),
                                  h_fn='xed3-phash.h')

    #dec_dyn.work(...) generates all the functions and lookup tables for
    # ** Dynamic decode **
    dec_dyn.work(agi, all_state_space, all_ops_widths, ild_patterns)
def gen_l1_functions_and_lookup(agi, united_lookup, disp_dict):
    """Compute L1(conflict resolution) functions list and disp_bytes
    lookup tables dict.

    @param agi: all generators info
    @param united_lookup: the 2D lookup by map-opcode to info objects list.
    united_lookup['0x0']['0x78'] == [ild_info1, ild_info2, ... ]
    @type united_lookup:
    {string(insn_map) : {string(opcode): [ild_info.ild_info_t]} }
    @param disp_dict: forwarded to the conflict detection / resolution
    helpers and to get_l2_fn_from_info

    @return: tuple (l1_resolution_fos, l1_lookup) where
    l1_resolution_fos is the list of L1 function objects to define and
    l1_lookup[insn_map][hex opcode] is the name of the function (or a
    NULL pointer expression) to place in the lookup table.
    """
    #list of L1 function objects that resolve conflicts in map-opcodes
    #functions. This list will be dumped to xed_ild_imm_l1.h
    l1_resolution_fos = []

    #dictionary l1_lookup[insn_map][opcode] = l1_function_name
    #this dictionary will be used to dump the has_imm lookup tables
    l1_lookup = {}

    #dictionary from function body(as string) to list of function objects
    #with that body.
    #This dict will be used to bucket identical functions in order to
    #not define same functions more than once.
    l1_bucket_dict = collections.defaultdict(list)

    for insn_map in ild_info.get_maps(united_lookup.is_amd):
        map_lookup = {}
        l1_lookup[insn_map] = map_lookup
        for opcode in range(0, 256):
            opcode_key = hex(opcode)

            #look in the hardcoded resolution functions
            if (insn_map, opcode_key) in harcoded_res_functions:
                map_lookup[opcode_key] = harcoded_res_functions[
                    (insn_map, opcode_key)]
                continue

            info_list = united_lookup.get_info_list(insn_map, opcode_key)
            #get only info objects with minimum priority
            info_list = ild_info.get_min_prio_list(info_list)
            is_conflict = _is_disp_conflict(info_list, disp_dict)

            if len(info_list) > 1 and is_conflict:
                l1_fo = _resolve_conflicts(agi, info_list, disp_dict)
                if not l1_fo:
                    ildutil.ild_err(
                        'FAILED TO GENERATE L1 CONFLICT ' +
                        'RESOLUTION FUNCTION FOR DISP\n infos: %s' %
                        "\n".join([str(info) for info in info_list]))
                l1_bucket_dict[l1_fo.emit_body()].append(l1_fo)
                l1_fn = l1_fo.function_name
            elif len(info_list) == 0:
                #if map-opcode pair is undefined the lookup function ptr is
                #NULL.
                #This will happen for opcodes like 0F in 0F map - totally
                #illegal opcodes, that should never be looked up in runtime.
                #We define NULL pointer for such map-opcodes
                l1_fn = '(%s)0' % (ildutil.l1_ptr_typename)
            else:
                #there are no conflicts, we can use L2 function as L1
                info = info_list[0]
                l1_fn = get_l2_fn_from_info(info, disp_dict)
            map_lookup[opcode_key] = l1_fn

    #there are 18 L1 functions with same body (currently, may change
    #in future)
    #we are going to bucket L1 functions with identical body but different
    #names in order to have only one function for each unique body
    #FIXME: the bucketed function name is not self descriptive
    bucket_name = 'xed_lookup_function_DISP_BUCKET_%s_l1'
    cur_bucket = 0
    for res_fun_list in l1_bucket_dict.values():
        if len(res_fun_list) == 1:
            #only one such function - we should define it as is
            l1_resolution_fos.append(res_fun_list[0])
            continue

        #more than one L1 function with identical body
        #we should define L1 function with that body
        #and fix references in the lookup table

        #the function name
        cur_buck_name = bucket_name % cur_bucket
        cur_bucket += 1

        #fix references in the lookup table: collect every name of this
        #bucket first so the table is scanned once per bucket instead
        #of once per function in the bucket
        stale_names = {res_fun.function_name for res_fun in res_fun_list}
        for per_opcode in l1_lookup.values():
            #only values of existing keys are replaced, so iterating
            #over items() while assigning is safe
            for opcode_key, cur_fn in per_opcode.items():
                if cur_fn in stale_names:
                    per_opcode[opcode_key] = cur_buck_name

        #define the L1 function and add it to the list of L1 functions
        #to dump; the first function of the bucket is reused under the
        #bucket name
        buck_fo = res_fun_list[0]
        buck_fo.function_name = cur_buck_name
        l1_resolution_fos.append(buck_fo)

    return l1_resolution_fos, l1_lookup
def dump_vv_map_lookup(agi, vv_lu, is_3dnow, op_lu_list,
                       h_fn='xed3-phash.h'):
    """Dump the phash functions and the lookup tables that lead to them.

    For every vv the per-opcode functions, the 256-entry arrays and a
    header declaring those arrays are written. Afterwards the operand
    lookup functions and the top-level header h_fn with the [vv][map]
    table are emitted.

    @param agi: all generators info; used to open the output files
    @param vv_lu: dict from vv (as string) to a tuple
                  (phash_map_lu, lu_fo_list)
    @param is_3dnow: passed to ild_info.get_maps to select the maps
    @param op_lu_list: list of operand lookup function objects
    @param h_fn: file name of the top-level header
    """
    operand_lu_includes = ['xed-ild-eosz-getters.h',
                           'xed-ild-easz-getters.h',
                           'xed-internal-header.h',
                           'xed-ild-getters.h',
                           'xed-ild-private.h']
    maplu_headers = []
    all_zero_by_map = {}
    per_map_type = 'xed3_find_func_t'

    for vv in sorted(vv_lu.keys()):
        phash_map_lu, lu_fo_list = vv_lu[vv]
        zero_flags = _test_map_all_zero(vv, phash_map_lu)
        all_zero_by_map[vv] = zero_flags

        # dump a file w/prototypes and per-opcode functions pointed to
        # by the elements of the various 256-entry arrays.
        pheader = 'xed3-phash-vv{}.h'.format(vv)
        dump_flist_2_header(agi, pheader, ['xed3-operand-lu.h'], lu_fo_list)

        # dump 256-entry arrays for each (vv,map)
        map_lu_cfn = 'xed3-phash-lu-vv{}.c'.format(vv)
        map_lu_hfn = 'xed3-phash-lu-vv{}.h'.format(vv)
        maplu_headers.append(map_lu_hfn)
        name_pfx = 'xed3_phash_vv{}'.format(vv)
        dump_lookup(agi,
                    phash_map_lu,
                    name_pfx,
                    map_lu_cfn,
                    [pheader],
                    per_map_type,
                    output_dir=None,
                    all_zero_by_map=zero_flags)

        # dump a header with the decls for the 256-entry arrays;
        # nothing is emitted for the all-zero (empty) arrays.
        decl_file = agi.open_file(mbuild.join('include-private', map_lu_hfn),
                                  start=False)
        decl_file.start()
        for insn_map in sorted(phash_map_lu.keys()):
            map_arr_name = _get_map_lu_name(name_pfx, insn_map)
            if not zero_flags[insn_map]:
                decl_file.add_code("extern const {} {}[256];".format(
                    per_map_type, map_arr_name))
        decl_file.close()

    #dump all the operand lookup functions in the list to a header file
    hdr = 'xed3-operand-lu.h'
    dump_flist_2_header(agi, hdr, operand_lu_includes, op_lu_list,
                        emit_bodies=False)
    dump_flist_2_header(agi, 'xed3-operand-lu.c', [hdr], op_lu_list,
                        is_private=False, emit_headers=False)

    # write xed3-phash.h (top most thing)
    top_file = agi.open_file(mbuild.join('include-private', h_fn),
                             start=False)
    for header in maplu_headers:
        top_file.add_header(header)
    top_file.start()

    maps = ild_info.get_maps(is_3dnow)
    vv_num = [int(x) for x in vv_lu.keys()]
    vv_index = max(vv_num) + 1

    top_file.add_code('#define XED_PHASH_MAP_LIMIT {}'.format(len(maps)))
    top_file.add_code('const {} {}[{}][XED_PHASH_MAP_LIMIT] = {{'.format(
        'xed3_find_func_t*', 'xed3_phash_lu', vv_index))

    #vv is not sequential it may have holes
    for vv in range(vv_index):
        vv_present = vv in vv_num
        row = []
        #it's important that maps are correctly ordered
        for imap in maps:
            if vv_present and not all_zero_by_map[str(vv)][imap]:
                row.append(_get_map_lu_name('xed3_phash_vv%d' % vv, imap))
            else:
                #missing vv or all-zero map: null table pointer
                row.append('0')
        top_file.add_code('{' + ', '.join(row) + '},')
    top_file.add_code('};')
    top_file.close()
def gen_static_decode(agi, vv_lu, op_lu_list, h_fn='xed3-phash.h'):
    """generate static decoder

    For every vv (encoding space) the per-opcode functions, the
    256-entry arrays and a header declaring those arrays are written.
    Afterwards the operand lookup functions and the top-level header
    h_fn with the [vv][map_id] table are emitted.

    @param agi: all generators info; used to open the output files and
                to query ild_info for map information
    @param vv_lu: dict from vv (as string) to a tuple
                  (phash_map_lu, lu_fo_list)
    @param op_lu_list: list of operand lookup function objects
    @param h_fn: file name of the top-level header
    """
    phash_headers = ['xed-ild-eosz-getters.h',
                     'xed-ild-easz-getters.h',
                     'xed-internal-header.h',
                     'xed-ild-private.h']
    maplu_headers = []
    all_zero_by_map = {}
    for vv in sorted(vv_lu.keys()):
        (phash_map_lu, lu_fo_list) = vv_lu[vv]
        all_zero_by_map[vv] = _test_map_all_zero(vv, phash_map_lu)

        # dump a file w/prototypes and per-opcode functions pointed to
        # by the elements of the various 256-entry arrays.
        pheader = 'xed3-phash-vv{}.h'.format(vv)
        dump_flist_2_header(agi, pheader, ['xed3-operand-lu.h'], lu_fo_list)

        # dump 256-entry arrays for each (vv,map)
        map_lu_cfn = 'xed3-phash-lu-vv{}.c'.format(vv)
        map_lu_hfn = 'xed3-phash-lu-vv{}.h'.format(vv)
        maplu_headers.append(map_lu_hfn)

        name_pfx = 'xed3_phash_vv{}'.format(vv)
        elem_type = 'xed3_find_func_t'
        dump_lookup(agi, #dump 256-entry arrays for maps in this encspace
                    phash_map_lu,
                    name_pfx,
                    map_lu_cfn,
                    [pheader],
                    elem_type,
                    output_dir=None,
                    all_zero_by_map=all_zero_by_map[vv])

        # dump a header with the decls for the 256-entry arrays;
        # nothing is emitted for the all-zero (empty) arrays.
        h_file = agi.open_file(mbuild.join('include-private',map_lu_hfn),
                               start=False)
        h_file.start()
        for insn_map in sorted(phash_map_lu.keys()):
            arr_name = _get_map_lu_name(name_pfx, insn_map)
            if not all_zero_by_map[vv][insn_map]:
                h_file.add_code("extern const {} {}[256];".format(
                    elem_type, arr_name))
        h_file.close()

    #dump all the operand lookup functions in the list to a header file
    hdr = 'xed3-operand-lu.h'
    dump_flist_2_header(agi, hdr,
                        phash_headers,
                        op_lu_list,
                        emit_bodies=False)
    dump_flist_2_header(agi, 'xed3-operand-lu.c',
                        [hdr],
                        op_lu_list,
                        is_private=False,
                        emit_headers=False)

    # write xed3-phash.h (top most thing).
    #
    # xed3-phash.h contains a table indexed by encoding-space &
    # decoding-map mapping to functions handling decoding that part of
    # the space.
    h_file = agi.open_file(mbuild.join('include-private',h_fn),
                           start=False)
    for header in maplu_headers:
        h_file.add_header(header)
    h_file.start()

    vv_num = {int(x) for x in vv_lu.keys()}
    vv_max = max(vv_num) + 1
    max_maps = ild_info.get_maps_max_id(agi) + 1
    arr_name = 'xed3_phash_lu'
    h_file.add_code('#define XED_PHASH_MAP_LIMIT {}'.format(max_maps))
    h_file.add_code('const xed3_find_func_t* {}[{}][XED_PHASH_MAP_LIMIT] = {{'.format(
        arr_name, vv_max))

    for vv in range(0,vv_max):
        # every slot defaults to a null table pointer
        init_vals = ['0'] * max_maps
        # vv values may have holes: a vv without an entry in vv_lu has
        # no all-zero information and no generated arrays, so its row
        # stays all '0'.
        if vv in vv_num:
            zero_flags = all_zero_by_map[str(vv)]
            vv_pfx = 'xed3_phash_vv{}'.format(vv)
            # dict indexed by map_id
            dmap = {mi.map_id:mi
                    for mi in ild_info.get_maps_for_space(agi,vv)}
            for imap in range(0,max_maps):
                if imap not in dmap:
                    continue
                mi = dmap[imap]
                # if there are maps without instructions, then there
                # won't be top-level variables to look at for those
                # maps; they keep the default '0'.
                if not zero_flags[mi.map_name]:
                    init_vals[imap] = _get_map_lu_name(vv_pfx, mi.map_name)
        h_file.add_code('{{ {} }},'.format(', '.join(init_vals)))
    h_file.add_code('};')
    h_file.close()
def gen_ph_fos(agi, cdict_by_map_opcode, log_fn, ptrn_dict, vv):
    """
    Returns a tuple (phash_lu, phash_fo_list, op_lu_list)
    * phash_lu: is a traditional 2D dict by (map, opcode) to a
      hash function name. A map whose 256 opcodes are all empty gets
      None instead of a per-opcode dict.
    * phash_fo_list: is a list of all phash function objects created
      (we might have fos that are not in lookup table - when we have
      2-level hash functions).
    * op_lu_list: is a list for all the operands lookup functions

    Also writes log file for debugging.

    @param agi: all generators info; used to obtain the list of maps
    @param cdict_by_map_opcode: 2D lookup [map][hex opcode] -> constraint
                                dictionary (falsy when the bin is empty)
    @param log_fn: path of the log file to (over)write
    @param ptrn_dict: not used by this function
    @param vv: vexvalid value; used in function names and log messages
    """
    maps = ild_info.get_maps(agi)
    cnames = set() # only for logging
    stats = {
        '0. #map-opcodes': 0,
        '1. #entries': 0,
        '2. #hentries': 0,
        '3. #hashes': 0,
        '4. #min_hashes': 0,
        '5. #cdict_size_1_to_10': 0,
        '6. #cdict_size_10_to_20': 0,
        '7. #cdict_size_20_to_100': 0,
        '8. #cdict_size_at_least_100': 0
        }
    lu_fo_list = []
    op_lu_map = {}  # fn name -> fn obj
    phash_lu = {}   # map, opcode -> fn name

    # the context manager guarantees the log is flushed and closed even
    # when hash generation raises or the error helper exits.
    with open(log_fn, 'w') as log_f:
        for insn_map in maps:
            phash_lu[insn_map] = {}
            zeros = 0
            for opcode_num in range(0, 256):
                opcode = hex(opcode_num)
                cdict = cdict_by_map_opcode[insn_map][opcode]
                if not cdict:
                    phash_lu[insn_map][opcode] = '(xed3_find_func_t)0'
                    zeros = zeros + 1
                    continue

                stats['0. #map-opcodes'] += 1
                stats['1. #entries'] += len(cdict.tuple2rule)
                cnames = cnames.union(set(cdict.cnames))
                _log(log_f,'XYZ VV: {} MAP:{} OPCODE:{}:\n{}\n'.format(
                    vv, insn_map, opcode, cdict))

                phash = ild_phash.gen_hash(cdict)
                if phash:
                    _log(log_f,"%s" % phash)
                    phash_id = 'map%s_opcode%s_vv%d' % (insn_map, opcode, vv)
                    fname = "%s_%s" % (_find_fn_pfx,phash_id)
                    (fo_list, op_lu_fo) = phash.gen_find_fos(fname)
                    lu_fo_list.extend(fo_list)

                    #hold only one instance of each function
                    if op_lu_fo:
                        if op_lu_fo.function_name not in op_lu_map:
                            op_lu_map[op_lu_fo.function_name] = op_lu_fo

                    for fo in fo_list:
                        _log(log_f,'//find function:\n')
                        _log(log_f,fo.emit())
                        _log(log_f,'-----------------------------\n')

                    #FIXME: assumption: L2 function is last in the list
                    #maybe return dict or tuple to make a distinction
                    #between L2 and L1 functions?
                    #NOTE(review): this reads the accumulated list, so an
                    #empty fo_list would pick a function of an earlier
                    #map-opcode - confirm gen_find_fos never returns [].
                    phlu_fn = lu_fo_list[-1]
                    phash_lu[insn_map][opcode] = phlu_fn.function_name
                    phash.update_stats(stats)
                else:
                    _log(log_f,'---NOPHASH-----\n')
                    msg = "Failed to gen phash for map %s opcode %s"
                    ildutil.ild_err(msg % (insn_map, opcode))

            if zeros == 256: # all zero... shortcut to avoid scanning maps for "all-zeros"
                _log(log_f, "ZEROING phash_lu for map {} vv {}\n".format(
                    insn_map, vv))
                phash_lu[insn_map] = None

        _log(log_f,"cnames: %s\n" %cnames)
        for key in sorted(stats.keys()):
            _log(log_f,"%s %s\n" % (key,stats[key]))

    return phash_lu, lu_fo_list, list(op_lu_map.values())