def _LoadSectionDataDict(out_path, section_info):
    """Constructs a dict of area name : BDStore of that area's data sections."""
    if FLAGS.GetFlag("debug_level"):
        print("Loading section data...")
    stores = defaultdict(lambda: bd.BDStore(big_endian=True))
    all_areas = list(section_info.area.unique())
    for _, sec in section_info.iterrows():
        # Only data sections are loaded; skip text/bss rows.
        if sec["type"] != "data":
            continue
        ram_offset = int(sec["ram_start"], 16)
        if sec["area"] == "_main":
            # _main's data sections are shared, so register them in every
            # area's BDStore.
            filename = "%02d.raw" % sec["id"]
            for area in all_areas:
                stores[area].RegisterFile(
                    out_path / "sections/_main" / filename, offset=ram_offset)
        else:
            # A REL's data section belongs only to its own area's BDStore.
            area = sec["area"]
            stores[area].RegisterFile(
                out_path / "sections/rel_linked" / (
                    "%s/%02d.raw" % (area, sec["id"])),
                offset=ram_offset)
    return stores
def _ProcessDol(filepath):
    """Dumps the _main DOL and its sections; returns a section-info DataFrame.

    Writes the whole DOL to "_main.dol" and each non-empty section's raw
    bytes to "sections/_main/NN.raw" under the output directory.

    Args:
        filepath: Path to the .dol file to process.

    Returns:
        DataFrame indexed by (area, id, name, type) with file/ram start/end
        offsets and sizes for each DOL section (including synthesized bss
        ranges).
    """
    def _CreateSectionDf(sec_id, name, section_info, columns):
        # One row for a section physically present in the DOL file.
        file_start = section_info[sec_id][0]
        ram_start = section_info[sec_id][1]
        size = section_info[sec_id][2]
        # DOL sections 0-6 are text sections; 7 and up are data.
        return pd.DataFrame(
            [["_main", sec_id, name, "text" if sec_id < 7 else "data",
              file_start, file_start + size, ram_start, ram_start + size,
              size]],
            columns=columns)

    def _CreateBssDf(sec_id, name, section_info, columns, bss_end=None):
        # One row for a bss-type range; it has no file presence (NaNs).
        # The range runs from the RAM end of section `sec_id` to the RAM
        # start of the next section, or to an explicit bss_end.
        ram_start = section_info[sec_id][1] + section_info[sec_id][2]
        ram_end = bss_end if bss_end else section_info[sec_id + 1][1]
        size = ram_end - ram_start
        # ids are offset by 90 to avoid colliding with real section ids.
        return pd.DataFrame(
            [["_main", sec_id + 90, name, "bss", np.nan, np.nan, ram_start,
              ram_end, size]],
            columns=columns)

    if FLAGS.GetFlag("debug_level"):
        print("Processing _main DOL at %s..." % str(filepath))
    store = bd.BDStore(big_endian=True)
    store.RegisterFile(filepath, offset=0)
    view = store.view(0)
    # Put together list of (file_start, ram_start, size) tuples; the DOL
    # header holds three parallel 18-entry u32 arrays at 0x0 / 0x48 / 0x90.
    sections = []
    for _ in range(18):
        sections.append((view.ru32(0), view.ru32(0x48), view.ru32(0x90)))
        view = view.at(4)
    # Get start / end / size of .bss range from the DOL header.
    view = store.view(0)
    bss_start = view.ru32(0xd8)
    bss_size = view.ru32(0xdc)
    bss_end = bss_start + bss_size
    # Output DOL in its entirety, and its individual sections.
    # Context managers guarantee the handles are closed even if a write fails.
    with open(_GetOutputPath("_main.dol"), "wb") as f:
        f.write(store.mem[0].data)
    for sec_id in range(18):
        if sections[sec_id][2]:  # size > 0
            with open(_GetOutputPath("sections/_main/%02d.raw" % sec_id),
                      "wb") as f:
                f.write(view.rbytes(sections[sec_id][2], sections[sec_id][0]))
    # Construct DataFrame of DOL section info.
    columns = [
        "area", "id", "name", "type", "file_start", "file_end", "ram_start",
        "ram_end", "size"]
    dfs = []
    dfs.append(_CreateSectionDf(0, ".init", sections, columns))
    dfs.append(_CreateSectionDf(1, ".text", sections, columns))
    dfs.append(_CreateSectionDf(7, ".ctors", sections, columns))
    dfs.append(_CreateSectionDf(8, ".dtors", sections, columns))
    dfs.append(_CreateSectionDf(9, ".rodata", sections, columns))
    dfs.append(_CreateSectionDf(10, ".data", sections, columns))
    dfs.append(_CreateBssDf(10, ".bss", sections, columns))
    dfs.append(_CreateSectionDf(11, ".sdata", sections, columns))
    dfs.append(_CreateBssDf(11, ".sbss", sections, columns))
    dfs.append(_CreateSectionDf(12, ".sdata2", sections, columns))
    dfs.append(_CreateBssDf(12, ".sbss2", sections, columns, bss_end=bss_end))
    df = pd.concat(dfs, ignore_index=True)
    df = df.set_index(["area", "id", "name", "type"])
    return df
def _ProcessRel(area, filepath, link_address):
    """Dumps an area's REL and its sections; returns a section-info DataFrame.

    Writes the unlinked REL (and, if link_address is truthy, the linked REL)
    plus each non-empty, non-bss section's raw bytes under the output
    directory.

    Args:
        area: Name of the area this REL belongs to.
        filepath: Path to the .rel file to process.
        link_address: RAM address the REL is linked at, or falsy to skip
            linking.

    Returns:
        DataFrame indexed by (area, id, name, type) with file/ram start/end
        offsets and sizes for REL sections 1-6.
    """
    def _CreateSectionDf(sec_id, name, area, link_address, columns,
                         section_tbl):
        # Section table entries are 8 bytes: u32 file offset (low 2 bits are
        # flags, masked off here) followed by u32 size.
        file_start = section_tbl.ru32(8 * sec_id) & ~3
        size = section_tbl.ru32(8 * sec_id + 4)
        sec_type = "data" if sec_id > 3 else "text"
        if file_start == 0:
            # A zero file offset marks a bss section: no file presence.
            sec_type = "bss"
            file_start = np.nan
            file_end = np.nan
            ram_start = np.nan
            ram_end = np.nan
        else:
            ram_start = file_start + link_address
            file_end = file_start + size
            ram_end = ram_start + size
            if not link_address:
                # Unlinked RELs have no meaningful RAM addresses.
                ram_start = np.nan
                ram_end = np.nan
        return pd.DataFrame(
            [[area, sec_id, name, sec_type, file_start, file_end, ram_start,
              ram_end, size]], columns=columns)

    def _OutputSections(area, store, linked_folder_name):
        # Write the full REL image, then each non-empty, non-bss section.
        # Context managers ensure handles are closed even if a write fails.
        with open(_GetOutputPath(
                "%s/%s.rel" % (linked_folder_name, area)), "wb") as f:
            f.write(store.mem[0].data)
        section_tbl = store.view(0)[0x10]
        for sec_id in range(1, 7):
            file_offset = section_tbl.ru32(8 * sec_id) & ~3
            size = section_tbl.ru32(8 * sec_id + 4)
            # Section 6 is bss and has no file data, so it is skipped.
            if size and sec_id < 6:
                with open(_GetOutputPath("sections/%s/%s/%02d.raw" % (
                        linked_folder_name, area, sec_id)), "wb") as f:
                    f.write(store.view(0).rbytes(size, file_offset))

    if FLAGS.GetFlag("debug_level"):
        print("Processing %s REL at %s..." % (area, filepath))
    store = bd.BDStore(big_endian=True)
    store.RegisterFile(filepath, offset=0)
    section_tbl = store.view(0)[0x10]
    # Output REL and its sections, unlinked and linked.
    _OutputSections(area, store, linked_folder_name="rel_unlinked")
    if link_address:
        _LinkRel(store, link_address)
        _OutputSections(area, store, linked_folder_name="rel_linked")
    # Construct DataFrame of REL section info.
    columns = [
        "area", "id", "name", "type", "file_start", "file_end", "ram_start",
        "ram_end", "size"]
    dfs = []
    for (sec_id, name) in {
        1: ".text", 2: ".ctors", 3: ".dtors", 4: ".rodata", 5: ".data",
        6: ".bss"
    }.items():
        dfs.append(_CreateSectionDf(
            sec_id, name, area, link_address, columns, section_tbl))
    df = pd.concat(dfs, ignore_index=True)
    df = df.set_index(["area", "id", "name", "type"])
    return df
def _CombineRels(rel_pattern_str, symbol_table):
    """Combines symbols from multiple area RELs into one custom REL.

    Copies every symbol listed in symbol_table out of its source REL into a
    combined "custom.rel", rebuilding both REL->REL and REL->DOL relocation
    tables so they target the symbols' new offsets.  Also exports a
    "custom_symbols.csv" table describing the symbols' new locations.

    Args:
        rel_pattern_str: Filepath pattern of the source RELs, with "*" as a
            placeholder for the area name.
        symbol_table: DataFrame of symbols to include, with columns: area,
            sec_id, sec_offset, sec_offset_end, name, namespace, size, align.

    Raises:
        CombineRelsError: If a relocation target is ambiguous, links into
            .bss, or a symbol dependency is missing from symbol_table.
    """
    def _CreateExportedTableFormat(row):
        """Converts a symbol table row to exported format."""
        return pd.DataFrame([[
            "custom", row["sec_id"], "%08x" % row["out_offset"], row["name"],
            row["namespace"], "%08x" % row["size"], row["align"]
        ]], columns=[
            "area", "sec_id", "sec_offset", "name", "namespace", "size",
            "align"
        ])

    def _CreateLookupRowWithOutput(row, out_offset):
        """Takes the input symbol_info row and adds the out_offset."""
        return pd.DataFrame([[
            row["area"], row["sec_id"], row["sec_offset"],
            row["sec_offset_end"], row["name"], row["namespace"],
            row["size"], row["align"], out_offset
        ]], columns=[
            "area", "sec_id", "sec_offset", "sec_offset_end", "name",
            "namespace", "size", "align", "out_offset"
        ])

    def _LookupNewOffset(symbol_table, area, sec_id, sec_offset):
        """Returns the new offset corresponding to the old one if one exists.

        If the old offset corresponds to a symbol not in symbol_table,
        returns None, assuming that symbol won't be included in the combined
        REL. Raises an error if there are multiple matches in symbol_table.
        """
        matches = symbol_table[(symbol_table.area == area)
                               & (symbol_table.sec_id == sec_id)
                               & (symbol_table.sec_offset <= sec_offset)
                               & (symbol_table.sec_offset_end > sec_offset)]
        if matches.shape[0] < 1:
            return None
        if matches.shape[0] > 1:
            raise CombineRelsError("Ambiguous symbol match at %s:%d:%08x." %
                                   (area, sec_id, sec_offset))
        # Return the new offset + how far into the symbol the old offset is.
        return (matches["out_offset"].iloc[0] + sec_offset -
                matches["sec_offset"].iloc[0])

    def _WriteToBuffer(buf, offset, value, size):
        """Writes a big-endian integer of the given size into a buffer."""
        for x in range(size):
            buf[offset + x] = ((value >> ((size - 1 - x) * 8)) & 0xff)

    def _AppendToBuffer(buf, value, size):
        """Appends a big-endian integer of the given size to a buffer."""
        for x in range(size):
            buf.append((value >> ((size - 1 - x) * 8)) & 0xff)

    # Buffers for each section's raw, unlinked data (for .bss, only the
    # total length is tracked; bss contents are not stored in the file).
    section_data = [[] for _ in range(6)]
    bss_length = 0
    # Copy all symbols into their respective sections' buffers, and create
    # a new lookup DataFrame including the output locations.
    dfs = []
    for area in sorted(symbol_table.area.unique()):
        if FLAGS.GetFlag("debug_level"):
            print("Processing %s symbols..." % area)
        store = bd.BDStore(big_endian=True)
        store.RegisterFile(rel_pattern_str.replace("*", area), offset=0)
        section_tbl = store.view(0)[0x10]
        for (index, row) in symbol_table.iterrows():
            if row["area"] == area:
                # Add symbol's data to its respective section.
                if row["sec_id"] == 6:
                    # Pad to alignment to find start point of next symbol.
                    while bss_length % 8 != row["sec_offset"] % 8:
                        bss_length += 1
                    out_offset = bss_length
                    # Add size of symbol to bss_length (data is not stored).
                    bss_length += row["size"]
                else:
                    sec_id = row["sec_id"]
                    if sec_id > 1:
                        # If not .text section (fixed-alignment of 4), pad to
                        # alignment to find the start point of next symbol.
                        while (len(section_data[sec_id]) % 8
                               != row["sec_offset"] % 8):
                            section_data[sec_id].append(0)
                    out_offset = len(section_data[sec_id])
                    # Copy bytes from original symbol into section_data.
                    file_offset = section_tbl.ru32(8 * sec_id) & ~3
                    for b in store.view(0).rbytes(
                            row["size"], file_offset + row["sec_offset"]):
                        section_data[sec_id].append(b)
                # Add the new location of the symbol to a new table.
                dfs.append(_CreateLookupRowWithOutput(row, out_offset))
    # Join all lookup rows with output offsets, and order lexicographically.
    df = pd.concat(dfs, ignore_index=True)
    symbol_table = df.sort_values(by=["area", "sec_id", "sec_offset"])
    if FLAGS.GetFlag("debug_level"):
        print("REL symbols extracted; %d symbols processed." %
              symbol_table.shape[0])

    # Buffers for each section's relocatable data (against the same REL).
    rel_data = [[] for _ in range(6)]
    rel_offsets = [0 for _ in range(6)]
    # Buffers for each section's relocatable data (against the main DOL).
    rel_data_main = [[] for _ in range(6)]
    rel_offsets_main = [0 for _ in range(6)]
    # Port all needed relocation information from the original .REL files.
    for area in sorted(symbol_table.area.unique()):
        if FLAGS.GetFlag("debug_level"):
            print("Processing %s relocation tables..." % area)
        store = bd.BDStore(big_endian=True)
        store.RegisterFile(rel_pattern_str.replace("*", area), offset=0)
        header = store.view(0)
        imp_table = header[0x28]
        imp_size = header.ru32(0x2c)
        imp_offset = 0
        # Each imp entry (8 bytes) pairs a module id with a relocation table.
        while imp_offset < imp_size:
            module_id = imp_table.ru32(imp_offset)
            rel_table = imp_table[imp_offset + 4]
            imp_offset += 8
            current_section = 0
            section_offset = 0
            while True:
                # Relocation entry: u16 offset, u8 type, u8 section,
                # u32 addend.
                offset = rel_table.ru16(0)
                rel_type = rel_table.ru8(2)
                section = rel_table.ru8(3)
                addend = rel_table.ru32(4)
                # Advance to next relocation entry.
                rel_table = rel_table.at(8)
                if rel_type == 203:
                    # End of rel table.
                    break
                elif rel_type == 202:
                    # Section change rel entry.
                    current_section = section
                    section_offset = 0
                else:
                    # Assuming no symbols in .bss have linkage; need to add
                    # specific support for that if I'm mistaken.
                    if current_section > 5:
                        raise CombineRelsError("Linking to section 6!")
                    section_offset += offset
                    new_offset = _LookupNewOffset(
                        symbol_table, area, current_section, section_offset)
                    # Not used in combined REL; move to next entry.
                    if new_offset is None:
                        continue
                    # Look up the linked-to symbol address in the combined
                    # REL. (If module_id == 0, i.e. linking to the DOL, the
                    # original addend is already correct.)
                    if module_id != 0:
                        addend = _LookupNewOffset(
                            symbol_table, area, section, addend)
                        # If an address could not be found, a dependency must
                        # have been missing from the input symbol_table.
                        if addend is None:
                            raise CombineRelsError(
                                "Symbol missing dependency at %s:%d:%08x." %
                                (area, current_section, section_offset))
                    # Update the current offset for the given section/imp.
                    if module_id == 0:
                        offset = new_offset - rel_offsets_main[current_section]
                        rel_offsets_main[current_section] = new_offset
                    else:
                        offset = new_offset - rel_offsets[current_section]
                        rel_offsets[current_section] = new_offset
                    # Add this relocation to the respective table.
                    rel_table_buffer = (rel_data_main[current_section]
                                        if module_id == 0
                                        else rel_data[current_section])
                    _AppendToBuffer(rel_table_buffer, offset, 2)
                    _AppendToBuffer(rel_table_buffer, rel_type, 1)
                    _AppendToBuffer(rel_table_buffer, section, 1)
                    _AppendToBuffer(rel_table_buffer, addend, 4)
    if FLAGS.GetFlag("debug_level"):
        print("Relocation tables processed.")

    # Construct the final REL from the buffers put together beforehand.
    rel = []
    # REL header.
    _AppendToBuffer(rel, 40, 4)           # id (arbitrary custom value)
    _AppendToBuffer(rel, 0, 4)            # next
    _AppendToBuffer(rel, 0, 4)            # prev
    _AppendToBuffer(rel, 15, 4)           # numSections
    _AppendToBuffer(rel, 0x4c, 4)         # sectionInfoOffset
    _AppendToBuffer(rel, 0, 4)            # nameOffset
    _AppendToBuffer(rel, 0, 4)            # nameSize
    _AppendToBuffer(rel, 3, 4)            # version
    _AppendToBuffer(rel, bss_length, 4)   # bssSize
    _AppendToBuffer(rel, 0, 4)            # relOffset - will be filled in later.
    _AppendToBuffer(rel, 0, 4)            # impOffset - will be filled in later.
    _AppendToBuffer(rel, 0x10, 4)         # impSize
    _AppendToBuffer(rel, 0, 4)            # prolog/epilog/unresolved/bssSection
    _AppendToBuffer(rel, 0, 4)            # prolog offset
    _AppendToBuffer(rel, 0, 4)            # epilog offset
    _AppendToBuffer(rel, 0, 4)            # unresolved offset
    _AppendToBuffer(rel, 8, 4)            # align
    _AppendToBuffer(rel, 8, 4)            # bssAlign
    _AppendToBuffer(rel, 0, 4)            # fixSize - will be filled in later.
    # Section table (initialize to 15 section table entries' worth of zeroes).
    for _ in range(15 * 8):
        rel.append(0)
    # Section 1 (text); low bit of the offset marks it executable.
    section_start = len(rel)
    _WriteToBuffer(rel, 0x54, section_start | 1, 4)
    rel += section_data[1]
    _WriteToBuffer(rel, 0x58, len(rel) - section_start, 4)
    # Sections 2 and 3 (unused)
    _WriteToBuffer(rel, 0x5c, len(rel), 4)
    _WriteToBuffer(rel, 0x60, 4, 4)
    _AppendToBuffer(rel, 0, 4)
    _WriteToBuffer(rel, 0x64, len(rel), 4)
    _WriteToBuffer(rel, 0x68, 4, 4)
    _AppendToBuffer(rel, 0, 4)
    # Section 4 (rodata); pad to 8-byte alignment first.
    while len(rel) % 8 != 0:
        rel.append(0)
    section_start = len(rel)
    _WriteToBuffer(rel, 0x6c, section_start, 4)
    rel += section_data[4]
    _WriteToBuffer(rel, 0x70, len(rel) - section_start, 4)
    # Section 5 (data); pad to 8-byte alignment first.
    while len(rel) % 8 != 0:
        rel.append(0)
    section_start = len(rel)
    _WriteToBuffer(rel, 0x74, section_start, 4)
    rel += section_data[5]
    _WriteToBuffer(rel, 0x78, len(rel) - section_start, 4)
    # Section 6 (bss)
    _WriteToBuffer(rel, 0x7c, 0, 4)
    _WriteToBuffer(rel, 0x80, bss_length, 4)
    # Pad before imp table (not necessary, but easier to read in a hex editor.)
    while len(rel) % 8 != 0:
        rel.append(0)
    imp_table = len(rel)
    rel_table = len(rel) + 0x10
    _WriteToBuffer(rel, 0x24, rel_table, 4)   # relOffset
    _WriteToBuffer(rel, 0x28, imp_table, 4)   # impOffset
    _WriteToBuffer(rel, 0x48, rel_table, 4)   # fixSize
    # Reserve space for imp table.
    for _ in range(16):
        rel.append(0)
    # Copy REL -> REL relocation data (imp entry 0: module 40 = this REL).
    _WriteToBuffer(rel, imp_table, 40, 4)
    _WriteToBuffer(rel, imp_table + 4, len(rel), 4)
    for x in range(6):
        if len(rel_data[x]):
            _AppendToBuffer(rel, 0, 2)     # offset
            _AppendToBuffer(rel, 202, 1)   # type (change section)
            _AppendToBuffer(rel, x, 1)     # section
            _AppendToBuffer(rel, 0, 4)     # addend
            rel += rel_data[x]
    _AppendToBuffer(rel, 0, 2)             # offset
    _AppendToBuffer(rel, 203, 1)           # type (end of table)
    _AppendToBuffer(rel, 0, 1)             # section
    _AppendToBuffer(rel, 0, 4)             # addend
    # Copy REL -> DOL relocation data (imp entry 1: module 0 = the DOL).
    _WriteToBuffer(rel, imp_table + 8, 0, 4)
    _WriteToBuffer(rel, imp_table + 12, len(rel), 4)
    for x in range(6):
        if len(rel_data_main[x]):
            _AppendToBuffer(rel, 0, 2)     # offset
            _AppendToBuffer(rel, 202, 1)   # type (change section)
            _AppendToBuffer(rel, x, 1)     # section
            _AppendToBuffer(rel, 0, 4)     # addend
            rel += rel_data_main[x]
    _AppendToBuffer(rel, 0, 2)             # offset
    _AppendToBuffer(rel, 203, 1)           # type (end of table)
    _AppendToBuffer(rel, 0, 1)             # section
    _AppendToBuffer(rel, 0, 4)             # addend
    # Export the final REL. (The original code never closed this handle; the
    # with-statement guarantees the data is flushed and the file closed.)
    with open(_GetOutputPath("custom.rel"), "wb") as out_rel:
        out_rel.write(bytes(rel))
    # Export the table of symbol info.
    # TODO: Add column with file-relative offsets?
    dfs = []
    for (index, row) in symbol_table.iterrows():
        dfs.append(_CreateExportedTableFormat(row))
    df = pd.concat(dfs, ignore_index=True)
    df = df.sort_values(by=["area", "sec_id", "sec_offset"])
    df.to_csv(_GetOutputPath("custom_symbols.csv"), index=False)
def _AnnotateSymbols(symbols, section_info, out_path):
    """Joins symbols with section info and writes "annotated_symbols.csv".

    For each symbol row, fills in its section name/type, converts its
    section-relative offset to absolute RAM/file addresses, and (for data
    symbols) infers the symbol's type and value from the previously dumped
    raw section bytes.

    Args:
        symbols: DataFrame of symbols with at least area, sec_id, sec_offset,
            name, namespace, size, and align columns.
        section_info: DataFrame of section info indexed by
            (area, id, name, type), as produced by the DOL/REL processing.
        out_path: Output directory containing the previously dumped
            "sections/..." raw files; the CSV is written here as well.
    """
    def _AddSectionInfoFields(s, section_info):
        # Fills in sec_name / sec_type / ram_addr / file_addr for one symbol
        # row from its section's entry in section_info.
        # NOTE(review): .loc with (area, sec_id) partially indexes the
        # (area, id, name, type) MultiIndex — assumes exactly one section
        # row matches each pair; confirm duplicates can't occur.
        section = section_info.loc[(s["area"], s["sec_id"])]
        s["sec_name"] = section["name"]
        s["sec_type"] = section["type"]
        # ram_start/file_start are hex strings when present; bss sections
        # have no file address (non-string/NaN), which yields NaN here.
        ram_addr = section["ram_start"]
        s["ram_addr"] = ("%08x" % (int(ram_addr, 16) + int(s["sec_offset"], 16))
                         if isinstance(ram_addr, str) and ram_addr else np.nan)
        file_addr = section["file_start"]
        s["file_addr"] = (
            "%08x" % (int(file_addr, 16) + int(s["sec_offset"], 16))
            if isinstance(file_addr, str) and file_addr else np.nan)
        return s

    def _InferSymbolType(s, stores):
        # Fills in type / value for one data symbol by inspecting its raw
        # bytes, where possible.
        # Not a data symbol.
        if s["sec_type"] != "data":
            return s
        # Symbol's section was not dumped, or out of range.
        section_lookup = "%s-%02d" % (s["area"], s["sec_id"])
        if section_lookup not in stores:
            return s
        offset = int(s["sec_offset"], 16)
        if offset < 0:
            return s
        # Otherwise, infer the type and value of the symbol, if possible.
        # (_InferType is defined elsewhere in this module; presumably returns
        # (None, None) when inference fails — verify against its definition.)
        view = stores[section_lookup].view(offset)
        (t, v) = _InferType(view, int(s["size"], 16), exact=True)
        if t:
            s["type"] = t
            s["value"] = v
        return s

    # Create a copy of the symbols DataFrame with the desired output columns.
    df = pd.DataFrame(symbols, columns=[
        "area", "sec_id", "sec_offset", "sec_name", "sec_type", "ram_addr",
        "file_addr", "name", "namespace", "size", "align", "type", "value"
    ])
    # Load previously dumped .DOL / .REL file sections into BDStores,
    # keyed by "<area>-<sec_id>".
    stores = {}
    for sec_id in (0, 1, 7, 8, 9, 10, 11, 12):
        section_path = "sections/_main/%02d.raw" % sec_id
        store = bd.BDStore(big_endian=True)
        store.RegisterFile(out_path / section_path, offset=0)
        stores["_main-%02d" % sec_id] = store
    # Each subdirectory of rel_linked is one area's dumped sections (1-5).
    rels_dir = out_path / "sections/rel_linked"
    areas = [f.name for f in os.scandir(rels_dir) if f.is_dir()]
    for area in areas:
        for sec_id in range(1, 6):
            store = bd.BDStore(big_endian=True)
            store.RegisterFile(rels_dir / area / ("%02d.raw" % sec_id),
                               offset=0)
            stores["%s-%02d" % (area, sec_id)] = store
    # Fill in remaining columns based on section_info and dumped sections.
    if FLAGS.GetFlag("debug_level"):
        print("Converting section offsets to ram/file addresses...")
    df = df.apply(lambda s: _AddSectionInfoFields(s, section_info), axis=1)
    if FLAGS.GetFlag("debug_level"):
        print("Inferring symbol types...")
    df = df.apply(lambda s: _InferSymbolType(s, stores), axis=1)
    # Output the final table of joined symbols.
    df.to_csv(out_path / "annotated_symbols.csv", index=False)