Example #1
0
def _LoadSectionDataDict(out_path, section_info):
    """Constructs a dict of area name : BDStore of that area's data sections.

    Args:
        out_path: Root path of the previously dumped sections (pathlib.Path).
        section_info: DataFrame with one row per section; must provide
            "area", "type", "id", and "ram_start" (hex string) columns.

    Returns:
        defaultdict mapping area name -> bd.BDStore (big-endian) with all of
        that area's data sections registered at their RAM start addresses.
        _main's data sections are registered in every area's store.
    """
    if FLAGS.GetFlag("debug_level"):
        print("Loading section data...")

    res = defaultdict(lambda: bd.BDStore(big_endian=True))
    areas = list(section_info.area.unique())
    for (_, row) in section_info.iterrows():
        if row["type"] != "data":
            continue
        # RAM start is stored as a hex string; parse it once per row.
        offset = int(row["ram_start"], 16)
        if row["area"] == "_main":
            # Include data sections from _main in all areas' BDStores.
            # The path depends only on the row, so compute it once here
            # rather than once per area.
            path = out_path / "sections/_main" / ("%02d.raw" % row["id"])
            for area in areas:
                res[area].RegisterFile(path, offset=offset)
        else:
            # Otherwise, include only in this area's BDStore.
            area = row["area"]
            path = out_path / "sections/rel_linked" / (
                "%s/%02d.raw" % (area, row["id"]))
            res[area].RegisterFile(path, offset=offset)
    return res
Example #2
0
def _ProcessDol(filepath):
    """Dumps the _main DOL and its individual sections to disk.

    Reads the DOL header's three parallel 18-entry u32 tables (file offsets
    at 0x00, RAM addresses at 0x48, sizes at 0x90) plus the .bss start/size
    fields at 0xd8/0xdc -- the standard DOL header layout.  Writes out the
    full image and each non-empty section as "sections/_main/<id>.raw",
    then returns a DataFrame of section info indexed by
    (area, id, name, type).
    """
    def _CreateSectionDf(id, name, section_info, columns):
        """Builds a one-row section-info DataFrame for DOL section `id`.

        Ids below 7 are "text" sections; the rest are "data".
        `section_info` is the list of (file_start, ram_start, size) tuples.
        """
        file_start = section_info[id][0]
        ram_start = section_info[id][1]
        size = section_info[id][2]
        return pd.DataFrame(
            [["_main", id, name, "text" if id < 7 else "data", file_start, 
                file_start + size, ram_start, ram_start + size, size]], 
            columns=columns)
        
    def _CreateBssDf(id, name, section_info, columns, bss_end=None):
        """Builds a one-row DataFrame for the bss-type range after section `id`.

        The range spans from the end of section `id`'s RAM extent to the
        start of section `id + 1`'s, or to `bss_end` when given (used for
        the final .sbss2 range).  90 is added to the id to keep these
        synthetic ids distinct from real DOL section ids.
        """
        ram_start = section_info[id][1] + section_info[id][2]
        ram_end = bss_end if bss_end else section_info[id + 1][1]
        size = ram_end - ram_start
        return pd.DataFrame(
            [["_main", id + 90, name, "bss", np.nan, np.nan,
                ram_start, ram_end, size]], 
            columns=columns)

    if FLAGS.GetFlag("debug_level"):
        print("Processing _main DOL at %s..." % str(filepath))
    
    store = bd.BDStore(big_endian=True)
    store.RegisterFile(filepath, offset=0)
    view = store.view(0)
    
    # Put together list of (file_start, ram_start, size) tuples.
    # Advancing the view 4 bytes per iteration walks the header's three
    # parallel u32 tables (at 0x00, 0x48 and 0x90) in lockstep.
    sections = [None for x in range(18)]
    for x in range(18):
        sections[x] = (view.ru32(0), view.ru32(0x48), view.ru32(0x90))
        view = view.at(4)
    # Get start / end / size of .bss range.
    view = store.view(0)
    bss_start = view.ru32(0xd8)
    bss_size = view.ru32(0xdc)
    bss_end = bss_start + bss_size
    
    # Output DOL in its entirety, and its individual sections.
    f = open(_GetOutputPath("_main.dol"), "wb")
    f.write(store.mem[0].data)
    f.close()
    for id in range(18):
        if sections[id][2]:  # size > 0
            f = open(_GetOutputPath("sections/_main/%02d.raw" % id), "wb")
            f.write(view.rbytes(sections[id][2], sections[id][0]))
            f.close()
    
    # Construct DataFrame of DOL section info.
    # Named sections are interleaved with the synthetic bss-type gaps that
    # follow .data, .sdata and .sdata2.
    columns = [
        "area", "id", "name", "type",
        "file_start", "file_end", "ram_start", "ram_end", "size"]
    dfs = []
    dfs.append(_CreateSectionDf(0, ".init", sections, columns))
    dfs.append(_CreateSectionDf(1, ".text", sections, columns))
    dfs.append(_CreateSectionDf(7, ".ctors", sections, columns))
    dfs.append(_CreateSectionDf(8, ".dtors", sections, columns))
    dfs.append(_CreateSectionDf(9, ".rodata", sections, columns))
    dfs.append(_CreateSectionDf(10, ".data", sections, columns))
    dfs.append(_CreateBssDf(10, ".bss", sections, columns))
    dfs.append(_CreateSectionDf(11, ".sdata", sections, columns))
    dfs.append(_CreateBssDf(11, ".sbss", sections, columns))
    dfs.append(_CreateSectionDf(12, ".sdata2", sections, columns))
    dfs.append(_CreateBssDf(12, ".sbss2", sections, columns, bss_end=bss_end))
    df = pd.concat(dfs, ignore_index=True)
    df = df.set_index(["area", "id", "name", "type"])
    return df
Example #3
0
def _ProcessRel(area, filepath, link_address):
    """Dumps an area's REL and its sections; returns a section-info DataFrame."""

    def _MakeSectionRow(sec_id, sec_name, section_tbl, columns):
        # Section table entries are (u32 offset + flag bits, u32 size) pairs;
        # mask off the low two bits to get the raw file offset.
        file_start = section_tbl.ru32(8 * sec_id) & ~3
        size = section_tbl.ru32(8 * sec_id + 4)
        sec_type = "data" if sec_id > 3 else "text"
        if file_start == 0:
            # A zero file offset marks a .bss section, which has no extent
            # in the file image.
            sec_type = "bss"
            file_start = np.nan
            file_end = np.nan
            ram_start = np.nan
            ram_end = np.nan
        else:
            ram_start = file_start + link_address
            file_end = file_start + size
            ram_end = ram_start + size
        if not link_address:
            # Without a link address there are no meaningful RAM addresses.
            ram_start = np.nan
            ram_end = np.nan
        return pd.DataFrame(
            [[area, sec_id, sec_name, sec_type, file_start, file_end,
                ram_start, ram_end, size]],
            columns=columns)

    def _DumpRelAndSections(store, folder):
        # Dump the whole REL image first.
        with open(_GetOutputPath("%s/%s.rel" % (folder, area)), "wb") as f:
            f.write(store.mem[0].data)
        # Then each non-empty section except .bss (id 6), which has no data.
        tbl = store.view(0)[0x10]
        for sec_id in range(1, 7):
            file_offset = tbl.ru32(8 * sec_id) & ~3
            size = tbl.ru32(8 * sec_id + 4)
            if size and sec_id < 6:
                path = _GetOutputPath(
                    "sections/%s/%s/%02d.raw" % (folder, area, sec_id))
                with open(path, "wb") as f:
                    f.write(store.view(0).rbytes(size, file_offset))

    if FLAGS.GetFlag("debug_level"):
        print("Processing %s REL at %s..." % (area, filepath))

    store = bd.BDStore(big_endian=True)
    store.RegisterFile(filepath, offset=0)
    section_tbl = store.view(0)[0x10]

    # Dump the REL and its sections unlinked, and (when possible) linked.
    _DumpRelAndSections(store, "rel_unlinked")
    if link_address:
        _LinkRel(store, link_address)
        _DumpRelAndSections(store, "rel_linked")

    # Build the DataFrame of REL section info, one row per named section.
    columns = [
        "area", "id", "name", "type",
        "file_start", "file_end", "ram_start", "ram_end", "size"]
    names = [(1, ".text"), (2, ".ctors"), (3, ".dtors"),
             (4, ".rodata"), (5, ".data"), (6, ".bss")]
    df = pd.concat(
        [_MakeSectionRow(sec_id, sec_name, section_tbl, columns)
         for (sec_id, sec_name) in names],
        ignore_index=True)
    return df.set_index(["area", "id", "name", "type"])
Example #4
0
def _CombineRels(rel_pattern_str, symbol_table):
    """Combines the symbols from all areas' RELs into one custom REL.

    Copies every symbol in `symbol_table` out of its source REL (found by
    substituting the area name for "*" in `rel_pattern_str`), packs the
    symbols into combined per-section buffers, ports the relocation entries
    those symbols need, and writes the resulting module to "custom.rel"
    plus a "custom_symbols.csv" table of the symbols' new locations.

    Raises:
        CombineRelsError: on ambiguous symbol matches, missing dependencies,
            or relocations against the .bss section.
    """
    def _CreateExportedTableFormat(row):
        """Converts a symbol table row to exported format."""
        return pd.DataFrame([[
            "custom", row["sec_id"],
            "%08x" % row["out_offset"], row["name"], row["namespace"],
            "%08x" % row["size"], row["align"]
        ]],
                            columns=[
                                "area", "sec_id", "sec_offset", "name",
                                "namespace", "size", "align"
                            ])

    def _CreateLookupRowWithOutput(row, out_offset):
        """Takes the input symbol_info row and adds the out_offset."""
        return pd.DataFrame([[
            row["area"], row["sec_id"], row["sec_offset"],
            row["sec_offset_end"], row["name"], row["namespace"], row["size"],
            row["align"], out_offset
        ]],
                            columns=[
                                "area", "sec_id", "sec_offset",
                                "sec_offset_end", "name", "namespace", "size",
                                "align", "out_offset"
                            ])

    def _LookupNewOffset(symbol_table, area, sec_id, sec_offset):
        """Returns the new offset corresponding to the old one if one exists.
        
        If the old offset corresponds to a symbol not in symbol_table, returns
        None, assuming that symbol won't be included in the combined REL.
        Raises an error if there are multiple matches in symbol_table."""
        matches = symbol_table[(symbol_table.area == area)
                               & (symbol_table.sec_id == sec_id) &
                               (symbol_table.sec_offset <= sec_offset) &
                               (symbol_table.sec_offset_end > sec_offset)]
        if matches.shape[0] < 1:
            return None
        if matches.shape[0] > 1:
            raise CombineRelsError("Ambiguous symbol match at %s:%d:%08x." %
                                   (area, sec_id, sec_offset))
        # Return the new offset + how far into the symbol the old offset is.
        return (matches["out_offset"].iloc[0] + sec_offset -
                matches["sec_offset"].iloc[0])

    def _WriteToBuffer(buffer, offset, value, size):
        """Writes a big-endian integer value of the given size to a buffer."""
        for x in range(size):
            buffer[offset + x] = ((value >> ((size - 1 - x) * 8)) & 0xff)

    def _AppendToBuffer(buffer, value, size):
        """Writes a big-endian integer value of the given size to the end of a buffer."""
        for x in range(size):
            buffer.append((value >> ((size - 1 - x) * 8)) & 0xff)

    # Buffers for each section's raw, unlinked data (for .bss, track the length)
    section_data = [[] for _ in range(6)]
    bss_length = 0

    # Copy all symbols into their respective sections' buffers, and create
    # a new lookup DataFrame including the output locations.
    # (For .bss data, keep track of the total length of the combined sections.)
    dfs = []
    for area in sorted(symbol_table.area.unique()):
        if FLAGS.GetFlag("debug_level"):
            print("Processing %s symbols..." % area)

        store = bd.BDStore(big_endian=True)
        store.RegisterFile(rel_pattern_str.replace("*", area), offset=0)
        section_tbl = store.view(0)[0x10]

        for (_, row) in symbol_table.iterrows():
            if row["area"] == area:
                # Add symbol's data to its respective section.
                if row["sec_id"] == 6:
                    # Pad to alignment to find start point of next symbol.
                    while bss_length % 8 != row["sec_offset"] % 8:
                        bss_length += 1
                    out_offset = bss_length
                    # Add size of symbol to bss_length (bss data is not stored).
                    bss_length += row["size"]
                else:
                    id = row["sec_id"]
                    if id > 1:
                        # If not .text section (fixed-alignment of 4), pad to
                        # alignment to find the start point of next symbol.
                        while len(
                                section_data[id]) % 8 != row["sec_offset"] % 8:
                            section_data[id].append(0)
                    out_offset = len(section_data[id])
                    # Copy bytes from original symbol into section_data.
                    file_offset = section_tbl.ru32(8 * id) & ~3
                    for b in store.view(0).rbytes(
                            row["size"], file_offset + row["sec_offset"]):
                        section_data[id].append(b)
                # Add the new location of the symbol to a new table.
                dfs.append(_CreateLookupRowWithOutput(row, out_offset))

    # Join all lookup rows with output offsets, and order lexicographically.
    df = pd.concat(dfs, ignore_index=True)
    symbol_table = df.sort_values(by=["area", "sec_id", "sec_offset"])

    if FLAGS.GetFlag("debug_level"):
        print("REL symbols extracted; %d symbols processed." %
              symbol_table.shape[0])

    # Buffers for each section's relocatable data (against the same REL).
    rel_data = [[] for _ in range(6)]
    rel_offsets = [0 for _ in range(6)]
    # Buffers for each section's relocatable data (against the main DOL).
    rel_data_main = [[] for _ in range(6)]
    rel_offsets_main = [0 for _ in range(6)]

    # Port all needed relocation information from the original .REL files.
    for area in sorted(symbol_table.area.unique()):
        if FLAGS.GetFlag("debug_level"):
            print("Processing %s relocation tables..." % area)

        store = bd.BDStore(big_endian=True)
        store.RegisterFile(rel_pattern_str.replace("*", area), offset=0)

        header = store.view(0)
        imp_table = header[0x28]
        imp_size = header.ru32(0x2c)
        imp_offset = 0
        while imp_offset < imp_size:
            module_id = imp_table.ru32(imp_offset)
            rel_table = imp_table[imp_offset + 4]
            imp_offset += 8

            current_section = 0
            section_offset = 0
            while True:
                offset = rel_table.ru16(0)
                type = rel_table.ru8(2)
                section = rel_table.ru8(3)
                addend = rel_table.ru32(4)
                # Advance to next relocation entry.
                rel_table = rel_table.at(8)

                if type == 203:
                    # End of rel table.
                    break
                elif type == 202:
                    # Section change rel entry.
                    current_section = section
                    section_offset = 0
                else:
                    # Assuming no symbols in .bss have linkage; need to add
                    # specific support for that if I'm mistaken.
                    if current_section > 5:
                        raise CombineRelsError("Linking to section 6!")

                    section_offset += offset
                    new_offset = _LookupNewOffset(symbol_table, area,
                                                  current_section,
                                                  section_offset)
                    # Not used in combined REL; move to next entry.
                    if new_offset is None:
                        continue
                    # Look up the linked-to symbol address in the combined REL.
                    # (if module_id == 0, i.e. linking to DOL, just use addend.)
                    if module_id != 0:
                        addend = _LookupNewOffset(symbol_table, area, section,
                                                  addend)
                    # If an address could not be found, a dependency must have
                    # been missing from the input symbol_table; for shame.
                    if addend is None:
                        raise CombineRelsError(
                            "Symbol missing dependency at %s:%d:%08x." %
                            (area, current_section, section_offset))
                    # Update the current offset for the given section/imp.
                    if module_id == 0:
                        offset = new_offset - rel_offsets_main[current_section]
                        rel_offsets_main[current_section] = new_offset
                    else:
                        offset = new_offset - rel_offsets[current_section]
                        rel_offsets[current_section] = new_offset
                    # Add this relocation to the respective table.
                    rel_table_buffer = (rel_data_main[current_section]
                                        if module_id == 0 else
                                        rel_data[current_section])
                    _AppendToBuffer(rel_table_buffer, offset, 2)
                    _AppendToBuffer(rel_table_buffer, type, 1)
                    _AppendToBuffer(rel_table_buffer, section, 1)
                    _AppendToBuffer(rel_table_buffer, addend, 4)

    if FLAGS.GetFlag("debug_level"):
        print("Relocation tables processed.")

    # Construct the final REL from the buffers put together beforehand.
    rel = []

    # REL header.
    _AppendToBuffer(rel, 40, 4)  # id (arbitrary custom value)
    _AppendToBuffer(rel, 0, 4)  # next
    _AppendToBuffer(rel, 0, 4)  # prev
    _AppendToBuffer(rel, 15, 4)  # numSections
    _AppendToBuffer(rel, 0x4c, 4)  # sectionInfoOffset
    _AppendToBuffer(rel, 0, 4)  # nameOffset
    _AppendToBuffer(rel, 0, 4)  # nameSize
    _AppendToBuffer(rel, 3, 4)  # version
    _AppendToBuffer(rel, bss_length, 4)  # bssSize
    _AppendToBuffer(rel, 0, 4)  # relOffset - will be filled in later.
    _AppendToBuffer(rel, 0, 4)  # impOffset - will be filled in later.
    _AppendToBuffer(rel, 0x10, 4)  # impSize
    _AppendToBuffer(rel, 0, 4)  # prolog/epilog/unresolved/bssSection
    _AppendToBuffer(rel, 0, 4)  # prolog offset
    _AppendToBuffer(rel, 0, 4)  # epilog offset
    _AppendToBuffer(rel, 0, 4)  # unresolved offset
    _AppendToBuffer(rel, 8, 4)  # align
    _AppendToBuffer(rel, 8, 4)  # bssAlign
    _AppendToBuffer(rel, 0, 4)  # fixSize - will be filled in later.

    # Section table (initialize to 15 section table entries' worth of zeroes).
    for _ in range(15 * 8):
        rel.append(0)
    # Section 1 (text)
    section_start = len(rel)
    _WriteToBuffer(rel, 0x54, section_start | 1, 4)
    rel += section_data[1]
    _WriteToBuffer(rel, 0x58, len(rel) - section_start, 4)
    # Sections 2 and 3 (unused)
    _WriteToBuffer(rel, 0x5c, len(rel), 4)
    _WriteToBuffer(rel, 0x60, 4, 4)
    _AppendToBuffer(rel, 0, 4)
    _WriteToBuffer(rel, 0x64, len(rel), 4)
    _WriteToBuffer(rel, 0x68, 4, 4)
    _AppendToBuffer(rel, 0, 4)
    # Section 4 (rodata)
    while len(rel) % 8 != 0:
        rel.append(0)
    section_start = len(rel)
    _WriteToBuffer(rel, 0x6c, section_start, 4)
    rel += section_data[4]
    _WriteToBuffer(rel, 0x70, len(rel) - section_start, 4)
    # Section 5 (data)
    while len(rel) % 8 != 0:
        rel.append(0)
    section_start = len(rel)
    _WriteToBuffer(rel, 0x74, section_start, 4)
    rel += section_data[5]
    _WriteToBuffer(rel, 0x78, len(rel) - section_start, 4)
    # Section 6 (bss)
    _WriteToBuffer(rel, 0x7c, 0, 4)
    _WriteToBuffer(rel, 0x80, bss_length, 4)
    # Pad before imp table (not necessary, but easier to read in a hex editor.)
    while len(rel) % 8 != 0:
        rel.append(0)

    imp_table = len(rel)
    rel_table = len(rel) + 0x10
    _WriteToBuffer(rel, 0x24, rel_table, 4)  # relOffset
    _WriteToBuffer(rel, 0x28, imp_table, 4)  # impOffset
    _WriteToBuffer(rel, 0x48, rel_table, 4)  # fixSize
    # Reserve space for imp table.
    for _ in range(16):
        rel.append(0)
    # Copy REL -> REL relocation data.
    _WriteToBuffer(rel, imp_table, 40, 4)
    _WriteToBuffer(rel, imp_table + 4, len(rel), 4)
    for x in range(6):
        if len(rel_data[x]):
            _AppendToBuffer(rel, 0, 2)  # offset
            _AppendToBuffer(rel, 202, 1)  # type (change section)
            _AppendToBuffer(rel, x, 1)  # section
            _AppendToBuffer(rel, 0, 4)  # addend
            rel += rel_data[x]
    _AppendToBuffer(rel, 0, 2)  # offset
    _AppendToBuffer(rel, 203, 1)  # type (end of table)
    _AppendToBuffer(rel, 0, 1)  # section
    _AppendToBuffer(rel, 0, 4)  # addend
    # Copy REL -> DOL relocation data.
    _WriteToBuffer(rel, imp_table + 8, 0, 4)
    _WriteToBuffer(rel, imp_table + 12, len(rel), 4)
    for x in range(6):
        if len(rel_data_main[x]):
            _AppendToBuffer(rel, 0, 2)  # offset
            _AppendToBuffer(rel, 202, 1)  # type (change section)
            _AppendToBuffer(rel, x, 1)  # section
            _AppendToBuffer(rel, 0, 4)  # addend
            rel += rel_data_main[x]
    _AppendToBuffer(rel, 0, 2)  # offset
    _AppendToBuffer(rel, 203, 1)  # type (end of table)
    _AppendToBuffer(rel, 0, 1)  # section
    _AppendToBuffer(rel, 0, 4)  # addend

    # Export the final REL.  (Use a context manager so the file is closed
    # and flushed even if writing fails.)
    with open(_GetOutputPath("custom.rel"), "wb") as out_rel:
        out_rel.write(bytes(rel))

    # Export the table of symbol info.
    # TODO: Add column with file-relative offsets?
    dfs = []
    for (_, row) in symbol_table.iterrows():
        dfs.append(_CreateExportedTableFormat(row))
    df = pd.concat(dfs, ignore_index=True)
    df = df.sort_values(by=["area", "sec_id", "sec_offset"])
    df.to_csv(_GetOutputPath("custom_symbols.csv"), index=False)
def _AnnotateSymbols(symbols, section_info, out_path):
    """Joins symbols with section info, infers data types, and exports a CSV."""

    def _WithAddresses(sym, section_info):
        # Look up the symbol's section and derive its RAM / file addresses
        # from the section bases plus the symbol's hex-string offset.
        section = section_info.loc[(sym["area"], sym["sec_id"])]
        sym["sec_name"] = section["name"]
        sym["sec_type"] = section["type"]
        ram_start = section["ram_start"]
        if isinstance(ram_start, str) and ram_start:
            sym["ram_addr"] = "%08x" % (
                int(ram_start, 16) + int(sym["sec_offset"], 16))
        else:
            sym["ram_addr"] = np.nan
        file_start = section["file_start"]
        if isinstance(file_start, str) and file_start:
            sym["file_addr"] = "%08x" % (
                int(file_start, 16) + int(sym["sec_offset"], 16))
        else:
            sym["file_addr"] = np.nan
        return sym

    def _GuessType(sym, stores):
        # Only data symbols in a dumped section with a valid offset can be
        # inferred; anything else passes through unchanged.
        if sym["sec_type"] != "data":
            return sym
        key = "%s-%02d" % (sym["area"], sym["sec_id"])
        if key not in stores:
            return sym
        offset = int(sym["sec_offset"], 16)
        if offset < 0:
            return sym
        view = stores[key].view(offset)
        (inferred_type, inferred_value) = _InferType(
            view, int(sym["size"], 16), exact=True)
        if inferred_type:
            sym["type"] = inferred_type
            sym["value"] = inferred_value
        return sym

    # Copy the symbols DataFrame, restricted to the desired output columns.
    df = pd.DataFrame(symbols,
                      columns=[
                          "area", "sec_id", "sec_offset", "sec_name",
                          "sec_type", "ram_addr", "file_addr", "name",
                          "namespace", "size", "align", "type", "value"
                      ])

    # Load previously dumped .DOL / .REL file sections into BDStores,
    # keyed by "<area>-<section id>".
    stores = {}
    for sec_id in (0, 1, 7, 8, 9, 10, 11, 12):
        dol_store = bd.BDStore(big_endian=True)
        dol_store.RegisterFile(
            out_path / ("sections/_main/%02d.raw" % sec_id), offset=0)
        stores["_main-%02d" % sec_id] = dol_store

    rels_dir = out_path / "sections/rel_linked"
    for entry in os.scandir(rels_dir):
        if not entry.is_dir():
            continue
        for sec_id in range(1, 6):
            rel_store = bd.BDStore(big_endian=True)
            rel_store.RegisterFile(
                rels_dir / entry.name / ("%02d.raw" % sec_id), offset=0)
            stores["%s-%02d" % (entry.name, sec_id)] = rel_store

    # Fill in remaining columns based on section_info and dumped sections.
    if FLAGS.GetFlag("debug_level"):
        print("Converting section offsets to ram/file addresses...")
    df = df.apply(lambda s: _WithAddresses(s, section_info), axis=1)

    if FLAGS.GetFlag("debug_level"):
        print("Inferring symbol types...")
    df = df.apply(lambda s: _GuessType(s, stores), axis=1)

    # Output the final table of joined symbols.
    df.to_csv(out_path / "annotated_symbols.csv", index=False)