Beispiel #1
0
def extract_string(rom, charmap, max_length=None, expected_ptrs=None):
    """Extract characters from the given file until exhausted.

    Bytes are read one at a time from the current position of ``rom`` and
    decoded into annotated text, which is returned as a single string:

     * bytes below 0xE0 that are keys of ``charmap[1]`` are replaced by the
       mapped text;
     * bytes found in ``reverse_specials`` are written as ``«name»`` or
       ``«name<operand>»``, the operand being the 1 or 2 following bytes
       (little-endian) when the special declares ``bts``;
     * any other byte is written as a literal ``«value»``.

    Extraction stops at a 0xE0 terminator, after a special flagged ``end``,
    or once ``max_length`` bytes have been decoded. Operand bytes of specials
    are not counted toward ``max_length``.

    The expected_ptrs sequence lists the offsets of strings that are directly
    referenced by indexes elsewhere in the ROM; its last element is taken to
    be the highest one. If provided, a 0xE0 byte only terminates the string
    when the position right after it is an expected pointer (or the string is
    the last one); otherwise the 0xE0 is emitted as a literal and reading
    continues until an expected pointer is reached. This allows for trash
    byte detection. ``None`` and an empty sequence both disable the check."""

    pieces = []
    read_chara = 0
    reading_trash = False

    #Snapshot the pointers once so the per-byte membership tests are O(1).
    #An empty sequence has no "last pointer" to compare against, so it is
    #handled like None instead of raising IndexError.
    known_ptrs = frozenset(expected_ptrs) if expected_ptrs else frozenset()
    is_last = not expected_ptrs or rom.tell() >= expected_ptrs[-1]

    while True:
        if max_length is not None and read_chara >= max_length:
            break

        #While skipping trash, the next expected pointer starts a new string.
        if reading_trash and rom.tell() in known_ptrs:
            break

        next_chara = CHARA.unpack(rom.read(1))[0]
        read_chara += 1

        if next_chara < 0xE0 and next_chara in charmap[1]:
            #Ordinary text; control codes live in the E0 block
            pieces.append(charmap[1][next_chara])
        elif next_chara in reverse_specials:
            #Named control code, optionally followed by an operand
            special_name = reverse_specials[next_chara]
            this_special = specials[special_name]
            pieces.append("«")
            pieces.append(special_name)

            if this_special.bts:
                #bts is the operand width in bytes: 1 -> "B", 2 -> "H"
                fmt = "<" + ("", "B", "H")[this_special.bts]
                word = struct.unpack(fmt, rom.read(this_special.bts))[0]
                pieces.append(format_int(word))

            pieces.append("»")

            if this_special.end:
                break
        elif next_chara == 0xE0 and (is_last or rom.tell() in known_ptrs):
            #End of string
            break
        else:
            if next_chara == 0xE0:
                #A terminator that is not followed by a known string start:
                #whatever comes next is unreferenced trash.
                reading_trash = True

            #Literal specials
            pieces.append("«")
            pieces.append(format_int(next_chara))
            pieces.append("»")

    return "".join(pieces)
Beispiel #2
0
def rip_msprite(rom, offset=None):
    """Decode one metasprite from the ROM into a list of sprite dicts.

    Reading starts at ``offset`` when given, otherwise at the current ROM
    position. The first byte is the number of entries; each entry is five
    bytes stored as y, x, tile, attribute mode and attributes. The attribute
    mode byte is translated through ``ATTRIBMODE_ENUM``.

    The ROM position held on entry is restored before returning."""

    saved_pos = rom.tell()
    if offset is not None:
        rom.seek(offset)

    def next_byte():
        return CHARA.unpack(rom.read(1))[0]

    sprites = []

    for _ in range(next_byte()):
        #Field order on disk: y, x, tile, attribute mode, attributes
        y, x, tile, mode, attribs = [next_byte() for _ in range(5)]

        sprites.append({
            "y": y,
            "x": x,
            "tile": tile,
            "attrib_mode": ATTRIBMODE_ENUM[mode],
            "attribs": attribs,
        })

    rom.seek(saved_pos)
    return sprites
Beispiel #3
0
def extract_metatable(rom, length, offset=None):
    """Read the bank list (metatable) out of the ROM.

    ``length`` is the number of one-byte entries to read; it is required
    because the table length is not autodetected. The entries are returned as
    a list.

    Reading starts at ``offset`` when given, otherwise at the current ROM
    position. Either way the position held on entry is restored afterwards."""

    saved_pos = rom.tell()
    rom.seek(saved_pos if offset is None else offset)

    banks = [CHARA.unpack(rom.read(1))[0] for _ in range(length)]

    rom.seek(saved_pos)
    return banks
Beispiel #4
0
def rip_msprite_mtable(rom, offset=0x094D, count=9):
    """Rip an entire ROM's metasprite data.

    The metatable at ``offset`` is read as ``count`` one-byte banks followed
    by ``count`` two-byte pointers. Each bank/pointer pair is then handed to
    ``rip_msprite_table``.

    Returns a tuple ``(asmsrc, files)``: ``asmsrc`` is the assembly source for
    the metatable section followed by the source returned for every table,
    and ``files`` is the union (via ``dict.update``) of the file dicts
    returned for every table.

    The ROM position held on entry is restored before returning, matching the
    other extraction helpers."""
    cloc = rom.tell()
    rom.seek(offset)

    #The metasprite metatable is oddly organized here.
    #Banks in one place, pointers in the other.
    banks = [CHARA.unpack(rom.read(1))[0] for _ in range(count)]
    ptrs = [PTR.unpack(rom.read(2))[0] for _ in range(count)]

    #Collect the pieces and join once instead of growing a string with +=
    asm_parts = [
        "SECTION \"MetaSprite metatable\", " + format_sectionaddr_rom(offset) +
        "\n",
        "MetaspriteBankMetatable::\n",
    ]
    asm_parts.extend(
        "    db BANK(MetaSprite_" + "{0:x}".format(bank) + ")\n"
        for bank in banks)

    asm_parts.append("MetaspriteAddressMetatable::\n")

    #Both tables refer to the table label, which is named after the bank;
    #the raw pointer value is only needed to locate the data below.
    asm_parts.extend(
        "    dw MetaSprite_" + "{0:x}".format(bank) + "\n" for bank in banks)

    asm_parts.append("\n")

    files = {}
    for bank, ptr in zip(banks, ptrs):
        table_asmsrc, table_files = rip_msprite_table(rom, flat(bank, ptr))

        asm_parts.append(table_asmsrc + "\n")
        files.update(table_files)

    #Put the caller's ROM position back (it was saved but never restored)
    rom.seek(cloc)
    return ("".join(asm_parts), files)
Beispiel #5
0
def decompress_tilemap(rom, offset=None):
    """Decompress a compressed tilemap from a given ROM file.

    Return value is a pair; first element is the decompressed data, second is
    the number of bytes read from the tilemap. The data is a list of rows,
    each row a list of integers from 0 to 255; these correspond to tile IDs.

    The first byte selects the encoding:

     * low two bits clear: uncompressed. Bytes are literal tile IDs, 0xFE
       ends the current row and 0xFF ends the tilemap.
     * 0xFF: empty tilemap, no rows are returned.
     * anything else: compressed. Each command byte holds a 2-bit command in
       the top bits and a 6-bit count in the low bits, and 0xFF ends the
       stream. Commands 1, 2 and 3 read one data byte and emit it count + 2
       times, respectively unchanged, incrementing and decrementing;
       command 0 copies the next count + 1 bytes verbatim. The output is
       split into rows of 32 tiles.

    If offset is given, ROM will be read from that position. Your existing ROM
    position will be preserved."""

    if offset is None:
        offset = rom.tell()

    last = rom.tell()
    rom.seek(offset)

    decomp_mapping = []
    decomp_row = []
    comp_length = 0

    next_cmd = CHARA.unpack(rom.read(1))[0]
    comp_length += 1

    if next_cmd & 0x03 == 0:
        #Uncompressed data
        while True:
            next_cmd = CHARA.unpack(rom.read(1))[0]
            comp_length += 1

            if next_cmd == 0xFF:
                break
            elif next_cmd == 0xFE:
                #Newline
                decomp_mapping.append(decomp_row)
                decomp_row = []
            else:
                #Literal tile value
                decomp_row.append(next_cmd)

        #Append the last row
        decomp_mapping.append(decomp_row)

    elif next_cmd != 0xFF:
        #Compressed data
        #Amount added to the data byte per repetition, by command:
        #3 = DecBytes, 2 = IncBytes, 1 = RepeatBytes
        run_steps = {3: -1, 2: 1, 1: 0}

        while True:
            next_cmd = CHARA.unpack(rom.read(1))[0]
            comp_length += 1

            if next_cmd == 0xFF:
                break

            cmd = (next_cmd & 0xC0) >> 6
            count = next_cmd & 0x3F

            if cmd in run_steps:
                dat = CHARA.unpack(rom.read(1))[0]
                comp_length += 1

                step = run_steps[cmd]
                for i in range(count + 2):
                    #Wrap as an 8-bit value. The mask must be & 0xFF
                    #(mod 256); % 0xFF is mod 255 and would turn tile 0xFF
                    #into 0 and wrap runs one value early.
                    decomp_row.append((dat + step * i) & 0xFF)
            else:
                #CopyBytes
                for i in range(count + 1):
                    dat = CHARA.unpack(rom.read(1))[0]
                    comp_length += 1

                    decomp_row.append(dat & 0xFF)

        #Split decomp_row into 32-byte rows.
        #Interestingly enough the design of the decompressor prohibits fully
        #compressed graphics from having shorter rows - there's no newline
        #command nor any way to skip bytes.
        for i in range(0, len(decomp_row), 32):
            decomp_mapping.append(decomp_row[i:i + 32])

    rom.seek(last)

    return decomp_mapping, comp_length
Beispiel #6
0
def extract(args):
    """Extract the text of every listed bank from the ROM, one CSV per bank.

    For each bank entry, the pointer table at bank["basebank"] /
    bank["baseaddr"] is scanned to find its length, then the string behind
    each pointer is decoded with the charmap and the ``specials`` /
    ``reverse_specials`` tables defined outside this function.

    Rows whose pointer equals an earlier pointer in the table, or that point
    before the end of the previously extracted string, are emitted as alias
    markers instead of text. Data found after a string that no pointer covers
    is emitted as additional "(No pointer)" rows.

    A wikitext rendering is built alongside the CSV rows, but the code that
    writes it out is commented out, so only the CSV file is written.

    Attributes read from ``args``: charmap, banknames, rom, language,
    overflow_bank and input."""
    charmap = parse_charmap(args.charmap)
    banknames = parse_bank_names(args.banknames)
    banknames = extract_metatable_from_rom(args.rom, charmap, banknames, args)

    with open(args.rom, 'rb') as rom:
        for bank in banknames:
            wikitext = ["{|", "|-", "!Pointer", "!" + args.language]
            csvdata = [["Pointer", args.language]]

            #flat() presumably turns a bank/address pair into a file offset
            #- confirm against its definition.
            rom.seek(flat(bank["basebank"], bank["baseaddr"]))

            addr = bank["baseaddr"]
            #Upper bound of the scan; lowered to the smallest pointer seen
            end = 0x8000

            #Autodetect the end/length of the table by finding the lowest
            #pointer that isn't stored after an existing pointer
            while addr < end:
                next_ptr = PTR.unpack(rom.read(2))[0]

                #Reject obviously invalid pointers
                if (next_ptr < addr or next_ptr > 0x7FFF):
                    break

                end = min(end, next_ptr)
                addr += 2

            #Table entries are 2 bytes each
            tbl_length = (addr - bank["baseaddr"]) // 2

            #Actually extract our strings
            string = []

            #Stores the actual end of the last string, used for alias detection
            last_start = 0xFFFF
            last_end = 0xFFFF
            last_nonaliasing_row = -1

            #Also store if a redirected/overflowed row is being extracted
            redirected = False
            old_loc = None

            for i in range(tbl_length):
                #First column of the row: file offset of this table entry
                csvrow = [
                    "0x{0:x}".format(
                        flat(bank["basebank"], bank["baseaddr"] + i * 2))
                ]
                wikitext.append("|-")
                wikitext.append("|0x{0:x}".format(
                    flat(bank["basebank"], bank["baseaddr"] + i * 2)))

                rom.seek(flat(bank["basebank"], bank["baseaddr"] + i * 2))
                read_ptr = PTR.unpack(rom.read(2))[0]

                #Attempt to autodetect "holes" in the text data.
                #The distance to the following table entry is the space this
                #string is expected to occupy. For the last row the value read
                #here is whatever follows the table; it is discarded below.
                next_ptr = PTR.unpack(rom.read(2))[0]
                expected_length = next_ptr - read_ptr
                if i >= tbl_length - 1:
                    #NOTE: with -1 the "read_length >= expected_length" checks
                    #below are always true, so the last row ends after its
                    #first string and no hole detection is attempted.
                    expected_length = -1  #maximum length by far

                #Two different alias detects:

                #First, we try to see if this pointer matches another pointer
                #in the table.
                rom.seek(flat(bank["basebank"], bank["baseaddr"]))
                for j in range(i):
                    if read_ptr == PTR.unpack(rom.read(2))[0]:
                        #Aliased pointer!
                        csvrow.append("<ALIAS ROW 0x{0:x}>".format(j))
                        wikitext.append("|«ALIAS ROW 0x{0:x}»".format(j))
                        print(
                            "Pointer at 0x{0:x} fully aliases pointer 0x{1:x}".
                            format(
                                flat(bank["basebank"],
                                     bank["baseaddr"] + i * 2),
                                flat(bank["basebank"],
                                     bank["baseaddr"] + j * 2)))
                        break
                else:
                    #for/else: this branch runs only when the scan above did
                    #not break, i.e. no earlier row has the same pointer.

                    #Second, we try to see if this pointer is in the middle of
                    #the last string.

                    #This alias detection breaks when the previous row uses the
                    #overflow code, so disable it if so.
                    if i > 0 and read_ptr < last_end - 1 and not redirected:
                        #NOTE(review): the scan above read i entries, so
                        #rom.tell() is the offset of row i and the value
                        #printed is the offset of row i - 1 - confirm whether
                        #that is the intended pointer to report.
                        print(
                            "Pointer at 0x{0:x} partially aliases previous pointer"
                            .format(rom.tell() - 2))
                        csvrow.append(
                            "<ALIAS ROW 0x{0:x} INTO 0x{1:x}>".format(
                                last_nonaliasing_row, read_ptr - last_start))
                        wikitext.append(
                            "|«ALIAS ROW 0x{0:x} INTO 0x{1:x}»".format(
                                last_nonaliasing_row, read_ptr - last_start))
                        #Skips the csvdata.append at the bottom of the loop,
                        #so this row only reaches the wikitext output.
                        continue

                    read_length = 1
                    first_read = True
                    rom.seek(flat(bank["basebank"], read_ptr))

                    #Now we can initialize these...
                    redirected = False
                    old_loc = None

                    #Outer loop: one pass per string found in this row's
                    #span (the pointed-to string plus any unreferenced data
                    #after it). It stops near a 0x4000 boundary - presumably
                    #the end of a ROM bank; confirm.
                    while (rom.tell() % 0x4000 < 0x3FFF or rom.tell() == flat(
                            bank["basebank"], bank["baseaddr"])):
                        next_chara = CHARA.unpack(rom.read(1))[0]
                        #Inner loop: decode bytes until the terminator, the
                        #expected span is used up, or an "end" special breaks.
                        while (rom.tell() % 0x4000 < 0x3FFF or rom.tell() ==
                               flat(bank["basebank"], bank["baseaddr"])) and (
                                   read_length <= expected_length or first_read
                                   or redirected
                               ) and next_chara != 0xE0:  #E0 is end-of-string
                            if next_chara < 0xE0 and next_chara in charmap[
                                    1]:  #Control codes are the E0 block
                                string.append(charmap[1][next_chara])
                            elif next_chara in reverse_specials and specials[
                                    reverse_specials[next_chara]].redirect:
                                #Redirecting opcodes are transparently removed from the extracted text.
                                this_special = specials[
                                    reverse_specials[next_chara]]

                                if this_special.bts:
                                    read_length += this_special.bts
                                    #bts is the operand width in bytes
                                    fmt = "<" + ("", "B",
                                                 "H")[this_special.bts]
                                    word = struct.unpack(
                                        fmt, rom.read(this_special.bts))[0]

                                    if word < 0x4000 or word > 0x7FFF:
                                        #Overflowing into RAM is illegal - use the jump opcode.
                                        #Overflowing into ROM0 is technically not illegal, but
                                        #unorthodox enough that we're going to disallow it.
                                        string.append(
                                            format_literal(this_special.byte))
                                        string.append(
                                            format_literal(
                                                word & 0xFF, charmap[1]))
                                        string.append(
                                            format_literal(
                                                word >> 8, charmap[1]))
                                    else:
                                        #We need to do this right now to avoid breaking hole detection
                                        old_loc = rom.tell()
                                        read_length = rom.tell() - flat(
                                            bank["basebank"], read_ptr)

                                        #Continue reading at the redirect
                                        #target inside args.overflow_bank
                                        rom.seek(flat(args.overflow_bank,
                                                      word))
                                        redirected = True
                                else:
                                    raise RuntimeError(
                                        "Invalid specials dictionary. Redirecting special character is missing bts."
                                    )
                            elif next_chara in reverse_specials:
                                #This must be the work of an 「ENEMY STAND」
                                this_special = specials[
                                    reverse_specials[next_chara]]

                                if this_special.bts:
                                    read_length += this_special.bts
                                    fmt = "<" + ("", "B",
                                                 "H")[this_special.bts]
                                    word = struct.unpack(
                                        fmt, rom.read(this_special.bts))[0]
                                    string.append(
                                        format_control_code(
                                            reverse_specials[next_chara],
                                            word))
                                else:
                                    string.append(
                                        format_control_code(
                                            reverse_specials[next_chara]))

                                if this_special.end:
                                    first_read = False
                                    break
                            #elif next_chara == 0xE2:
                            #Literal newline
                            #    string.append(u"\n")
                            else:
                                #Literal specials
                                string.append(format_literal(next_chara))

                            next_chara = CHARA.unpack(rom.read(1))[0]

                            #Explicitly stop updating read_length if the
                            #overflow opcode is used. Otherwise we'd think we
                            #read thousands or negative thousands of chars
                            if not redirected:
                                read_length = rom.tell() - flat(
                                    bank["basebank"], read_ptr)

                        #After the main extraction loop
                        if read_length >= expected_length:
                            break
                        else:
                            #Detect nulls (spaces) after the end of a string
                            #and append them to avoid creating a new pointer row
                            loc = rom.tell()
                            if redirected:
                                #Resume counting from where the redirect
                                #opcode was read, not from the overflow bank
                                loc = old_loc

                            #charmap[0] is indexed by text here, charmap[1]
                            #by byte value above
                            while CHARA.unpack(rom.read(1))[0] == charmap[0][
                                    " "] and read_length < expected_length:
                                string.append(" ")
                                loc += 1
                                read_length += 1

                            rom.seek(loc)  #cleanup

                            if read_length >= expected_length:
                                break
                            else:
                                #There's a hole in the ROM!
                                #Disassemble the next string.
                                print("Inaccessible data found at 0x{0:x}".
                                      format(flat(bank["basebank"], read_ptr)))

                                #Flush what was decoded so far as its own row
                                #and start a pointer-less row for the hole
                                csvrow.append("".join(string))
                                wikitext.append("|" + "".join(string))
                                string = []

                                csvdata.append(csvrow)
                                csvrow = ["(No pointer)"]
                                wikitext.append("|-")
                                wikitext.append("|(No pointer)")

                                read_length += 1

                    csvrow.append("".join(string))
                    wikitext.append("|" + "".join(string))
                    string = []

                    #Store the actual end pointer for later use.
                    last_start = read_ptr
                    last_end = read_ptr + read_length
                    last_nonaliasing_row = i

                csvdata.append(csvrow)

            wikitext.append("|-")
            wikitext.append("|}")

            wikitext = "\n".join(wikitext)

            wikidir = os.path.join(args.input, bank["basedir"])
            wikipath = os.path.join(args.input, bank["legacy_filename"])
            csvpath = os.path.join(args.input, bank["filename"])

            install_path(wikidir)
            #Writing the wikitext is disabled; wikitext and wikipath are
            #computed above but currently unused.
            #with open(wikipath, "w+", encoding="utf-8") as bank_wikitext:
            #bank_wikitext.write(wikitext)

            #NOTE(review): the csv module documentation recommends opening
            #files passed to csv.writer with newline='' - consider whether
            #the default newline translation is wanted here.
            with open(csvpath, "w+", encoding="utf-8") as bank_csvtext:
                csvwriter = csv.writer(bank_csvtext)

                for csvrow in csvdata:
                    csvwriter.writerow(csvrow)