def extract_string(rom, charmap, max_length=None, expected_ptrs=None):
    """Extract one annotated string from the ROM at its current position.

    Bytes are read one at a time and converted to text until one of the
    following happens:

    * a special whose ``end`` flag is set has been emitted;
    * an 0xE0 terminator is accepted (see ``expected_ptrs``);
    * ``max_length`` character bytes have been read;
    * trash bytes are being read and the position reaches an expected
      pointer.

    Bytes below 0xE0 that are keys of ``charmap[1]`` are emitted as the
    mapped text. Bytes that are keys of ``reverse_specials`` are emitted
    as the special's name, followed by its formatted argument if it has
    one, wrapped in « ». Every other byte is emitted as its formatted
    value wrapped in « ».

    Args:
        rom: Binary file object positioned at the first byte of the string.
            It is left positioned after the last byte that was read.
        charmap: Indexable object; only ``charmap[1]`` is used here, and it
            is looked up by byte value.
        max_length: Maximum number of character/opcode bytes to read, or
            None for no limit. Argument bytes belonging to a special are
            not counted against this limit.
        expected_ptrs: Sequence of ROM positions (compared against
            ``rom.tell()``) at which other strings are known to start, or
            None. The final entry is used to decide whether this string is
            the last one. When given, an 0xE0 byte only ends the string if
            the following position is an expected pointer (or this is the
            last string); otherwise it is emitted as a literal and reading
            continues over the trash bytes until an expected pointer is
            reached. An empty sequence is treated like None.

    Returns:
        The extracted text as a single str.
    """
    string = []
    read_chara = 0
    reading_trash = False

    # Membership is tested for almost every byte read, so build the lookup
    # set once instead of scanning the sequence each time.
    ptr_set = frozenset(expected_ptrs) if expected_ptrs else frozenset()

    # With no pointer information at all, every 0xE0 is a real terminator.
    is_last = not expected_ptrs or rom.tell() >= expected_ptrs[-1]

    while True:
        if max_length is not None and read_chara >= max_length:
            break

        if reading_trash and rom.tell() in ptr_set:
            break

        next_chara = CHARA.unpack(rom.read(1))[0]
        read_chara += 1

        if next_chara < 0xE0 and next_chara in charmap[1]:
            #Control codes are the E0 block
            string.append(charmap[1][next_chara])
        elif next_chara in reverse_specials:
            #This must be the work of an 「ENEMY STAND」
            this_special = specials[reverse_specials[next_chara]]
            string.append("«")
            string.append(reverse_specials[next_chara])

            if this_special.bts:
                # bts is the argument width in bytes: 1 -> "B", 2 -> "H"
                fmt = "<" + ("", "B", "H")[this_special.bts]
                word = struct.unpack(fmt, rom.read(this_special.bts))[0]
                string.append(format_int(word))

            string.append("»")

            if this_special.end:
                break
        elif next_chara == 0xE0 and (is_last or rom.tell() in ptr_set):
            #End of string
            break
        else:
            if next_chara == 0xE0:
                # A terminator that does not line up with the next expected
                # string: whatever follows is unreferenced trash.
                reading_trash = True

            #Literal specials
            string.append("«")
            string.append(format_int(next_chara))
            string.append("»")

    return "".join(string)
def rip_msprite(rom, offset=None):
    """Decode one metasprite from the ROM into a list of sprite dicts.

    The first byte read is the number of sprite entries. Each entry is five
    bytes, read in this order: y, x, tile, attribute mode, attributes. The
    attribute mode byte is translated through ATTRIBMODE_ENUM.

    If offset is given, reading starts there; otherwise it starts at the
    current position. The ROM position on entry is restored before
    returning.

    Returns a list of dicts with the keys "y", "x", "tile", "attrib_mode"
    and "attribs".
    """

    def read_byte():
        return CHARA.unpack(rom.read(1))[0]

    saved_pos = rom.tell()
    if offset is not None:
        rom.seek(offset)

    sprites = []
    sprite_count = read_byte()

    for _ in range(sprite_count):
        y = read_byte()
        x = read_byte()
        tile = read_byte()
        raw_mode = read_byte()
        attribs = read_byte()

        sprites.append({
            "y": y,
            "x": x,
            "tile": tile,
            "attrib_mode": ATTRIBMODE_ENUM[raw_mode],
            "attribs": attribs,
        })

    rom.seek(saved_pos)
    return sprites
def spritecsv2bin(data):
    """Pack CSV-style metasprite rows into their binary form.

    data[0] is skipped (it is not packed; presumably a header row). Every
    following row contributes five packed values: columns 0, 1, 2 and 4 are
    parsed as hexadecimal integers, and column 3 is translated through
    ATTRIBMODE_ENUM_REV. The output starts with the number of packed rows.
    """
    row_count = len(data) - 1
    chunks = [CHARA.pack(row_count)]

    for row in data[1:]:
        chunks.append(CHARA.pack(int(row[0], 16)))
        chunks.append(CHARA.pack(int(row[1], 16)))
        chunks.append(CHARA.pack(int(row[2], 16)))
        chunks.append(CHARA.pack(ATTRIBMODE_ENUM_REV[row[3]]))
        chunks.append(CHARA.pack(int(row[4], 16)))

    return b"".join(chunks)
def extract_metatable(rom, length, offset=None):
    """Read the bank list (metatable) out of the ROM.

    There is no autodetection of the table size, so length (the number of
    one-byte entries) must be supplied. Reading starts at offset when it is
    given, otherwise at the current ROM position. The position the ROM had
    on entry is restored before returning.

    Returns a list of length bank numbers.
    """
    saved_pos = rom.tell()
    start = saved_pos if offset is None else offset

    rom.seek(start)
    banks = [CHARA.unpack(rom.read(1))[0] for _ in range(length)]
    rom.seek(saved_pos)

    return banks
def rip_msprite_mtable(rom, offset=0x094D, count=9):
    """Rip an entire ROM's metasprite data.

    The metatable at offset is stored as count one-byte bank numbers
    followed by count two-byte pointers. Assembly source declaring both
    halves of the metatable is generated, and then every (bank, pointer)
    pair is handed to rip_msprite_table, whose source and files are
    accumulated.

    The ROM position on entry is restored before returning, matching
    rip_msprite.

    Args:
        rom: Binary file object for the ROM.
        offset: File offset of the metatable.
        count: Number of entries in the metatable.

    Returns:
        A tuple (asmsrc, files): the generated assembly source as a str and
        a dict merged from the files returned by each rip_msprite_table
        call.
    """
    cloc = rom.tell()
    rom.seek(offset)

    #The metasprite metatable is oddly organized here.
    #Banks in one place, pointers in the other.
    banks = [CHARA.unpack(rom.read(1))[0] for _ in range(count)]
    ptrs = [PTR.unpack(rom.read(2))[0] for _ in range(count)]

    # Collect the pieces and join once rather than growing a str with +=.
    asm_parts = [
        "SECTION \"MetaSprite metatable\", " + format_sectionaddr_rom(offset)
        + "\n",
        "MetaspriteBankMetatable::\n",
    ]

    for bank in banks:
        asm_parts.append(" db BANK(MetaSprite_" + "{0:x}".format(bank) + ")\n")

    asm_parts.append("MetaspriteAddressMetatable::\n")

    # The address entries refer to the same per-bank labels as the bank
    # entries; the raw pointer values are only used for ripping below.
    for bank in banks:
        asm_parts.append(" dw MetaSprite_" + "{0:x}".format(bank) + "\n")

    asm_parts.append("\n")

    files = {}

    for bank, ptr in zip(banks, ptrs):
        table_asmsrc, table_files = rip_msprite_table(rom, flat(bank, ptr))
        asm_parts.append(table_asmsrc + "\n")
        files.update(table_files)

    # The entry position was saved above but previously never restored.
    rom.seek(cloc)

    return ("".join(asm_parts), files)
def decompress_tilemap(rom, offset=None):
    """Decompress a compressed tilemap from a given ROM file.

    The first byte selects the format. If its low two bits are clear the
    data is stored uncompressed: tile bytes, with 0xFE starting a new row
    and 0xFF ending the map. Otherwise, unless the byte is 0xFF (empty
    map), a command stream follows until a 0xFF byte. The top two bits of
    each command byte choose the operation and the low six bits are a
    count:

    * 3 - DecBytes: one data byte, emitted count + 2 times, decreasing.
    * 2 - IncBytes: one data byte, emitted count + 2 times, increasing.
    * 1 - RepeatBytes: one data byte, emitted count + 2 times.
    * 0 - CopyBytes: count + 1 literal bytes.

    Increasing and decreasing runs wrap around as 8-bit values, so every
    tile ID is in the range 0 to 255. The output of a command stream is
    split into rows of 32 tiles.

    If offset is given, ROM will be read from that position. Your existing
    ROM position will be preserved.

    Returns:
        A tuple (rows, comp_length): rows is a list of rows, each a list of
        integer tile IDs; comp_length is the number of bytes consumed from
        the ROM, including the format byte and the terminator.
    """
    if offset is None:
        offset = rom.tell()

    last = rom.tell()
    rom.seek(offset)

    decomp_mapping = []
    decomp_row = []
    comp_length = 0

    next_cmd = CHARA.unpack(rom.read(1))[0]
    comp_length += 1

    if next_cmd & 0x03 == 0:
        #Uncompressed data
        while True:
            next_cmd = CHARA.unpack(rom.read(1))[0]
            comp_length += 1

            if next_cmd == 0xFF:
                break
            elif next_cmd == 0xFE:
                #Newline
                decomp_mapping.append(decomp_row)
                decomp_row = []
            else:
                #Literal tile value
                decomp_row.append(next_cmd)

        #Append the last row
        decomp_mapping.append(decomp_row)
    elif next_cmd != 0xFF:
        #Compressed data
        while True:
            next_cmd = CHARA.unpack(rom.read(1))[0]
            comp_length += 1

            if next_cmd == 0xFF:
                break

            cmd = (next_cmd & 0xC0) >> 6
            count = next_cmd & 0x3F

            # Values are masked with & 0xFF (mod 256). The previous % 0xFF
            # was mod 255, which turned tile 0xFF into 0 and was off by one
            # whenever a run wrapped around.
            if cmd == 3:
                #DecBytes
                dat = CHARA.unpack(rom.read(1))[0]
                comp_length += 1
                decomp_row.extend((dat - i) & 0xFF for i in range(count + 2))
            elif cmd == 2:
                #IncBytes
                dat = CHARA.unpack(rom.read(1))[0]
                comp_length += 1
                decomp_row.extend((dat + i) & 0xFF for i in range(count + 2))
            elif cmd == 1:
                #RepeatBytes
                dat = CHARA.unpack(rom.read(1))[0]
                comp_length += 1
                decomp_row.extend([dat & 0xFF] * (count + 2))
            else:
                #CopyBytes
                for i in range(count + 1):
                    dat = CHARA.unpack(rom.read(1))[0]
                    comp_length += 1
                    decomp_row.append(dat & 0xFF)

        #Split decomp_row into 32-byte rows.
        #Interestingly enough the design of the decompressor prohibits fully
        #compressed graphics from having shorter rows - there's no newline
        #command nor any way to skip bytes.
        for i in range(0, len(decomp_row), 32):
            decomp_mapping.append(decomp_row[i:i + 32])

    rom.seek(last)
    return decomp_mapping, comp_length
def extract(args):
    """Extract every listed bank's string table from the ROM into CSV files.

    Attributes read from args: charmap, banknames, rom, language,
    overflow_bank and input.

    For each bank entry (a mapping with the keys "basebank", "baseaddr",
    "basedir", "legacy_filename" and "filename") this function:

    1. Measures the pointer table that starts at (basebank, baseaddr).
    2. Walks the table; each pointer becomes a CSV row holding either the
       decoded string or an alias marker when the pointer duplicates an
       earlier pointer or lands inside the previous string.
    3. Emits extra "(No pointer)" rows for data found between the end of a
       string and the start of the next pointed-to string.
    4. Writes the rows with csv.writer to
       os.path.join(args.input, bank["filename"]).

    A wiki table (wikitext) is assembled alongside the CSV rows, but the
    code that would write it out is commented out, so only the CSV file is
    produced.
    """
    charmap = parse_charmap(args.charmap)
    banknames = parse_bank_names(args.banknames)
    banknames = extract_metatable_from_rom(args.rom, charmap, banknames, args)

    with open(args.rom, 'rb') as rom:
        for bank in banknames:
            # Header rows: wiki table opener / CSV column titles.
            wikitext = ["{|", "|-", "!Pointer", "!" + args.language]
            csvdata = [["Pointer", args.language]]

            # flat() presumably converts a (bank, address) pair into a file
            # offset - confirm against its definition.
            rom.seek(flat(bank["basebank"], bank["baseaddr"]))

            addr = bank["baseaddr"]
            end = 0x8000

            #Autodetect the end/length of the table by finding the lowest
            #pointer that isn't stored after an existing pointer
            while addr < end:
                next_ptr = PTR.unpack(rom.read(2))[0]

                #Reject obviously invalid pointers
                if (next_ptr < addr or next_ptr > 0x7FFF):
                    break

                end = min(end, next_ptr)
                addr += 2

            # Each table entry is a 2-byte pointer.
            tbl_length = (addr - bank["baseaddr"]) // 2

            #Actually extract our strings
            string = []

            #Stores the actual end of the last string, used for alias detection
            last_start = 0xFFFF
            last_end = 0xFFFF
            last_nonaliasing_row = -1

            #Also store if a redirected/overflowed row is being extracted
            redirected = False
            old_loc = None

            for i in range(tbl_length):
                # First column of every row is the file offset of the
                # pointer table entry itself.
                csvrow = [
                    "0x{0:x}".format(
                        flat(bank["basebank"], bank["baseaddr"] + i * 2))
                ]
                wikitext.append("|-")
                wikitext.append("|0x{0:x}".format(
                    flat(bank["basebank"], bank["baseaddr"] + i * 2)))

                rom.seek(flat(bank["basebank"], bank["baseaddr"] + i * 2))
                read_ptr = PTR.unpack(rom.read(2))[0]

                #Attempt to autodetect "holes" in the text data.
                # For the final entry this reads the two bytes following the
                # table; the value is discarded by the override below.
                next_ptr = PTR.unpack(rom.read(2))[0]
                expected_length = next_ptr - read_ptr
                if i >= tbl_length - 1:
                    # NOTE(review): with -1, "read_length <= expected_length"
                    # is never true and "read_length >= expected_length" is
                    # always true, so the last string is read only while
                    # first_read or redirected holds and the outer loop
                    # exits after one pass.
                    expected_length = -1  #maximum length by far

                #Two different alias detects:

                #First, we try to see if this pointer matches another pointer
                #in the table.
                rom.seek(flat(bank["basebank"], bank["baseaddr"]))
                for j in range(i):
                    if read_ptr == PTR.unpack(rom.read(2))[0]:
                        #Aliased pointer!
                        csvrow.append("<ALIAS ROW 0x{0:x}>".format(j))
                        wikitext.append("|«ALIAS ROW 0x{0:x}»".format(j))
                        print(
                            "Pointer at 0x{0:x} fully aliases pointer 0x{1:x}".
                            format(
                                flat(bank["basebank"],
                                     bank["baseaddr"] + i * 2),
                                flat(bank["basebank"],
                                     bank["baseaddr"] + j * 2)))
                        break
                else:
                    # for/else: this branch runs only when no earlier table
                    # entry holds the same pointer.

                    #Second, we try to see if this pointer is in the middle of
                    #the last string.

                    #This alias detection breaks when the previous row uses the
                    #overflow code, so disable it if so.
                    if i > 0 and read_ptr < last_end - 1 and not redirected:
                        print(
                            "Pointer at 0x{0:x} partially aliases previous pointer"
                            .format(rom.tell() - 2))
                        csvrow.append(
                            "<ALIAS ROW 0x{0:x} INTO 0x{1:x}>".format(
                                last_nonaliasing_row, read_ptr - last_start))
                        wikitext.append(
                            "|«ALIAS ROW 0x{0:x} INTO 0x{1:x}»".format(
                                last_nonaliasing_row, read_ptr - last_start))
                        # NOTE(review): this continue also skips the
                        # csvdata.append(csvrow) at the bottom of the loop,
                        # so the partial-alias row reaches wikitext but not
                        # the CSV output - confirm this is intended.
                        continue

                    read_length = 1
                    first_read = True
                    rom.seek(flat(bank["basebank"], read_ptr))

                    #Now we can initialize these...
                    redirected = False
                    old_loc = None

                    # Outer loop: one pass per string (the pointed-to string
                    # plus any unreferenced strings found in the gap before
                    # the next pointer). Stops at the last byte of a 0x4000
                    # block.
                    while (rom.tell() % 0x4000 < 0x3FFF or rom.tell() == flat(
                            bank["basebank"], bank["baseaddr"])):
                        next_chara = CHARA.unpack(rom.read(1))[0]
                        # Inner loop: decode characters until the terminator,
                        # the expected length, or an ending special.
                        while (rom.tell() % 0x4000 < 0x3FFF or rom.tell() ==
                               flat(bank["basebank"], bank["baseaddr"])) and (
                                   read_length <= expected_length
                                   or first_read or redirected
                               ) and next_chara != 0xE0:  #E0 is end-of-string
                            if next_chara < 0xE0 and next_chara in charmap[
                                    1]:  #Control codes are the E0 block
                                string.append(charmap[1][next_chara])
                            elif next_chara in reverse_specials and specials[
                                    reverse_specials[next_chara]].redirect:
                                #Redirecting opcodes are transparently removed from the extracted text.
                                this_special = specials[
                                    reverse_specials[next_chara]]

                                if this_special.bts:
                                    read_length += this_special.bts
                                    # bts is the argument width in bytes:
                                    # 1 -> "B", 2 -> "H" (little-endian).
                                    fmt = "<" + ("", "B", "H")[this_special.bts]
                                    word = struct.unpack(
                                        fmt, rom.read(this_special.bts))[0]

                                    if word < 0x4000 or word > 0x7FFF:
                                        #Overflowing into RAM is illegal - use the jump opcode.
                                        #Overflowing into ROM0 is technically not illegal, but
                                        #unorthodox enough that we're going to disallow it.
                                        # The opcode and its two argument
                                        # bytes are kept as literals instead.
                                        string.append(
                                            format_literal(this_special.byte))
                                        string.append(
                                            format_literal(
                                                word & 0xFF, charmap[1]))
                                        string.append(
                                            format_literal(
                                                word >> 8, charmap[1]))
                                    else:
                                        #We need to do this right now to avoid breaking hole detection
                                        old_loc = rom.tell()
                                        read_length = rom.tell() - flat(
                                            bank["basebank"], read_ptr)

                                        # Continue reading from the target
                                        # address in the overflow bank.
                                        rom.seek(flat(args.overflow_bank, word))
                                        redirected = True
                                else:
                                    raise RuntimeError(
                                        "Invalid specials dictionary. Redirecting special character is missing bts."
                                    )
                            elif next_chara in reverse_specials:
                                #This must be the work of an 「ENEMY STAND」
                                this_special = specials[
                                    reverse_specials[next_chara]]

                                if this_special.bts:
                                    read_length += this_special.bts
                                    fmt = "<" + ("", "B", "H")[this_special.bts]
                                    word = struct.unpack(
                                        fmt, rom.read(this_special.bts))[0]
                                    string.append(
                                        format_control_code(
                                            reverse_specials[next_chara],
                                            word))
                                else:
                                    string.append(
                                        format_control_code(
                                            reverse_specials[next_chara]))

                                if this_special.end:
                                    # Clearing first_read means a later pass
                                    # of the inner loop is bounded by
                                    # expected_length unless redirected.
                                    first_read = False
                                    break
                            #elif next_chara == 0xE2:
                            #Literal newline
                            #    string.append(u"\n")
                            else:
                                #Literal specials
                                string.append(format_literal(next_chara))

                            next_chara = CHARA.unpack(rom.read(1))[0]

                            #Explicitly stop updating read_length if the
                            #overflow opcode is used. Otherwise we'd think we
                            #read thousands or negative thousands of chars
                            if not redirected:
                                read_length = rom.tell() - flat(
                                    bank["basebank"], read_ptr)

                        #After the main extraction loop
                        if read_length >= expected_length:
                            break
                        else:
                            #Detect nulls (spaces) after the end of a string
                            #and append them to avoid creating a new pointer row
                            loc = rom.tell()
                            if redirected:
                                loc = old_loc

                            while CHARA.unpack(rom.read(1))[0] == charmap[0][
                                    " "] and read_length < expected_length:
                                string.append(" ")
                                loc += 1
                                read_length += 1

                            # Seek to just past the padding that was kept,
                            # undoing the read of the byte that ended the
                            # loop above.
                            rom.seek(loc)  #cleanup

                            if read_length >= expected_length:
                                break
                            else:
                                #There's a hole in the ROM!
                                #Disassemble the next string.
                                print("Inaccessible data found at 0x{0:x}".
                                      format(flat(bank["basebank"], read_ptr)))

                                # Flush the current row and start an extra
                                # row that has no pointer of its own.
                                csvrow.append("".join(string))
                                wikitext.append("|" + "".join(string))
                                string = []

                                csvdata.append(csvrow)
                                csvrow = ["(No pointer)"]
                                wikitext.append("|-")
                                wikitext.append("|(No pointer)")

                                read_length += 1

                    csvrow.append("".join(string))
                    wikitext.append("|" + "".join(string))
                    string = []

                    #Store the actual end pointer for later use.
                    last_start = read_ptr
                    last_end = read_ptr + read_length
                    last_nonaliasing_row = i

                csvdata.append(csvrow)

            wikitext.append("|-")
            wikitext.append("|}")

            wikitext = "\n".join(wikitext)

            wikidir = os.path.join(args.input, bank["basedir"])
            # wikipath is only referenced by the commented-out writer below.
            wikipath = os.path.join(args.input, bank["legacy_filename"])
            csvpath = os.path.join(args.input, bank["filename"])

            # install_path presumably creates the output directory - confirm
            # against its definition.
            install_path(wikidir)
            #with open(wikipath, "w+", encoding="utf-8") as bank_wikitext:
            #bank_wikitext.write(wikitext)

            with open(csvpath, "w+", encoding="utf-8") as bank_csvtext:
                csvwriter = csv.writer(bank_csvtext)

                for csvrow in csvdata:
                    csvwriter.writerow(csvrow)