def read_index(file, index_offset):
    """
    Read the chunk index of a chunky file and load every chunk it lists.

    :param file: binary file object supporting seek() and read()
    :param index_offset: offset passed to file.seek() to reach the index header
    :return: list of model.Chunk instances, in index-entry order
    :raises struct.error: if an index entry cannot be read in full
    """
    # Read the index header
    file.seek(index_offset)
    index_header = parse_index_header(file.read(INDEX_HEADER_SIZE))
    LOGGER.debug("Parsed index header: %s", index_header)
    number_of_chunks = index_header["number_of_entries"]

    # Chunk attribute records are addressed relative to the end of the header
    attributes_base = index_offset + INDEX_HEADER_SIZE

    # Read each index entry to get the address of the chunk attributes.
    # Every entry is a pair of little-endian unsigned 32-bit ints.
    file.seek(attributes_base + index_header["entries_size"])
    index_entries = [
        struct.unpack("<2I", file.read(8))
        for _ in range(number_of_chunks)
    ]

    # Read attributes and data for each chunk
    chunks = []
    for chunk_attributes_offset, chunk_attributes_size in index_entries:
        file.seek(attributes_base + chunk_attributes_offset)
        attrs = parse_chunk_attributes(file.read(chunk_attributes_size))
        LOGGER.debug(attrs)

        # Read chunk data
        file.seek(attrs["offset"])
        chunk_data = file.read(attrs["size"])

        # Create a new chunk
        children = [
            model.ChunkChild(t["chid"], model.ChunkId(t["tag"], t["number"]))
            for t in attrs["children"]
        ]
        chunks.append(model.Chunk(attrs["tag"],
                                  attrs["number"],
                                  flags=attrs["flags"],
                                  data=chunk_data,
                                  children=children,
                                  name=attrs.get("name")))

    return chunks
def parse_and_assemble(input_file, opcode_list, verbose=False):
    """
    Parse assembly source text and build a chunky file from it.

    After "#" comments are stripped, each line is either a constant define
    (anything matching DEFINE_FORMAT) or an optional "label:" prefix followed
    by one of the commands "stringtable", "string", "script", "push" or an
    opcode mnemonic.

    :param input_file: iterable of source lines
    :param opcode_list: mapping of opcode id to opcode object; each opcode
        must provide "mnemonic" and "opcode" attributes
    :param verbose: also log the labels, string table and formatted script
    :return: chunkyfilemodel.ChunkyFile containing the string table chunk
        and/or the script chunk
    :raises AssembleScriptException: on syntax errors, duplicate or unknown
        labels, or when strings/instructions appear before their
        "stringtable"/"script" directive
    """
    # TODO: rewrite this using a real parser!

    # Generate reverse lookup for opcodes
    string_to_opcode = {opcode.mnemonic.lower(): opcode_id
                        for opcode_id, opcode in opcode_list.items()}

    # Load known constants, swapping keys and values so the table is keyed
    # the same way as the defines added below (by name)
    defs = {v: k for k, v in knownconstants.load_constants().items()}

    string_table = None
    script = None
    script_chunk_tag = None
    script_chunk_number = None
    string_table_chunk_tag = "GSTX"
    string_table_chunk_number = None

    # instruction pointer starts at 2
    instruction_pointer = 2
    labels = {"start": instruction_pointer}
    stack = []

    # [label:] [command] [args]; compiled once, raw string so that "\:" and
    # "\s" are not treated as (invalid) string escapes
    line_pattern = re.compile(
        r"^((?P<label>[A-Za-z0-9_@]+)\:\s*)?(?P<cmd>[A-Za-z]+)?\s*(?P<args>.*)$")

    for line in input_file:
        # Remove whitespace and comments. This is pretty rough: a "#" inside
        # a quoted string also starts a comment.
        line = line.partition("#")[0].strip()
        if not line:
            continue

        # Check if this is a define
        define_match = DEFINE_FORMAT.match(line)
        if define_match:
            define_name, define_value_str, _ = define_match.groups()
            define_value = parse_number(define_value_str)
            if define_name in defs:
                logger.warning("replacing const value %s with 0x%x (was 0x%x)",
                               define_name, define_value, defs[define_name])
            defs[define_name] = define_value
            continue

        # Split line into components
        split_line = line_pattern.match(line)
        if not split_line:
            continue

        label_name = split_line.group("label")
        # "cmd" is an optional group, so it is None on label-only lines
        command = (split_line.group("cmd") or "").lower()
        args = split_line.group("args")

        # Handle labels first
        if label_name:
            if label_name in labels:
                raise AssembleScriptException("Label %s already defined" % label_name)
            labels[label_name] = instruction_pointer

        # Nothing else to do for lines without a command
        if not command:
            continue

        if command == "stringtable":
            # Create a new stringtable
            if not args:
                raise AssembleScriptException("syntax: stringtable <string-table-chunk-no>")
            string_table_chunk_number = parse_number(args)
            if string_table is not None:
                raise AssembleScriptException("String table already defined")
            logger.debug("Creating string table: GSTX 0x%x", string_table_chunk_number)
            string_table = stringtable.StringTable()

        elif command == "string":
            string_id = None
            string_value = None
            # Split off the id only so whitespace inside the string survives
            string_args = args.split(None, 1)
            if len(string_args) == 2:
                string_id = parse_number(string_args[0])
                string_value = string_args[1].strip("\"")
            # Compare against None: id 0 and the empty string are valid
            if string_id is None or string_value is None:
                raise AssembleScriptException("syntax: string <id> <quoted-string>")
            if string_table is None:
                raise AssembleScriptException("String table not defined")
            logger.debug("Adding to string table: 0x%x -> %s", string_id, string_value)
            string_table[string_id] = string_value

        elif command == "script":
            # New script directive
            script_chunk_tag = None
            script_chunk_number = None
            script_args = args.split()
            if len(script_args) == 2:
                script_chunk_tag = script_args[0].upper()
                script_chunk_number = parse_number(script_args[1])
            if script_chunk_tag is None or script_chunk_number is None:
                raise AssembleScriptException("syntax: script chunk-tag chunk-number")
            if script_chunk_tag not in ("GLSC", "GLOP"):
                raise AssembleScriptException("invalid script chunk tag type")
            if script is not None:
                raise AssembleScriptException(
                    "Cannot have more than one script in a source file yet")

            # Use same version as in 3DMM
            compiler_version = chunkyfilemodel.Version(29, 16)
            script = scriptmodel.Script(
                endianness=chunkyfilemodel.Endianness.LittleEndian,
                characterset=chunkyfilemodel.CharacterSet.ANSI,
                compilerversion=compiler_version)

        elif command == "push":
            # Special handling for PUSH instructions: values are collected
            # and emitted as one PUSH before the next opcode
            if not args:
                raise AssembleScriptException("syntax: push <value>")
            stack.append(parse_value(args, defs))

        elif command in string_to_opcode:
            # It's an opcode
            if script is None:
                raise AssembleScriptException(
                    "instruction outside of a script: %s" % command)
            opcode = opcode_list[string_to_opcode[command]]

            # Anything after the mnemonic is taken as a variable name
            variable_name = args or None

            # If there's existing stuff on the stack, generate a PUSH instruction
            if stack:
                push_instruction = scriptmodel.Instruction(
                    opcode=0, params=list(stack), address=instruction_pointer)
                stack.clear()
                script.instructions.append(push_instruction)
                instruction_pointer += push_instruction.number_of_dwords

            # Generate this instruction
            this_instruction = scriptmodel.Instruction(
                opcode=opcode.opcode, address=instruction_pointer, variable=variable_name)
            script.instructions.append(this_instruction)
            instruction_pointer += this_instruction.number_of_dwords

        else:
            raise AssembleScriptException("invalid command: %s" % command)

    # Add trailing PUSH instruction if required
    if stack:
        if script is None:
            raise AssembleScriptException("push outside of a script")
        # Built the same way as the PUSH emitted inside the loop:
        # number_of_dwords is read as an attribute there, not called
        push_instruction = scriptmodel.Instruction(
            opcode=0, params=list(stack), address=instruction_pointer)
        stack.clear()
        script.instructions.append(push_instruction)
        instruction_pointer += push_instruction.number_of_dwords

    # Log defined labels
    if labels and verbose:
        logger.debug("Labels:")
        for label_name, label_instruction_pointer in labels.items():
            logger.debug("%s: %d", label_name, label_instruction_pointer)

    # Resolve defined labels (only possible when a script was declared)
    if script is not None:
        for instruction in script.instructions:
            for param_pos, param in enumerate(instruction.params):
                if not isinstance(param, LabelReference):
                    continue

                # hack to handle output from disassembler
                if param.label_name.startswith("$"):
                    param.label_name = "@" + param.label_name[1:]

                label_ip = labels.get(param.label_name)
                if label_ip is None:
                    raise AssembleScriptException("Label %s not found" % param.label_name)

                # Replace parameter with resolved value
                instruction.params[param_pos] = 0xCC000000 + label_ip

    # Create a chunky file containing the script and string table
    chunky_file = chunkyfilemodel.ChunkyFile(
        endianness=chunkyfilemodel.Endianness.LittleEndian,
        characterset=chunkyfilemodel.CharacterSet.ANSI,
        file_type="ASMX")

    if string_table:
        if verbose:
            logger.debug("String table:")
            for string_id, string_value in string_table.items():
                logger.debug("0x%x: %s", string_id, string_value)

        string_table_data = string_table.to_buffer()
        string_table_chunk = chunkyfilemodel.Chunk(tag=string_table_chunk_tag,
                                                   number=string_table_chunk_number,
                                                   data=string_table_data)
        chunky_file.chunks.append(string_table_chunk)

    if script:
        if verbose:
            logger.debug("Script:")
            script_formatter = scriptformatter.TextScriptFormatter()
            logger.debug(
                script_formatter.format_script(
                    script, chunkyfilemodel.ChunkId(script_chunk_tag, script_chunk_number)))

        script_data = assemble_script(script)
        script_chunk = chunkyfilemodel.Chunk(tag=script_chunk_tag,
                                             number=script_chunk_number,
                                             data=script_data)

        # Link the string table to the script as child 0
        if string_table:
            string_table_id = chunkyfilemodel.ChunkId(string_table_chunk_tag,
                                                      string_table_chunk_number)
            string_table_child = chunkyfilemodel.ChunkChild(chid=0, ref=string_table_id)
            script_chunk.children.append(string_table_child)

        chunky_file.chunks.append(script_chunk)

    return chunky_file
def xml_to_chunky_file(chunky_file, xml_path, change_file_type=False):
    """
    Load chunks from an XML file and add them to a chunky file.

    Chunks whose id already exists in chunky_file are modified in place
    (name, new children, data); all other chunks are appended.

    :param chunky_file: Existing chunky file instance that new chunks will be added to
    :param xml_path: XML filename
    :param change_file_type: change the file type tag in the header
    :raises Exception: if the root element is not "ChunkyFile" or a chunk
        contains an element other than "Child", "Data" or "File"
    """
    logger = logging.getLogger(__name__)

    # Load XML file
    chunky_file_xml = ElementTree.parse(xml_path).getroot()

    # TODO: validate with an XSD?
    if chunky_file_xml.tag != "ChunkyFile":
        raise Exception("Not the right kind of XML file")

    # Set chunky file options if not already set
    root_attrib = chunky_file_xml.attrib
    file_type = root_attrib.get("type")
    endianness = model.Endianness[root_attrib.get("endianness", "LittleEndian")]
    charset = model.CharacterSet[root_attrib.get("charset", "ANSI")]

    if chunky_file.file_type == EMPTY_FILE:
        chunky_file.file_type = file_type
        chunky_file.endianness = endianness
        chunky_file.characterset = charset
    else:
        # The file type is only replaced when the caller asked for it;
        # endianness and character set always follow the XML
        if file_type is not None and chunky_file.file_type != file_type and change_file_type:
            logger.warning("Changing file type from %s to %s",
                           chunky_file.file_type, file_type)
            chunky_file.file_type = file_type

        if chunky_file.endianness != endianness:
            logger.warning("Changing file endianness from %s to %s",
                           chunky_file.endianness, endianness)
            chunky_file.endianness = endianness

        if chunky_file.characterset != charset:
            logger.warning("Changing file character set from %s to %s",
                           chunky_file.characterset, charset)
            chunky_file.characterset = charset

    for chunk_xml in chunky_file_xml.findall("Chunk"):
        # Get chunk metadata
        chunk_tag = chunk_xml.attrib["tag"]
        chunk_number = int(chunk_xml.attrib["number"])
        chunk_id = model.ChunkId(chunk_tag, chunk_number)
        chunk_name = chunk_xml.attrib.get("name", None)
        logger.debug("Processing chunk: %s - %s", chunk_id, chunk_name or "n/a")

        chunk_flags = model.ChunkFlags.Default
        if chunk_xml.attrib.get("loner", "false").lower() == "true":
            chunk_flags |= model.ChunkFlags.Loner
        if chunk_xml.attrib.get("compressed", "false").lower() == "true":
            chunk_flags |= model.ChunkFlags.Compressed

        # Get chunk children and data
        chunk_data = None
        chunk_children = []
        for child_xml in chunk_xml:
            if child_xml.tag == "Child":
                child_ref = model.ChunkId(child_xml.attrib["tag"],
                                          int(child_xml.attrib["number"]))
                chunk_children.append(
                    model.ChunkChild(chid=int(child_xml.attrib["chid"]), ref=child_ref))
            elif child_xml.tag == "Data":
                # An empty <Data/> element has text None: treat it as no bytes
                chunk_data = base64.b64decode(child_xml.text or "")
            elif child_xml.tag == "File":
                # The path is opened as given in the XML
                with open(child_xml.text, "rb") as data_file:
                    chunk_data = data_file.read()
            else:
                raise Exception("unhandled child tag type: %s" % child_xml.tag)

        # Check if there is an existing chunk
        if chunk_id in chunky_file:
            existing_chunk = chunky_file[chunk_id]
            logger.info("%s: Modifying existing chunk", chunk_id)

            # Update chunk metadata
            if chunk_name:
                existing_chunk.name = chunk_name

            if chunk_flags != existing_chunk.flags:
                # TODO: set flags correctly
                # if the loner flag is not set correctly the file won't load
                logger.warning("Chunk flags are different: %s vs %s",
                               existing_chunk.flags, chunk_flags)

            # TODO: update existing children instead of just adding
            for new_child in chunk_children:
                existing_child = [
                    c for c in existing_chunk.children if c.chid == new_child.chid
                ]
                if existing_child:
                    logger.warning("child %s: %s already exists",
                                   existing_chunk, existing_child)
                else:
                    existing_chunk.children.append(new_child)

            # Set chunk data; missing or empty data leaves the chunk untouched
            # TODO: handle compression
            if chunk_data:
                existing_chunk.raw_data = chunk_data
        else:
            logger.info("%s: Creating new chunk", chunk_id)

            # Create a new chunk
            this_chunk = model.Chunk(chunk_tag, chunk_number, chunk_name, chunk_flags,
                                     data=chunk_data)
            this_chunk.children = chunk_children
            chunky_file.chunks.append(this_chunk)
def parse_and_assemble(input_file, opcode_list, verbose=False):
    """
    Parse assembly source text and build a chunky file from it.

    After "#" comments are stripped, the first whitespace-separated token of
    each line selects the action: "stringtable", "string", "script", "push",
    a token ending in ":" (label definition) or an opcode mnemonic.

    :param input_file: iterable of source lines
    :param opcode_list: mapping of opcode id to opcode object; each opcode
        must provide "mnemonic" and "opcode" attributes
    :param verbose: also log the labels, string table and formatted script
    :return: chunkyfilemodel.ChunkyFile containing the string table chunk
        and/or the script chunk
    :raises AssembleScriptException: on syntax errors, duplicate labels,
        unknown commands, or when strings/instructions appear before their
        "stringtable"/"script" directive
    """
    # TODO: rewrite this using a real parser!

    # Generate reverse lookup for opcodes
    string_to_opcode = {
        opcode.mnemonic.lower(): opcode_id
        for opcode_id, opcode in opcode_list.items()
    }

    string_table = None
    script = None
    script_chunk_tag = None
    script_chunk_number = None
    string_table_chunk_tag = "GSTX"
    string_table_chunk_number = None

    # instruction pointer starts at 2
    instruction_pointer = 2
    labels = {"start": instruction_pointer}
    stack = []

    for line in input_file:
        # Remove whitespace and comments. This is pretty rough: a "#" inside
        # a quoted string also starts a comment.
        line = line.partition("#")[0].strip()

        # Split on any run of whitespace, so tabs or repeated spaces between
        # tokens do not produce empty tokens
        split_line = line.split()
        if not split_line:
            continue
        command = split_line[0].lower()

        if command == "stringtable":
            # Create a new stringtable
            if len(split_line) < 2:
                raise AssembleScriptException(
                    "syntax: stringtable <string-table-chunk-no>")
            string_table_chunk_number = parse_number(split_line[1])
            if string_table is not None:
                raise AssembleScriptException("String table already defined")
            logger.debug("Creating string table: GSTX 0x%x",
                         string_table_chunk_number)
            string_table = stringtable.StringTable()

        elif command == "string":
            # Split off the command and id only, so whitespace inside the
            # string value is kept as written
            string_parts = line.split(None, 2)
            if len(string_parts) < 3:
                raise AssembleScriptException(
                    "syntax: string <id> <quoted-string>")
            if string_table is None:
                raise AssembleScriptException("String table not defined")
            string_id = parse_number(string_parts[1])
            string_value = string_parts[2].strip("\"")
            logger.debug("Adding to string table: 0x%x -> %s", string_id,
                         string_value)
            string_table[string_id] = string_value

        elif command == "script":
            # New script directive
            if len(split_line) < 3:
                raise AssembleScriptException(
                    "syntax: script chunk-tag chunk-number")
            script_chunk_tag = split_line[1].upper()
            if script_chunk_tag not in ("GLSC", "GLOP"):
                raise AssembleScriptException("invalid script chunk tag type")
            script_chunk_number = parse_number(split_line[2])
            if script is not None:
                raise AssembleScriptException("Script already defined")

            # Use same version as in 3DMM
            compiler_version = chunkyfilemodel.Version(29, 16)
            script = scriptmodel.Script(
                endianness=chunkyfilemodel.Endianness.LittleEndian,
                characterset=chunkyfilemodel.CharacterSet.ANSI,
                compilerversion=compiler_version)

        elif command.endswith(":"):
            # Labels. The name comes from the lower-cased token, so labels
            # are stored in lower case.
            label_name = command[:-1]
            if label_name in labels:
                raise AssembleScriptException("Label %s already defined" %
                                              label_name)
            labels[label_name] = instruction_pointer

        elif command == "push":
            # Special handling for PUSH instructions: values are collected
            # and emitted as one PUSH before the next opcode
            if len(split_line) != 2:
                raise AssembleScriptException("syntax: push <value>")
            stack.append(parse_value(split_line[1], labels))

        elif command in string_to_opcode:
            # It's an opcode
            if script is None:
                raise AssembleScriptException(
                    "instruction outside of a script: %s" % command)
            opcode = opcode_list[string_to_opcode[command]]

            # Check if we have a variable name
            variable_name = split_line[1] if len(split_line) > 1 else None

            # If there's existing stuff on the stack, generate a PUSH instruction
            if stack:
                push_instruction = scriptmodel.Instruction(
                    opcode=0, params=list(stack), address=instruction_pointer)
                stack.clear()
                script.instructions.append(push_instruction)
                instruction_pointer += push_instruction.number_of_dwords

            # Generate this instruction
            this_instruction = scriptmodel.Instruction(
                opcode=opcode.opcode,
                address=instruction_pointer,
                variable=variable_name)
            script.instructions.append(this_instruction)
            instruction_pointer += this_instruction.number_of_dwords

        else:
            raise AssembleScriptException("invalid command: %s" % command)

    # Add trailing PUSH instruction if required
    if stack:
        if script is None:
            raise AssembleScriptException("push outside of a script")
        # Built the same way as the PUSH emitted inside the loop:
        # number_of_dwords is read as an attribute there, not called
        push_instruction = scriptmodel.Instruction(
            opcode=0, params=list(stack), address=instruction_pointer)
        stack.clear()
        script.instructions.append(push_instruction)
        instruction_pointer += push_instruction.number_of_dwords

    # Log labels
    if labels and verbose:
        logger.debug("Labels:")
        for label_name, label_instruction_pointer in labels.items():
            logger.debug("%s: %d", label_name, label_instruction_pointer)

    # Create a chunky file containing the script and string table
    chunky_file = chunkyfilemodel.ChunkyFile(
        endianness=chunkyfilemodel.Endianness.LittleEndian,
        characterset=chunkyfilemodel.CharacterSet.ANSI,
        file_type="ASMX")

    if string_table:
        if verbose:
            logger.debug("String table:")
            for string_id, string_value in string_table.items():
                logger.debug("0x%x: %s", string_id, string_value)

        string_table_data = string_table.to_buffer()
        string_table_chunk = chunkyfilemodel.Chunk(
            tag=string_table_chunk_tag,
            number=string_table_chunk_number,
            data=string_table_data,
            flags=chunkyfilemodel.ChunkFlags.Loner)
        chunky_file.chunks.append(string_table_chunk)

    if script:
        if verbose:
            logger.debug("Script:")
            script_formatter = scriptformatter.TextScriptFormatter()
            logger.debug(
                script_formatter.format_script(
                    script,
                    chunkyfilemodel.ChunkId(script_chunk_tag,
                                            script_chunk_number)))

        script_data = assemble_script(script)
        script_chunk = chunkyfilemodel.Chunk(tag=script_chunk_tag,
                                             number=script_chunk_number,
                                             data=script_data)

        # Link the string table to the script as child 0
        if string_table:
            string_table_id = chunkyfilemodel.ChunkId(
                string_table_chunk_tag, string_table_chunk_number)
            string_table_child = chunkyfilemodel.ChunkChild(
                chid=0, ref=string_table_id)
            script_chunk.children.append(string_table_child)

        chunky_file.chunks.append(script_chunk)

    return chunky_file