def test_find_all_text_pointers_in_script_engine_script(self):
    """The script parsed at 0x197637 must yield the text pointer 0x197661."""
    script_address = 0x197637  # 0x197634
    parsed_script = parse_script_engine_script_at(script_address, debug=False)
    # materialize the result so membership is checked against a plain list
    found_pointers = list(
        find_all_text_pointers_in_script_engine_script(
            parsed_script,
            bank=calculate_bank(script_address),
            debug=False,
        )
    )
    self.assertIn(0x197661, found_pointers)
def line_has_comment_address(line, returnable=None, bank=None):
    """Check that a line has a comment starting with a valid address.

    The first space-separated token after the first ";" is parsed as a hex
    address. Accepted shapes are a bare offset ("3F0A", "$3F0A", "0x3F0A")
    or a "bank:offset" pair ("3:3F0A", "$3:$3F0A", "0x3:0x3F0A").

    Arguments:
        line: the source line to inspect.
        returnable: optional dict that receives the result. Its "bank",
            "offset" and "address" keys are always reset to None first and
            are only filled in when the function returns True.
        bank: bank to use when the comment only contains an offset. When it
            is None and the comment has no bank either, the bank is derived
            with pointers.calculate_bank(offset).

    Returns True when an address was found, False otherwise (including for
    malformed hex such as "$zz" or a missing piece such as "3:").

    Note: bank is required if you have a 4-letter-or-less address, because
    otherwise there is no way to figure out which bank is currently being
    scanned.
    """
    # a fresh dict per call; a shared mutable default would leak results
    # from one call into the next
    if returnable is None:
        returnable = {}

    #first set the bank/offset to nada
    returnable["bank"] = None
    returnable["offset"] = None
    returnable["address"] = None

    #only valid characters are 0-9a-f (pieces are lowercased before checking)
    valid = "0123456789abcdef"
    duds = ["$", "0x", "x"]

    #check if there is a comment in this line
    if ";" not in line:
        return False

    #first throw away anything in quotes
    #(only when a quote character appears an even, non-zero number of times)
    if (line.count("\"") % 2 == 0 and line.count("\"") != 0) \
       or (line.count("\'") % 2 == 0 and line.count("\'") != 0):
        line = remove_quoted_text(line)

    #check if there is still a comment in this line after quotes removed
    if ";" not in line:
        return False

    #there must be text after the semicolon, and spaces alone don't count
    line = line.strip(" ")
    if line.endswith(";"):
        return False

    #use the first token of the comment part
    comment = line[line.find(";") + 1:].strip(" ")
    token = comment.split(" ")[0]

    #filter out blanks/duds
    if token in duds or token == ":":
        return False

    try:
        #process a token with a A:B format
        if ":" in token:
            #3:3F0A, $3:$3F0A, 0x3:0x3F0A, 3:3F0A
            pieces = token.split(":")
            bank_piece = pieces[0].lower()
            offset_piece = pieces[1].lower()

            #filter out blanks/duds
            if bank_piece in duds or offset_piece in duds:
                return False

            parsed = []
            for piece in (bank_piece, offset_piece):
                #they can't have both "$" and "x"
                if "$" in piece and "x" in piece:
                    return False
                piece = piece.replace("$", "0x")
                #check characters for validity
                for c in piece.replace("x", ""):
                    if c not in valid:
                        return False
                #an empty or misplaced-"x" piece raises ValueError here
                parsed.append(int(piece, 16))
            bank, offset = parsed
        else:
            #can't have both "$" and "x" in the number
            if "$" in token and "x" in token:
                return False
            #it should be 0x
            if "x" in token and "0x" not in token:
                return False

            if "$" in token:
                token = token.replace("$", "0x")
            elif "0x" not in token:
                #might just be "1" at this point
                token = token.lower()
                #check if there are bad characters
                for c in token:
                    if c not in valid:
                        return False
            offset = int(token, 16)
    except ValueError:
        #malformed hex is simply "no address", not a crash
        return False

    if bank is None:
        bank = pointers.calculate_bank(offset)

    returnable["bank"] = bank
    returnable["offset"] = offset
    returnable["address"] = pointers.calculate_pointer(offset, bank=bank)

    return True
def line_has_comment_address(line, returnable=None, bank=None):
    """Check that a line has a comment starting with a valid address.

    The first space-separated token after the first ";" is parsed as a hex
    address. Accepted shapes are a bare offset ("3F0A", "$3F0A", "0x3F0A")
    or a "bank:offset" pair ("3:3F0A", "$3:$3F0A", "0x3:0x3F0A").

    Arguments:
        line: the source line to inspect.
        returnable: optional dict that receives the result. Its "bank",
            "offset" and "address" keys are always reset to None first and
            are only filled in when the function returns True.
        bank: bank to use when the comment only contains an offset. When it
            is None and the comment has no bank either, the bank is derived
            with calculate_bank(offset).

    Returns True when an address was found, False otherwise (including for
    malformed hex such as "$zz" or a missing piece such as "3:").

    Note: bank is required if you have a 4-letter-or-less address, because
    otherwise there is no way to figure out which bank is currently being
    scanned.
    """
    # a fresh dict per call; a shared mutable default would leak results
    # from one call into the next
    if returnable is None:
        returnable = {}

    #first set the bank/offset to nada
    returnable["bank"] = None
    returnable["offset"] = None
    returnable["address"] = None

    #only valid characters are 0-9a-f (pieces are lowercased before checking)
    valid = "0123456789abcdef"
    duds = ["$", "0x", "x"]

    #check if there is a comment in this line
    if ";" not in line:
        return False

    #first throw away anything in quotes
    #(only when a quote character appears an even, non-zero number of times)
    if (line.count("\"") % 2 == 0 and line.count("\"") != 0) \
       or (line.count("\'") % 2 == 0 and line.count("\'") != 0):
        line = remove_quoted_text(line)

    #check if there is still a comment in this line after quotes removed
    if ";" not in line:
        return False

    #there must be text after the semicolon, and spaces alone don't count
    line = line.strip(" ")
    if line.endswith(";"):
        return False

    #use the first token of the comment part
    comment = line[line.find(";") + 1:].strip(" ")
    token = comment.split(" ")[0]

    #filter out blanks/duds
    if token in duds or token == ":":
        return False

    try:
        #process a token with a A:B format
        if ":" in token:
            #3:3F0A, $3:$3F0A, 0x3:0x3F0A, 3:3F0A
            pieces = token.split(":")
            bank_piece = pieces[0].lower()
            offset_piece = pieces[1].lower()

            #filter out blanks/duds
            if bank_piece in duds or offset_piece in duds:
                return False

            parsed = []
            for piece in (bank_piece, offset_piece):
                #they can't have both "$" and "x"
                if "$" in piece and "x" in piece:
                    return False
                piece = piece.replace("$", "0x")
                #check characters for validity
                for c in piece.replace("x", ""):
                    if c not in valid:
                        return False
                #an empty or misplaced-"x" piece raises ValueError here
                parsed.append(int(piece, 16))
            bank, offset = parsed
        else:
            #can't have both "$" and "x" in the number
            if "$" in token and "x" in token:
                return False
            #it should be 0x
            if "x" in token and "0x" not in token:
                return False

            if "$" in token:
                token = token.replace("$", "0x")
            elif "0x" not in token:
                #might just be "1" at this point
                token = token.lower()
                #check if there are bad characters
                for c in token:
                    if c not in valid:
                        return False
            offset = int(token, 16)
    except ValueError:
        #malformed hex is simply "no address", not a crash
        return False

    if bank is None:
        bank = calculate_bank(offset)

    returnable["bank"] = bank
    returnable["offset"] = offset
    returnable["address"] = calculate_pointer(offset, bank=bank)

    return True
def test_calculate_bank(self):
    """calculate_bank maps addresses to banks and rejects 0x4000-0x7000.

    Uses assertEqual rather than the deprecated failUnless alias so that a
    failure reports the actual bank value.
    """
    self.assertEqual(calculate_bank(0x8000), 2)
    # a hex string is accepted as well as an int
    self.assertEqual(calculate_bank("0x9000"), 2)
    self.assertEqual(calculate_bank(0), 0)
    # each of these addresses is expected to raise
    for address in [0x4000, 0x5000, 0x6000, 0x7000]:
        self.assertRaises(Exception, calculate_bank, address)
def find_addresses():
    """returns a list of text pointers
    useful for testing parse_text_engine_script_at

    Walks every map in the module-level map_names and gathers the text
    pointers referenced by its signpost, xy-trigger, trigger, callback and
    people-event scripts, plus the text pointers and scripts stored in
    trainer data.

    Note that this list is not exhaustive. There are some texts that
    are only pointed to from some script that a current script just
    points to. So find_all_text_pointers_in_script_engine_script will
    have to recursively follow through each script to find those.
    .. it does this now :)

    The order of the returned list is unspecified because the addresses
    are collected in a set.
    """
    addresses = set()
    # for each map group
    for map_group in map_names:
        # for each map id
        for map_id in map_names[map_group]:
            # skip the offset key
            if map_id == "offset":
                continue
            # dump this into smap
            smap = map_names[map_group][map_id]

            # signposts: only func values 0 through 6 are scanned
            for signpost in smap["signposts"]:
                if signpost["func"] not in (0, 1, 2, 3, 4, 5, 6):
                    continue
                script = signpost["script"]
                # skip signposts with no bytes
                if len(script) == 0:
                    continue
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        script, smap["event_bank"]))

            # xy triggers
            for xy_trigger in smap["xy_triggers"]:
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        xy_trigger["script"], smap["event_bank"]))

            # trigger scripts use the bank of their own address
            for trigger in smap["trigger_scripts"].values():
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        trigger["script"],
                        pointers.calculate_bank(trigger["address"])))

            # callback scripts use the bank of their own address
            for callback in smap["callback_scripts"].values():
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        callback["script"],
                        pointers.calculate_bank(callback["address"])))

            # people-events
            for event in smap["people_events"]:
                if event["event_type"] == "script":
                    addresses.update(
                        find_all_text_pointers_in_script_engine_script(
                            event["script"], smap["event_bank"]))
                if event["event_type"] == "trainer":
                    trainer_data = event["trainer_data"]
                    # the two trainer text pointers are taken as-is
                    addresses.add(trainer_data["text_when_seen_ptr"])
                    addresses.add(
                        trainer_data["text_when_trainer_beaten_ptr"])
                    # trainer scripts use the bank of the trainer data
                    trainer_bank = pointers.calculate_bank(
                        event["trainer_data_address"])
                    for key in ("script_talk_again", "script_when_lost"):
                        addresses.update(
                            find_all_text_pointers_in_script_engine_script(
                                trainer_data[key], trainer_bank))

    # the docstring promises a list; without this the result was discarded
    return list(addresses)
def find_addresses():
    """returns a list of text pointers
    useful for testing parse_text_engine_script_at

    Walks every map in the module-level map_names and gathers the text
    pointers referenced by its signpost, xy-trigger, trigger, callback and
    people-event scripts, plus the text pointers and scripts stored in
    trainer data.

    Note that this list is not exhaustive. There are some texts that
    are only pointed to from some script that a current script just
    points to. So find_all_text_pointers_in_script_engine_script will
    have to recursively follow through each script to find those.
    .. it does this now :)

    The order of the returned list is unspecified because the addresses
    are collected in a set.
    """
    addresses = set()
    # for each map group
    for map_group in map_names:
        # for each map id
        for map_id in map_names[map_group]:
            # skip the offset key
            if map_id == "offset":
                continue
            # dump this into smap
            smap = map_names[map_group][map_id]

            # signposts: only func values 0 through 6 are scanned
            for signpost in smap["signposts"]:
                if signpost["func"] not in (0, 1, 2, 3, 4, 5, 6):
                    continue
                script = signpost["script"]
                # skip signposts with no bytes
                if len(script) == 0:
                    continue
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        script, smap["event_bank"]))

            # xy triggers
            for xy_trigger in smap["xy_triggers"]:
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        xy_trigger["script"], smap["event_bank"]))

            # trigger scripts use the bank of their own address
            for trigger in smap["trigger_scripts"].values():
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        trigger["script"],
                        pointers.calculate_bank(trigger["address"])))

            # callback scripts use the bank of their own address
            for callback in smap["callback_scripts"].values():
                addresses.update(
                    find_all_text_pointers_in_script_engine_script(
                        callback["script"],
                        pointers.calculate_bank(callback["address"])))

            # people-events
            for event in smap["people_events"]:
                if event["event_type"] == "script":
                    addresses.update(
                        find_all_text_pointers_in_script_engine_script(
                            event["script"], smap["event_bank"]))
                if event["event_type"] == "trainer":
                    trainer_data = event["trainer_data"]
                    # the two trainer text pointers are taken as-is
                    addresses.add(trainer_data["text_when_seen_ptr"])
                    addresses.add(
                        trainer_data["text_when_trainer_beaten_ptr"])
                    # trainer scripts use the bank of the trainer data
                    trainer_bank = pointers.calculate_bank(
                        event["trainer_data_address"])
                    for key in ("script_talk_again", "script_when_lost"):
                        addresses.update(
                            find_all_text_pointers_in_script_engine_script(
                                trainer_data[key], trainer_bank))

    # the docstring promises a list; without this the result was discarded
    return list(addresses)