def __init__(self, raw_buff):
    """Wrap ``raw_buff`` and check the magic number at its start.

    Reads a little-endian unsigned 32-bit magic number.  It is accepted
    when it equals ``MAGIC_NUMBER`` or lies in the inclusive range
    ``MAGIC_NUMBER_MIN``..``MAGIC_NUMBER_MAX``.  On acceptance the next
    32-bit word is stored as ``file_size``, the string pool is parsed and
    the namespace / resource-id bookkeeping containers are created empty.

    Raises:
        Exception: when the magic number is not accepted; ``valid_axml``
            is set to ``False`` before raising.
    """
    self.reset()
    self.file_size = 0
    self.valid_axml = True
    self.buff = BuffHandle(raw_buff)

    magic_number = unpack('<L', self.buff.read(4))[0]
    accepted = (magic_number == MAGIC_NUMBER or
                MAGIC_NUMBER_MIN <= magic_number <= MAGIC_NUMBER_MAX)
    if not accepted:
        self.valid_axml = False
        raise Exception("It's a invalid xml file.")

    # Both accepted forms of the magic number are followed by the same
    # layout: a 32-bit size, then the string pool chunk.
    self.file_size = unpack('<L', self.buff.read(4))[0]
    self.sb = StringPoolChunk(self.buff)

    self.m_resourceIDs = []
    self.m_prefixuri = {}
    self.m_uriprefix = {}
    self.m_prefixuriL = []
    self.visited_ns = []
def __init__(self, raw_buff):
    """Parse ``raw_buff`` into per-package lists of resource table chunks.

    After the table header, package count and main string pool are read,
    every package is walked chunk by chunk.  ``self.packages`` maps each
    package name to a flat list holding, in read order: the package
    object, its two string pools, and then for every chunk its header
    followed by the objects parsed from it.  The interpretation of that
    flat list is deferred (``self.analyzed`` starts as ``False``).
    """
    self.analyzed = False
    self._resolved_strings = None
    self.buff = BuffHandle(raw_buff)

    self.header = ARSCHeader(self.buff)
    self.packageCount = unpack('<i', self.buff.read(4))[0]
    self.stringpool_main = StringPoolChunk(self.buff)
    self.next_header = ARSCHeader(self.buff)

    self.packages = {}
    self.values = {}
    self.resource_values = collections.defaultdict(collections.defaultdict)
    self.resource_configs = collections.defaultdict(
        lambda: collections.defaultdict(set))
    self.resource_keys = collections.defaultdict(
        lambda: collections.defaultdict(collections.defaultdict))

    for _ in range(0, self.packageCount):
        current_package = ARSCResTablePackage(self.buff)
        package_name = current_package.get_name()

        # Register the (still empty) list first, then fill it through a
        # local alias; the list object is the one stored in the dict.
        chunks = []
        self.packages[package_name] = chunks

        table_strings = StringPoolChunk(self.buff)
        key_strings = StringPoolChunk(self.buff)
        chunks.append(current_package)
        chunks.append(table_strings)
        chunks.append(key_strings)

        pc = PackageContext(current_package, self.stringpool_main,
                            table_strings, key_strings)

        # Chunk positions are tracked explicitly and the buffer is
        # re-seeked after each chunk using the size from its header.
        position = self.buff.get_idx()
        while not self.buff.end():
            header = ARSCHeader(self.buff)
            chunks.append(header)
            kind = header.type

            if kind == RES_TABLE_PACKAGE_TYPE:
                break

            if kind == RES_TABLE_TYPE_SPEC_TYPE:
                chunks.append(ARSCResTypeSpec(self.buff, pc))
            elif kind == RES_TABLE_TYPE_TYPE:
                a_res_type = ARSCResType(self.buff, pc)
                chunks.append(a_res_type)
                self.resource_configs[package_name][a_res_type].add(
                    a_res_type.config)

                # One signed 32-bit value per entry, paired with the
                # resource id built from the package id and entry index.
                entries = []
                for index in range(0, a_res_type.entryCount):
                    current_package.mResId = (
                        (current_package.mResId & 0xffff0000) | index)
                    raw = unpack('<i', self.buff.read(4))[0]
                    entries.append((raw, current_package.mResId))
                chunks.append(entries)

                for raw, res_id in entries:
                    if self.buff.end():
                        break
                    if raw == -1:
                        # -1 means no entry object is read for this slot.
                        continue
                    chunks.append(
                        ARSCResTableEntry(self.buff, res_id, pc))
            else:
                print("unknown type")
                break

            position += header.size
            self.buff.set_idx(position)
class AXMLParser(object):
    """Pull parser over a chunked binary XML buffer.

    Each call to :meth:`do_next` (or ``next(parser)``) advances to the next
    event and stores it in ``m_event``.  Names, namespaces and string
    attribute values are indices into the string pool ``sb`` and are
    resolved by the ``get_*`` accessors.
    """

    def __init__(self, raw_buff):
        """Wrap ``raw_buff`` and check the magic number at its start.

        The magic number is accepted when it equals ``MAGIC_NUMBER`` or
        lies in the inclusive range ``MAGIC_NUMBER_MIN``..
        ``MAGIC_NUMBER_MAX``.

        Raises:
            Exception: when the magic number is not accepted;
                ``valid_axml`` is set to ``False`` before raising.
        """
        self.reset()
        self.file_size = 0
        self.valid_axml = True
        self.buff = BuffHandle(raw_buff)

        magic_number = self._read_u32()
        if not (magic_number == MAGIC_NUMBER or
                MAGIC_NUMBER_MIN <= magic_number <= MAGIC_NUMBER_MAX):
            self.valid_axml = False
            raise Exception("It's a invalid xml file.")

        # Both accepted forms share the same layout after the magic number.
        self.file_size = self._read_u32()
        self.sb = StringPoolChunk(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []
        self.visited_ns = []

    def _read_u32(self):
        """Read the next 4 bytes as a little-endian unsigned integer."""
        return unpack('<L', self.buff.read(4))[0]

    def is_valid(self):
        """Return the ``valid_axml`` flag set by the constructor."""
        return self.valid_axml

    def reset(self):
        """Clear all per-event state (event, line, name, attributes)."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def __next__(self):
        """Advance to the next event and return it."""
        self.do_next()
        return self.m_event

    def do_next(self):
        """Advance the parser by one event, updating ``m_event``.

        Does nothing once ``END_DOCUMENT`` has been reached.  Resource-id
        and namespace chunks are consumed silently; the loop stops at the
        first start tag, end tag or text chunk, at the end of the buffer,
        or when a chunk header is invalid (``m_event`` is then left at -1).
        """
        if self.m_event == END_DOCUMENT:
            return

        event = self.m_event
        self.reset()

        while True:
            chunkType = -1

            if event == START_DOCUMENT:
                # The start tag's type word was already consumed when the
                # fake START_DOCUMENT event was emitted.
                chunkType = CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = END_DOCUMENT
                    break

                # FIXME: the read is not guaranteed to return 4 bytes, and
                # a bad read here has caused an endless loop in callers.
                # A short read is treated like an empty one: chunkType
                # stays -1 and is rejected by the range check below.
                data4 = self.buff.read(4)
                if data4 and len(data4) == 4:
                    chunkType = unpack('<L', data4)[0]

            if chunkType == CHUNK_RESOURCEIDS:
                chunkSize = self._read_u32()
                # FIXME
                if chunkSize < 8 or chunkSize % 4 != 0:
                    break

                # chunkSize counts bytes and includes the two header words.
                for _ in range(chunkSize // 4 - 2):
                    self.m_resourceIDs.append(self._read_u32())
                continue

            # FIXME
            if chunkType < CHUNK_XML_FIRST or chunkType > CHUNK_XML_LAST:
                break

            # Fake START_DOCUMENT event.
            if chunkType == CHUNK_XML_START_TAG and event == -1:
                self.m_event = START_DOCUMENT
                break

            self.buff.read(4)  # /*chunkSize*/
            lineNumber = self._read_u32()
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType in (CHUNK_XML_START_NAMESPACE,
                             CHUNK_XML_END_NAMESPACE):
                if chunkType == CHUNK_XML_START_NAMESPACE:
                    prefix = self._read_u32()
                    uri = self._read_u32()

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri
                else:
                    self.ns = -1
                    self.buff.read(4)
                    self.buff.read(4)
                    # Closes the most recently opened namespace.
                    self.m_prefixuriL.pop()
                continue

            self.m_lineNumber = lineNumber

            if chunkType == CHUNK_XML_START_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()

                # FIXME
                self.buff.read(4)  # flags

                # High half-word: id attribute index + 1; low: count.
                attributeCount = self._read_u32()
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF

                # High half-word: style index + 1; low: class index + 1.
                packed = self._read_u32()
                self.m_styleAttribute = (packed >> 16) - 1
                self.m_classAttribute = (packed & 0xFFFF) - 1

                for _ in range(attributeCount * ATTRIBUTE_LENGHT):
                    self.m_attributes.append(self._read_u32())

                # Only the top byte of the value-type word is kept.
                for i in range(ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), ATTRIBUTE_LENGHT):
                    self.m_attributes[i] = self.m_attributes[i] >> 24

                self.m_event = START_TAG
                break

            if chunkType == CHUNK_XML_END_TAG:
                self.m_namespaceUri = self._read_u32()
                self.m_name = self._read_u32()
                self.m_event = END_TAG
                break

            if chunkType == CHUNK_XML_TEXT:
                self.m_name = self._read_u32()

                # FIXME
                self.buff.read(4)
                self.buff.read(4)

                self.m_event = TEXT
                break

    def get_prefix_by_uri(self, uri):
        """Return the prefix index registered for ``uri``, or -1."""
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def get_prefix(self):
        """Return the prefix string of the current namespace, or ''."""
        try:
            return self.sb.getString(self.m_uriprefix[self.m_namespaceUri])
        except KeyError:
            return ''

    def get_name(self):
        """Return the tag name for START_TAG / END_TAG events, else ''."""
        if self.m_name == -1 or self.m_event not in (START_TAG, END_TAG):
            return ''
        return self.sb.getString(self.m_name)

    def get_text(self):
        """Return the text for TEXT events, else ''."""
        if self.m_name == -1 or self.m_event != TEXT:
            return ''
        return self.sb.getString(self.m_name)

    def get_namespace_prefix(self, pos):
        """Return the prefix string of the open namespace at ``pos``."""
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def get_namespace_uri(self, pos):
        """Return the URI string of the open namespace at ``pos``."""
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def get_xmlns(self):
        """Return ``xmlns:`` declarations for namespaces not yet emitted.

        Every URI is reported once; emitted URIs are remembered in
        ``visited_ns``.
        """
        buff = ""
        for uri in self.m_uriprefix:
            if uri in self.visited_ns:
                continue
            prefix = self.m_uriprefix[uri]
            buff += "xmlns:%s=\"%s\"\n" % (
                self.sb.getString(prefix),
                self.sb.getString(self.m_prefixuri[prefix]))
            self.visited_ns.append(uri)
        return buff

    def get_attribute_offset(self, index):
        """Return the position of attribute ``index`` in ``m_attributes``.

        Problems (wrong event, index out of range) are only reported with
        ``print``; the offset is returned regardless.
        """
        # FIXME
        if self.m_event != START_TAG:
            print("Current event is not START_TAG.")

        # Same stride that do_next() uses when filling m_attributes.
        offset = index * ATTRIBUTE_LENGHT
        # FIXME
        if offset >= len(self.m_attributes):
            print("Invalid attribute index")

        return offset

    def get_attribute_count(self):
        """Return the number of attributes of the current start tag.

        Returns -1 when the current event is not START_TAG.  The count is
        an ``int`` (floor division) so it can be passed to ``range``.
        """
        if self.m_event != START_TAG:
            return -1
        return len(self.m_attributes) // ATTRIBUTE_LENGHT

    def get_attribute_prefix(self, index):
        """Return the namespace prefix string of attribute ``index`` or ''."""
        offset = self.get_attribute_offset(index)
        uri = self.m_attributes[offset + ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.get_prefix_by_uri(uri)
        if prefix == -1:
            return ""
        return self.sb.getString(prefix)

    def get_attribute_name(self, index):
        """Return the name of attribute ``index``.

        When the string pool yields an empty name, the name is looked up
        through the resource-id table and the system attribute map.
        """
        offset = self.get_attribute_offset(index)
        name = self.m_attributes[offset + ATTRIBUTE_IX_NAME]
        if name == -1:
            return ""

        res = self.sb.getString(name)
        if not res:
            attr = self.m_resourceIDs[name]
            inverse = public.SYSTEM_RESOURCES['attributes']['inverse']
            if attr in inverse:
                res = 'android:' + inverse[attr]
        return res

    def get_attribute_valueType(self, index):
        """Return the value type of attribute ``index``."""
        offset = self.get_attribute_offset(index)
        return self.m_attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]

    def get_attribute_value_data(self, index):
        """Return the raw value data word of attribute ``index``."""
        offset = self.get_attribute_offset(index)
        return self.m_attributes[offset + ATTRIBUTE_IX_VALUE_DATA]

    def get_attribute_value(self, index):
        """Return the string value of attribute ``index``.

        Only ``TYPE_STRING`` values are resolved; every other type
        currently yields ''.
        """
        offset = self.get_attribute_offset(index)
        valueType = self.m_attributes[offset + ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == TYPE_STRING:
            valueString = self.m_attributes[offset + ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        # WIP
        return ""
class ARSCParser(object):
    """Parser for a resource table buffer.

    The constructor reads every chunk into ``self.packages``; the
    per-locale value tables (``self.values``, ``self.resource_values``,
    ``self.resource_keys``) are built on first use by :meth:`_analyse`.
    """

    def __init__(self, raw_buff):
        """Parse ``raw_buff`` into per-package lists of chunks.

        ``self.packages`` maps each package name to a flat list holding,
        in read order: the package object, its two string pools, and then
        for every chunk its header followed by the objects parsed from it.
        """
        self.analyzed = False
        self._resolved_strings = None
        self.buff = BuffHandle(raw_buff)

        self.header = ARSCHeader(self.buff)
        self.packageCount = unpack('<i', self.buff.read(4))[0]

        self.stringpool_main = StringPoolChunk(self.buff)

        self.next_header = ARSCHeader(self.buff)
        self.packages = {}
        self.values = {}
        self.resource_values = collections.defaultdict(collections.defaultdict)
        self.resource_configs = collections.defaultdict(
            lambda: collections.defaultdict(set))
        self.resource_keys = collections.defaultdict(
            lambda: collections.defaultdict(collections.defaultdict))

        for _ in range(0, self.packageCount):
            current_package = ARSCResTablePackage(self.buff)
            package_name = current_package.get_name()

            self.packages[package_name] = []

            mTableStrings = StringPoolChunk(self.buff)
            mKeyStrings = StringPoolChunk(self.buff)

            self.packages[package_name].append(current_package)
            self.packages[package_name].append(mTableStrings)
            self.packages[package_name].append(mKeyStrings)

            pc = PackageContext(current_package, self.stringpool_main,
                                mTableStrings, mKeyStrings)

            # Chunk positions are tracked explicitly and the buffer is
            # re-seeked after each chunk using the size from its header.
            current = self.buff.get_idx()
            while not self.buff.end():
                header = ARSCHeader(self.buff)
                self.packages[package_name].append(header)

                if header.type == RES_TABLE_TYPE_SPEC_TYPE:
                    self.packages[package_name].append(
                        ARSCResTypeSpec(self.buff, pc))

                elif header.type == RES_TABLE_TYPE_TYPE:
                    a_res_type = ARSCResType(self.buff, pc)
                    self.packages[package_name].append(a_res_type)
                    self.resource_configs[package_name][a_res_type].add(
                        a_res_type.config)

                    # One signed 32-bit value per entry, paired with the
                    # resource id built from package id and entry index.
                    entries = []
                    for j in range(0, a_res_type.entryCount):
                        current_package.mResId = (
                            (current_package.mResId & 0xffff0000) | j)
                        entries.append((unpack('<i', self.buff.read(4))[0],
                                        current_package.mResId))

                    self.packages[package_name].append(entries)

                    for entry, res_id in entries:
                        if self.buff.end():
                            break

                        # -1 means no entry object is read for this slot.
                        if entry != -1:
                            ate = ARSCResTableEntry(self.buff, res_id, pc)
                            self.packages[package_name].append(ate)

                elif header.type == RES_TABLE_PACKAGE_TYPE:
                    break
                else:
                    print("unknown type")
                    break

                current += header.size
                self.buff.set_idx(current)

    def _analyse(self):
        """Build the value tables from the raw chunk lists (runs once).

        Walks each package list from index 3 (after the package object
        and its two string pools).  For every type chunk the layout is
        ``header, type, entries, entry objects...``.
        """
        if self.analyzed:
            return

        self.analyzed = True

        for package_name in self.packages:
            self.values[package_name] = {}
            items = self.packages[package_name]

            nb = 3
            while nb < len(items):
                header = items[nb]
                if (isinstance(header, ARSCHeader) and
                        header.type == RES_TABLE_TYPE_TYPE):
                    a_res_type = items[nb + 1]

                    language = a_res_type.config.get_language()
                    if language not in self.values[package_name]:
                        self.values[package_name][language] = {"public": []}
                    c_value = self.values[package_name][language]

                    entries = items[nb + 2]
                    nb_i = 0
                    for entry, res_id in entries:
                        if entry == -1:
                            continue

                        ate = items[nb + 3 + nb_i]
                        res_type = a_res_type.get_type()

                        self.resource_values[ate.mResId][
                            a_res_type.config] = ate
                        self.resource_keys[package_name][res_type][
                            ate.get_value()] = ate.mResId

                        if ate.get_index() != -1:
                            c_value["public"].append(
                                (res_type, ate.get_value(), ate.mResId))

                        bucket = c_value.setdefault(res_type, [])

                        if res_type == "string":
                            bucket.append(self.get_resource_string(ate))
                        elif res_type == "id":
                            if not ate.is_complex():
                                bucket.append(self.get_resource_id(ate))
                        elif res_type == "bool":
                            if not ate.is_complex():
                                bucket.append(self.get_resource_bool(ate))
                        elif res_type == "integer":
                            bucket.append(self.get_resource_integer(ate))
                        elif res_type == "color":
                            bucket.append(self.get_resource_color(ate))
                        elif res_type == "dimen":
                            bucket.append(self.get_resource_dimen(ate))

                        nb_i += 1

                    # Skip type object, entries list and entry objects;
                    # the header itself is skipped by the nb += 1 below.
                    nb += 2 + nb_i
                nb += 1

    def _values_of(self, package_name, locale, res_type):
        """Return the analysed values for a package/locale/type, or []."""
        self._analyse()
        try:
            return self.values[package_name][locale][res_type]
        except KeyError:
            return []

    def get_resource_string(self, ate):
        """Return ``[name, string data]`` for a string entry."""
        return [ate.get_value(), ate.get_key_data()]

    def get_resource_id(self, ate):
        """Return ``[name]`` plus "false"/"true" when the data is 0/1."""
        x = [ate.get_value()]
        data = ate.key.get_data()
        if data == 0:
            x.append("false")
        elif data == 1:
            x.append("true")
        return x

    def get_resource_bool(self, ate):
        """Return ``[name]`` plus "false"/"true" when the data is 0/-1."""
        x = [ate.get_value()]
        data = ate.key.get_data()
        if data == 0:
            x.append("false")
        elif data == -1:
            x.append("true")
        return x

    def get_resource_integer(self, ate):
        """Return ``[name, integer data]``."""
        return [ate.get_value(), ate.key.get_data()]

    def get_resource_color(self, ate):
        """Return ``[name, "#xxxxxxxx"]`` with the data's four bytes in hex,
        most significant byte first."""
        entry_data = ate.key.get_data()
        return [
            ate.get_value(),
            "#%02x%02x%02x%02x" % (
                ((entry_data >> 24) & 0xFF),
                ((entry_data >> 16) & 0xFF),
                ((entry_data >> 8) & 0xFF),
                (entry_data & 0xFF))
        ]

    def get_resource_dimen(self, ate):
        """Return ``[name, "<value><unit>"]``.

        Falls back to ``[name, raw data]`` when the unit index is outside
        ``DIMENSION_UNITS``.
        """
        try:
            return [
                ate.get_value(),
                "%s%s" % (
                    complexToFloat(ate.key.get_data()),
                    DIMENSION_UNITS[ate.key.get_data() & COMPLEX_UNIT_MASK])
            ]
        except IndexError:
            return [ate.get_value(), ate.key.get_data()]

    # FIXME
    def get_resource_style(self, ate):
        """Placeholder: always returns two empty strings."""
        return ["", ""]

    def get_packages_names(self):
        """Return the list of parsed package names."""
        return list(self.packages.keys())

    def get_locales(self, package_name):
        """Return the locales found for ``package_name``."""
        self._analyse()
        return list(self.values[package_name].keys())

    def get_types(self, package_name, locale):
        """Return the resource types found for a package and locale."""
        self._analyse()
        return list(self.values[package_name][locale].keys())

    def get_public_resources(self, package_name, locale='\x00\x00'):
        """Return the public resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "public"):
            buff += '<public type="%s" name="%s" id="0x%08x" />\n' % (
                i[0], i[1], i[2])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_string_resources(self, package_name, locale='\x00\x00'):
        """Return string resources as ``{'name', 'value'}`` dicts.

        ``value`` is the hex encoding of the UTF-8 bytes of the string.
        """
        import binascii

        res = []
        for i in self._values_of(package_name, locale, "string"):
            res.append({
                'name': i[0],
                'value': binascii.hexlify(i[1].encode('utf-8')).decode(),
            })
        return res

    def get_strings_resources(self):
        """Return every package's strings, per locale, as XML bytes."""
        self._analyse()

        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += "<packages>\n"
        for package_name in self.get_packages_names():
            buff += "<package name=\"%s\">\n" % package_name

            for locale in self.get_locales(package_name):
                buff += "<locale value=%s>\n" % repr(locale)
                buff += '<resources>\n'
                for i in self._values_of(package_name, locale, "string"):
                    buff += '<string name="%s">%s</string>\n' % (i[0], i[1])
                buff += '</resources>\n'
                buff += '</locale>\n'

            buff += "</package>\n"
        buff += "</packages>\n"

        return buff.encode('utf-8')

    def get_id_resources(self, package_name, locale='\x00\x00'):
        """Return the id resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "id"):
            if len(i) == 1:
                buff += '<item type="id" name="%s"/>\n' % (i[0])
            else:
                buff += '<item type="id" name="%s">%s</item>\n' % (i[0],
                                                                   i[1])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_bool_resources(self, package_name, locale='\x00\x00'):
        """Return the bool resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "bool"):
            buff += '<bool name="%s">%s</bool>\n' % (i[0], i[1])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_integer_resources(self, package_name, locale='\x00\x00'):
        """Return the integer resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "integer"):
            buff += '<integer name="%s">%s</integer>\n' % (i[0], i[1])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_color_resources(self, package_name, locale='\x00\x00'):
        """Return the color resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "color"):
            buff += '<color name="%s">%s</color>\n' % (i[0], i[1])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_dimen_resources(self, package_name, locale='\x00\x00'):
        """Return the dimen resources as UTF-8 encoded XML bytes."""
        buff = '<?xml version="1.0" encoding="utf-8"?>\n'
        buff += '<resources>\n'
        for i in self._values_of(package_name, locale, "dimen"):
            buff += '<dimen name="%s">%s</dimen>\n' % (i[0], i[1])
        buff += '</resources>\n'
        return buff.encode('utf-8')

    def get_id(self, package_name, rid, locale='\x00\x00'):
        """Return the public ``(type, name, id)`` tuple for ``rid`` or None."""
        for i in self._values_of(package_name, locale, "public"):
            if i[2] == rid:
                return i
        return None

    class ResourceResolver(object):
        """Resolve a resource id to its values, following references."""

        def __init__(self, android_resources, config=None):
            """Store the parser to query and the wanted configuration."""
            self.resources = android_resources
            self.wanted_config = config

        def resolve(self, res_id):
            """Return the list of resolved values for ``res_id``."""
            result = []
            self._resolve_into_result(result, res_id, self.wanted_config)
            return result

        def _resolve_into_result(self, result, res_id, config):
            """Append the values of every configuration of ``res_id``."""
            configs = self.resources.get_res_configs(res_id, config)
            if configs:
                for config, ate in configs:
                    self.put_ate_value(result, ate, config)

        def put_ate_value(self, result, ate, config):
            """Append the value(s) of one table entry to ``result``.

            A complex entry contributes a single ``(config, [values])``
            pair; a simple entry contributes ``(config, value)``.
            """
            if ate.is_complex():
                complex_array = []
                # list.append takes exactly one argument, so the pair has
                # to be appended as a tuple.
                result.append((config, complex_array))
                for _, item in ate.item.items:
                    self.put_item_value(complex_array, item, config,
                                        complex_=True)
            else:
                self.put_item_value(result, ate.key, config, complex_=False)

        def put_item_value(self, result, item, config, complex_):
            """Append one item's value, resolving references recursively.

            References with a zero id are dropped.
            """
            if item.is_reference():
                res_id = item.get_data()
                if res_id:
                    self._resolve_into_result(
                        result, item.get_data(), self.wanted_config)
            else:
                if complex_:
                    result.append(item.format_value())
                else:
                    result.append((config, item.format_value()))

    def get_resolved_res_configs(self, rid, config=None):
        """Return the values of ``rid`` with references resolved."""
        resolver = ARSCParser.ResourceResolver(self, config)
        return resolver.resolve(rid)

    def get_resolved_strings(self):
        """Return ``{package: {locale: {res_id: string}}}`` (cached).

        The default locale ``'\\x00\\x00'`` is reported as ``'DEFAULT'``.
        Public string ids without a value in a locale map to ``None``.
        """
        self._analyse()
        if self._resolved_strings:
            return self._resolved_strings

        r = {}
        for package_name in self.get_packages_names():
            r[package_name] = {}
            # name -> resource id, accumulated across the package's locales
            k = {}

            for locale in self.values[package_name]:
                v_locale = locale
                if v_locale == '\x00\x00':
                    v_locale = 'DEFAULT'

                resolved = r[package_name][v_locale] = {}

                for i in self._values_of(package_name, locale, "public"):
                    if i[0] == 'string':
                        resolved[i[2]] = None
                        k[i[1]] = i[2]

                for i in self._values_of(package_name, locale, "string"):
                    if i[0] in k:
                        resolved[k[i[0]]] = i[1]

        self._resolved_strings = r
        return r

    def get_res_configs(self, rid, config=None):
        """Return ``(config, entry)`` pairs recorded for resource ``rid``.

        When several configurations exist and ``config`` is given, only
        that configuration is returned; an unknown one yields ``[]``.

        Raises:
            ValueError: when ``rid`` is falsy.
        """
        self._analyse()

        if not rid:
            raise ValueError("'rid' should be set")

        try:
            res_options = self.resource_values[rid]
            if len(res_options) > 1 and config:
                return [(config, res_options[config])]
            return list(res_options.items())
        except KeyError:
            return []

    def get_string(self, package_name, name, locale='\x00\x00'):
        """Return the ``[name, value]`` string entry for ``name`` or None."""
        for i in self._values_of(package_name, locale, "string"):
            if i[0] == name:
                return i
        return None

    def get_res_id_by_key(self, package_name, resource_type, key):
        """Return the resource id registered for ``key``, or None."""
        # resource_keys is only filled by _analyse().
        self._analyse()
        try:
            return self.resource_keys[package_name][resource_type][key]
        except KeyError:
            return None

    def get_items(self, package_name):
        """Return the raw chunk list of ``package_name``."""
        self._analyse()
        return self.packages[package_name]

    def get_type_configs(self, package_name, type_name=None):
        """Return ``{type name: [configs]}`` for a package.

        ``package_name=None`` selects the first parsed package;
        ``type_name`` optionally restricts the result to one type.
        """
        if package_name is None:
            package_name = self.get_packages_names()[0]
        result = collections.defaultdict(list)

        for res_type, configs in list(
                self.resource_configs[package_name].items()):
            if res_type.get_package_name() == package_name and (
                    type_name is None or res_type.get_type() == type_name):
                result[res_type.get_type()].extend(configs)

        return result