예제 #1
0
    def __init__(self, raw_buff):
        """
        Prepare the parser for a raw binary XML buffer.

        The first 32-bit word is compared against ``const.CHUNK_AXML_FILE``.
        A mismatching word whose upper 16 bits are 0x0008 is tolerated: the
        file is flagged through ``axml_tampered`` and parsing goes on. Any
        other mismatch sets ``valid_axml`` to False and returns right away,
        so none of the attributes assigned further down exist in that case.

        :param raw_buff: raw content of the file, handed to
            ``bytecode.BuffHandle``
        """
        self.reset()

        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        magic = unpack('<L', self.buff.read(4))[0]

        if magic != const.CHUNK_AXML_FILE:
            # The header is not the expected one. Some files were seen to
            # start with 0x0008NNNN, NNNN being an arbitrary number; only
            # those are still accepted.
            if (magic >> 16) != 0x0008:
                self.valid_axml = False
                warn("Not a valid AXML file. Header 0x{:08x}".format(magic))
                return

            self.axml_tampered = True
            warn("AXML file has an unusual header, most malwares like "
                 "doing such stuff to anti androguard! But we try to parse "
                 "it anyways. Header: 0x{:08x}".format(magic))

        # The second word is the declared file size; it must not exceed
        # what the buffer really holds.
        self.filesize = unpack('<L', self.buff.read(4))[0]
        assert self.filesize <= self.buff.size(), (
            "Declared filesize does not match real size: {} vs {}".format(
                self.filesize, self.buff.size()))

        # The string pool comes next. ARSCHeader reads the chunk header
        # (8 bytes: header + chunk_size) from the buffer.
        pool_header = arscutil.ARSCHeader(self.buff)
        assert pool_header.type == const.RES_STRING_POOL_TYPE, (
            "Expected String Pool header, got %x" % pool_header.type)

        self.sb = StringBlock(self.buff, pool_header)

        self.m_resourceIDs = []
        # prefix --> uri
        self.m_prefixuri = {}
        # uri --> list of prefixes
        self.m_uriprefix = defaultdict(list)
        # List of the current (prefix, uri) pairs
        self.m_prefixuriL = []
        # Namespaces which were already printed
        self.visited_ns = []
예제 #2
0
    def __init__(self, raw_buff):
        """
        Prepare the parser for a raw binary XML buffer.

        When the first 32-bit word is not ``const.CHUNK_AXML_FILE`` the
        instance is marked invalid (``valid_axml`` is False), a warning is
        emitted and nothing else is initialised.

        :param raw_buff: raw content of the file, handed to
            ``bytecode.BuffHandle``
        """
        self.reset()

        self.valid_axml = True
        self.buff = bytecode.BuffHandle(raw_buff)

        magic, = unpack('<L', self.buff.read(4))

        if magic != const.CHUNK_AXML_FILE:
            self.valid_axml = False
            warn("Not a valid xml file")
            return

        # The four bytes after the magic are consumed but not used
        self.buff.read(4)

        self.sb = StringBlock(self.buff)

        self.m_resourceIDs = []
        self.m_prefixuri = {}
        self.m_uriprefix = {}
        self.m_prefixuriL = []

        self.visited_ns = []
예제 #3
0
class AXMLParser(object):
    """
    Event based pull parser for a binary XML buffer.

    Every call to :meth:`doNext` (or ``next(parser)``) consumes chunks from
    the buffer until one event is produced; the details of that event are
    stored in the ``m_*`` attributes. Names, prefixes, URIs and string
    values are kept as indices and resolved through ``self.sb.getString``
    by the ``get*`` accessors.
    """

    def __init__(self, raw_buff):
        """
        Read the file header and set up the parser state.

        When the first 32-bit word is not ``const.CHUNK_AXML_FILE`` the
        instance is marked invalid (see :meth:`is_valid`), a warning is
        emitted and nothing else is initialised.

        :param raw_buff: raw content of the file, handed to
            ``bytecode.BuffHandle``
        """
        self.reset()

        self.valid_axml = True
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file = unpack('<L', self.buff.read(4))[0]

        if axml_file == const.CHUNK_AXML_FILE:
            # The four bytes after the magic are consumed but not used
            self.buff.read(4)

            self.sb = StringBlock(self.buff)

            self.m_resourceIDs = []
            # prefix --> uri
            self.m_prefixuri = {}
            # uri --> prefix
            self.m_uriprefix = {}
            # Stack of the (prefix, uri) pairs which are currently open
            self.m_prefixuriL = []

            # Namespaces already emitted by getXMLNS
            self.visited_ns = []
        else:
            self.valid_axml = False
            warn("Not a valid xml file")

    def is_valid(self):
        """Return True if the constructor accepted the file header."""
        return self.valid_axml

    def reset(self):
        """Clear all per-event fields back to their "unset" values."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def __next__(self):
        """Advance to the next event and return its event code."""
        self.doNext()
        return self.m_event

    def doNext(self):
        """
        Consume chunks until the next event and store it in ``m_event``.

        Resource-ID and namespace chunks are processed internally and do not
        produce an event. Once ``END_DOCUMENT`` was reached, further calls
        do nothing.
        """
        if self.m_event == const.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1

            # Fake END_DOCUMENT event.
            if event == const.END_TAG:
                pass

            # START_DOCUMENT
            if event == const.START_DOCUMENT:
                # The type word of the first start tag was already consumed
                # by the call which produced START_DOCUMENT.
                chunkType = const.CHUNK_XML_START_TAG
            else:
                if self.buff.end():
                    self.m_event = const.END_DOCUMENT
                    break
                chunkType = unpack('<L', self.buff.read(4))[0]

            if chunkType == const.CHUNK_RESOURCEIDS:
                chunkSize = unpack('<L', self.buff.read(4))[0]
                # FIXME
                if chunkSize < 8 or chunkSize % 4 != 0:
                    warn("Invalid chunk size")

                # chunkSize is divided into 32-bit words; two of them (type
                # and size) were read already. Floor division keeps the
                # count an integer without going through a float.
                for i in _range(0, chunkSize // 4 - 2):
                    self.m_resourceIDs.append(
                        unpack('<L', self.buff.read(4))[0])

                continue

            # FIXME
            if chunkType < const.CHUNK_XML_FIRST or \
                    chunkType > const.CHUNK_XML_LAST:
                warn("invalid chunk type")

            # Fake START_DOCUMENT event.
            if chunkType == const.CHUNK_XML_START_TAG and event == -1:
                self.m_event = const.START_DOCUMENT
                break

            self.buff.read(4)  # /*chunkSize*/
            lineNumber = unpack('<L', self.buff.read(4))[0]
            self.buff.read(4)  # 0xFFFFFFFF

            if chunkType == const.CHUNK_XML_START_NAMESPACE or \
                    chunkType == const.CHUNK_XML_END_NAMESPACE:
                if chunkType == const.CHUNK_XML_START_NAMESPACE:
                    prefix = unpack('<L', self.buff.read(4))[0]
                    uri = unpack('<L', self.buff.read(4))[0]

                    self.m_prefixuri[prefix] = uri
                    self.m_uriprefix[uri] = prefix
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri
                else:
                    self.ns = -1
                    # Skip the prefix and uri fields of the end chunk
                    self.buff.read(4)
                    self.buff.read(4)
                    # An END_NAMESPACE without a matching START_NAMESPACE
                    # must not abort parsing with an IndexError.
                    if self.m_prefixuriL:
                        self.m_prefixuriL.pop()
                    # del self.m_prefixuri[ prefix ]
                    # del self.m_uriprefix[ uri ]

                continue

            self.m_lineNumber = lineNumber

            if chunkType == const.CHUNK_XML_START_TAG:
                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                self.buff.read(4)  # flags

                # Upper 16 bits: id attribute (stored minus one),
                # lower 16 bits: number of attributes
                attributeCount = unpack('<L', self.buff.read(4))[0]
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                # Upper 16 bits: style attribute, lower 16 bits: class
                # attribute (both stored minus one)
                self.m_classAttribute = unpack('<L', self.buff.read(4))[0]
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                # All attribute fields are stored flat, one 32-bit word each
                for i in _range(0, attributeCount * const.ATTRIBUTE_LENGHT):
                    self.m_attributes.append(
                        unpack('<L', self.buff.read(4))[0])

                # Keep only the top byte of every value type field
                for i in _range(
                        const.ATTRIBUTE_IX_VALUE_TYPE, len(self.m_attributes),
                        const.ATTRIBUTE_LENGHT):
                    self.m_attributes[i] = self.m_attributes[i] >> 24

                self.m_event = const.START_TAG
                break

            if chunkType == const.CHUNK_XML_END_TAG:
                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]
                self.m_event = const.END_TAG
                break

            if chunkType == const.CHUNK_XML_TEXT:
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                self.buff.read(4)
                self.buff.read(4)

                self.m_event = const.TEXT
                break

    def getPrefixByUri(self, uri):
        """
        Return the prefix index registered for a URI index.

        :param uri: index of the URI
        :return: the prefix index, or -1 if the URI is unknown
        """
        try:
            return self.m_uriprefix[uri]
        except KeyError:
            return -1

    def getPrefix(self):
        """
        Return the prefix string for the namespace of the current event,
        or an empty string if a lookup raises KeyError.
        """
        try:
            return self.sb.getString(self.m_uriprefix[self.m_namespaceUri])
        except KeyError:
            return ''

    def getName(self):
        """
        Return the tag name of the current event.

        An empty string is returned unless the current event is START_TAG
        or END_TAG and a name is set.
        """
        if self.m_name == -1 or (
                self.m_event != const.START_TAG and
                self.m_event != const.END_TAG):
            return ''

        return self.sb.getString(self.m_name)

    def getText(self):
        """
        Return the string of the current TEXT event, or an empty string for
        any other event.
        """
        if self.m_name == -1 or self.m_event != const.TEXT:
            return ''

        return self.sb.getString(self.m_name)

    def getNamespacePrefix(self, pos):
        """
        Return the prefix string of the open namespace at position `pos`.

        :param pos: index into the list of currently open namespaces
        """
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def getNamespaceUri(self, pos):
        """
        Return the URI string of the open namespace at position `pos`.

        :param pos: index into the list of currently open namespaces
        """
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def getXMLNS(self):
        """
        Return one declaration line for every namespace which was not
        returned by an earlier call, and remember those as visited.
        """
        buff = ""
        for i in self.m_uriprefix:
            if i not in self.visited_ns:
                buff += "xmlnamespace:%s=\"%s\"\n" % (
                    self.sb.getString(self.m_uriprefix[i]),
                    self.sb.getString(self.m_prefixuri[self.m_uriprefix[i]]))
                self.visited_ns.append(i)
        return buff

    def getNamespaceCount(self, pos):
        """Not implemented; always returns None."""
        pass

    def getAttributeOffset(self, index):
        """
        Return the position of attribute `index` inside ``m_attributes``.

        Problems (wrong event, index out of range) are only reported with a
        warning; the computed offset is returned regardless.

        :param index: number of the attribute, starting at 0
        """
        # FIXME
        if self.m_event != const.START_TAG:
            warn("Current event is not START_TAG.")

        # Same stride which doNext uses to fill m_attributes
        offset = index * const.ATTRIBUTE_LENGHT
        # FIXME
        if offset >= len(self.m_attributes):
            warn("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """
        Return the number of attributes of the current START_TAG event as
        an integer, or -1 if the current event is not START_TAG.
        """
        if self.m_event != const.START_TAG:
            return -1

        # Floor division: a plain "/" would return a float on Python 3
        return len(self.m_attributes) // const.ATTRIBUTE_LENGHT

    def getAttributePrefix(self, index):
        """
        Return the namespace prefix string of attribute `index`, or an
        empty string if no prefix is registered for its namespace URI.
        """
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + const.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)

        if prefix == -1:
            return ""

        return self.sb.getString(prefix)

    def getAttributeName(self, index):
        """
        Return the name of attribute `index`.

        If the string block has no (non-empty) string for the name, the
        resource ID stored at the same index is looked up in the table of
        system attributes instead.
        """
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + const.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        res = self.sb.getString(name)
        if not res:
            attr = self.m_resourceIDs[name]
            if attr in public.SYSTEM_RESOURCES['attributes']['inverse']:
                # NOTE(review): no ':' is inserted between 'android' and the
                # looked up name - confirm this is intended.
                res = 'android' + \
                    public.SYSTEM_RESOURCES['attributes']['inverse'][attr]

        return res

    def getAttributeValueType(self, index):
        """Return the value type field of attribute `index`."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the value data field of attribute `index`."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """
        Return the string value of attribute `index`.

        Only attributes of type ``const.TYPE_STRING`` are resolved; for
        every other type an empty string is returned.
        """
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == const.TYPE_STRING:
            valueString = self.m_attributes[
                offset + const.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        # WIP
        return ""
예제 #4
0
    def __init__(self, raw_buff):
        """
        Parse a resource table from a raw buffer.

        After the table header, the package count and the main string pool,
        every package is read together with its two string pools and the
        chunks which follow. Everything read for a package is appended, in
        reading order, to the list ``self.packages[<package name>]``.

        :param raw_buff: raw content of the file, handed to
            ``bytecode.BuffHandle``
        """
        self.analyzed = False
        self.buff = bytecode.BuffHandle(raw_buff)

        self.header = ARSCHeader(self.buff)
        self.packageCount = unpack('<i', self.buff.read(4))[0]

        self.stringpool_main = StringBlock(self.buff)

        self.next_header = ARSCHeader(self.buff)
        self.packages = {}
        self.values = {}

        for _ in _range(0, self.packageCount):
            current_package = ARSCResTablePackage(self.buff)
            package_name = current_package.get_name()

            # Local alias for the list which collects this package's items
            items = []
            self.packages[package_name] = items

            mTableStrings = StringBlock(self.buff)
            mKeyStrings = StringBlock(self.buff)

            items.append(current_package)
            items.append(mTableStrings)
            items.append(mKeyStrings)

            pc = PackageContext(current_package, self.stringpool_main,
                                mTableStrings, mKeyStrings)

            # Start of the chunk which is read next; advanced by the size
            # of every handled chunk.
            chunk_start = self.buff.get_idx()
            while not self.buff.end():
                chunk_header = ARSCHeader(self.buff)
                items.append(chunk_header)

                chunk_type = chunk_header.type
                if chunk_type == const.RES_TABLE_TYPE_SPEC_TYPE:
                    items.append(ARSCResTypeSpec(self.buff, pc))

                elif chunk_type == const.RES_TABLE_TYPE_TYPE:
                    a_res_type = ARSCResType(self.buff, pc)
                    items.append(a_res_type)

                    # One signed 32-bit value per entry, paired with the
                    # resource ID whose lower 16 bits are the entry number
                    entries = []
                    for entry_no in _range(0, a_res_type.entryCount):
                        current_package.mResId = \
                            current_package.mResId & 0xffff0000 | entry_no
                        value = unpack('<i', self.buff.read(4))[0]
                        entries.append((value, current_package.mResId))

                    items.append(entries)

                    for value, res_id in entries:
                        if self.buff.end():
                            break

                        # -1 entries are skipped, nothing is read for them
                        if value == -1:
                            continue

                        items.append(
                            ARSCResTableEntry(self.buff, res_id, pc))

                elif chunk_type == const.RES_TABLE_PACKAGE_TYPE:
                    break
                else:
                    warn("unknown type")
                    break

                # Jump to the start of the following chunk
                chunk_start += chunk_header.size
                self.buff.set_idx(chunk_start)
예제 #5
0
class AXMLParser(object):
    """
    Event based pull parser for a binary XML buffer.

    Every call to :meth:`doNext` (or ``next(parser)``) consumes chunks from
    the buffer until one event is produced; the details of that event are
    stored in the ``m_*`` attributes. Names, prefixes, URIs and string
    values are kept as indices and resolved through ``self.sb.getString``
    by the ``get*`` accessors.
    """

    def __init__(self, raw_buff):
        """
        Read the file header and the string pool and set up the parser.

        The first 32-bit word is compared against ``const.CHUNK_AXML_FILE``.
        A mismatching word whose upper 16 bits are 0x0008 is tolerated: the
        file is flagged through ``axml_tampered`` and parsing goes on. Any
        other mismatch sets ``valid_axml`` to False and returns right away,
        so none of the attributes assigned further down exist in that case.

        :param raw_buff: raw content of the file, handed to
            ``bytecode.BuffHandle``
        """
        self.reset()

        self.valid_axml = True
        self.axml_tampered = False
        self.packerwarning = False
        self.buff = bytecode.BuffHandle(raw_buff)

        axml_file, = unpack('<L', self.buff.read(4))

        if axml_file != const.CHUNK_AXML_FILE:
            # It looks like the header is wrong.
            # need some other checks.
            # We noted, that a some of files start with 0x0008NNNN,
            # where NNNN is some random number

            if axml_file >> 16 == 0x0008:
                self.axml_tampered = True
                warn(
                    "AXML file has an unusual header, most malwares like "
                    "doing such stuff to anti androguard! But we try to parse "
                    "it anyways. Header: 0x{:08x}".format(axml_file))
            else:
                self.valid_axml = False
                warn(
                    "Not a valid AXML file. Header 0x{:08x}".format(axml_file))
                return

        # Next is the filesize
        self.filesize, = unpack('<L', self.buff.read(4))
        assert self.filesize <= self.buff.size(), (
            "Declared filesize does not match real size: {} vs {}".format(
                self.filesize, self.buff.size()))

        # Now we parse the STRING POOL
        header = arscutil.ARSCHeader(
            self.buff)  # read 8 byte=header+chunk_size
        assert header.type == const.RES_STRING_POOL_TYPE, (
            "Expected String Pool header, got %x" % header.type)

        self.sb = StringBlock(self.buff, header)

        self.m_resourceIDs = []
        # prefix --> uri
        self.m_prefixuri = {}
        # uri --> list of prefixes
        self.m_uriprefix = defaultdict(list)
        # Contains a list of current prefix/uri pairs
        self.m_prefixuriL = []
        # Store which namespaces are already printed
        self.visited_ns = []

    def is_valid(self):
        """Return True if the constructor accepted the file header."""
        return self.valid_axml

    def reset(self):
        """Clear all per-event fields back to their "unset" values."""
        self.m_event = -1
        self.m_lineNumber = -1
        self.m_name = -1
        self.m_namespaceUri = -1
        self.m_attributes = []
        self.m_idAttribute = -1
        self.m_classAttribute = -1
        self.m_styleAttribute = -1

    def __next__(self):
        """Advance to the next event and return its event code."""
        self.doNext()
        return self.m_event

    def doNext(self):
        """
        Consume chunks until the next event and store it in ``m_event``.

        Resource-ID and namespace chunks are processed internally and do not
        produce an event. Once ``END_DOCUMENT`` was reached, further calls
        do nothing.
        """
        if self.m_event == const.END_DOCUMENT:
            return

        event = self.m_event

        self.reset()
        while True:
            chunkType = -1
            # General notes:
            # * chunkSize is from start of chunk, including the tag type

            # Fake END_DOCUMENT event.
            if event == const.END_TAG:
                pass

            # START_DOCUMENT
            if event == const.START_DOCUMENT:
                # The type word of the first start tag was already consumed
                # by the call which produced START_DOCUMENT.
                chunkType = const.CHUNK_XML_START_TAG
            else:
                # Stop at the declared filesize or at the end of the file
                if self.buff.end() or self.buff.get_idx() == self.filesize:
                    self.m_event = const.END_DOCUMENT
                    break
                chunkType = unpack('<L', self.buff.read(4))[0]

            # Parse ResourceIDs. This chunk is after the String section
            if chunkType == const.CHUNK_RESOURCEIDS:
                chunkSize = unpack('<L', self.buff.read(4))[0]

                # Check size: < 8 bytes mean that the chunk is not complete
                # Should be aligned to 4 bytes.
                if chunkSize < 8 or chunkSize % 4 != 0:
                    warn("Invalid chunk size in chunk RESOURCEIDS")

                for i in range(0, (chunkSize // 4) - 2):
                    self.m_resourceIDs.append(
                        unpack('<L', self.buff.read(4))[0])

                continue

            # FIXME, unknown chunk types might cause problems
            if chunkType < const.CHUNK_XML_FIRST or \
                    chunkType > const.CHUNK_XML_LAST:
                warn("invalid chunk type 0x{:08x}".format(chunkType))

            # Fake START_DOCUMENT event.
            if chunkType == const.CHUNK_XML_START_TAG and event == -1:
                self.m_event = const.START_DOCUMENT
                break

            # After the chunk_type, there are always 3 fields for the remaining
            # tags we need to parse:
            # Chunk Size (we do not need it)
            # TODO for sanity checks, we should use it and check if the chunks
            # are correct in size
            self.buff.read(4)
            # Line Number
            self.m_lineNumber = unpack('<L', self.buff.read(4))[0]
            # Comment_Index (usually 0xFFFFFFFF, we do not need it)
            self.buff.read(4)

            # Now start to parse the field

            # There are five (maybe more) types of Chunks:
            # * START_NAMESPACE
            # * END_NAMESPACE
            # * START_TAG
            # * END_TAG
            # * TEXT
            if chunkType == const.CHUNK_XML_START_NAMESPACE or \
                    chunkType == const.CHUNK_XML_END_NAMESPACE:
                if chunkType == const.CHUNK_XML_START_NAMESPACE:
                    prefix = unpack('<L', self.buff.read(4))[0]
                    uri = unpack('<L', self.buff.read(4))[0]

                    # FIXME We will get a problem here, if the same uri is used
                    # with different prefixes!
                    # prefix --> uri is a 1:1 mapping
                    self.m_prefixuri[prefix] = uri
                    # but uri --> prefix is a 1:n mapping!
                    self.m_uriprefix[uri].append(prefix)
                    self.m_prefixuriL.append((prefix, uri))
                    self.ns = uri

                    # Workaround for closing tags
                    if (uri, prefix) in self.visited_ns:
                        self.visited_ns.remove((uri, prefix))
                else:
                    self.ns = -1
                    # END_PREFIX contains again prefix and uri field
                    prefix, = unpack('<L', self.buff.read(4))
                    uri, = unpack('<L', self.buff.read(4))

                    # We can then remove those from the prefixuriL. If the
                    # pair is not there, the namespace was never opened and
                    # this is what the warning is about.
                    if (prefix, uri) in self.m_prefixuriL:
                        self.m_prefixuriL.remove((prefix, uri))
                    else:
                        warn("Reached a NAMESPACE_END without having the "
                             "namespace stored before? Prefix ID: {}, URI ID: "
                             "{}".format(prefix, uri))

                    # We also remove the entry from prefixuri and uriprefix:
                    if prefix in self.m_prefixuri:
                        del self.m_prefixuri[prefix]
                    # The prefix may be missing from the list (unbalanced
                    # END_NAMESPACE); list.remove would raise ValueError.
                    if uri in self.m_uriprefix and \
                            prefix in self.m_uriprefix[uri]:
                        self.m_uriprefix[uri].remove(prefix)
                    # Need to remove them from visisted namespaces as well, as it might pop up later
                    # FIXME we need to remove it also if we leave a tag which closes it namespace
                    # Workaround for now: remove it on a START_NAMESPACE tag
                    if (uri, prefix) in self.visited_ns:
                        self.visited_ns.remove((uri, prefix))

                continue

            # START_TAG is the start of a new tag.
            if chunkType == const.CHUNK_XML_START_TAG:
                # The TAG consists of some fields:
                # * (chunk_size, line_number, comment_index - we read before)
                # * namespace_uri
                # * name
                # * flags
                # * attribute_count
                # * class_attribute
                # After that, there are two lists of attributes, 20 bytes each

                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                self.buff.read(4)  # flags

                # Upper 16 bits: id attribute (stored minus one),
                # lower 16 bits: number of attributes
                attributeCount = unpack('<L', self.buff.read(4))[0]
                self.m_idAttribute = (attributeCount >> 16) - 1
                attributeCount = attributeCount & 0xFFFF
                # Upper 16 bits: style attribute, lower 16 bits: class
                # attribute (both stored minus one)
                self.m_classAttribute = unpack('<L', self.buff.read(4))[0]
                self.m_styleAttribute = (self.m_classAttribute >> 16) - 1

                self.m_classAttribute = (self.m_classAttribute & 0xFFFF) - 1

                # Now, we parse the attributes.
                # Each attribute has 5 fields of 4 byte
                for i in range(0, attributeCount * const.ATTRIBUTE_LENGHT):
                    # Each field is linearly parsed into the array
                    self.m_attributes.append(
                        unpack('<L', self.buff.read(4))[0])

                # Keep only the top byte of every value type field
                for i in range(const.ATTRIBUTE_IX_VALUE_TYPE,
                               len(self.m_attributes), const.ATTRIBUTE_LENGHT):
                    self.m_attributes[i] = self.m_attributes[i] >> 24

                self.m_event = const.START_TAG
                break

            if chunkType == const.CHUNK_XML_END_TAG:
                self.m_namespaceUri = unpack('<L', self.buff.read(4))[0]
                self.m_name = unpack('<L', self.buff.read(4))[0]
                self.m_event = const.END_TAG
                break

            if chunkType == const.CHUNK_XML_TEXT:
                # TODO we do not know what the TEXT field does...
                self.m_name = unpack('<L', self.buff.read(4))[0]

                # FIXME
                # Raw_value
                self.buff.read(4)
                # typed_value, is an enum
                self.buff.read(4)

                self.m_event = const.TEXT
                break

    def getPrefixByUri(self, uri):
        """
        Return a prefix index registered for a URI index.

        As uri --> prefix is a 1:n mapping, the first registered prefix is
        returned.

        :param uri: index of the URI
        :return: the prefix index, or -1 if the URI is unknown or has no
            prefix left
        """
        # .get() does not create an entry, even on a defaultdict
        prefixes = self.m_uriprefix.get(uri)
        if not prefixes:
            return -1
        return prefixes[0]

    def getPrefix(self):
        """
        Return the prefix string for the namespace of the current event.

        An empty string is returned if the event has no namespace
        (0xFFFFFFFF), if no prefix is currently registered for it, or if
        resolving the prefix raises KeyError.
        """
        # The default is, that the namespaceUri is 0xFFFFFFFF
        # Then we know, there is none
        if self.m_namespaceUri == 0xFFFFFFFF:
            return u''

        # FIXME this could be problematic. Need to find the correct namespace prefix
        # The list of prefixes can be empty once END_NAMESPACE removed the
        # last one; getPrefixByUri covers that case with -1.
        candidate = self.getPrefixByUri(self.m_namespaceUri)
        if candidate == -1:
            return u''

        try:
            return self.sb.getString(candidate)
        except KeyError:
            return u''

    def getName(self):
        """
        Return the tag name of the current event.

        An empty string is returned unless the current event is START_TAG
        or END_TAG and a name is set.
        """
        if self.m_name == -1 or (self.m_event != const.START_TAG
                                 and self.m_event != const.END_TAG):
            return u''

        return self.sb.getString(self.m_name)

    def getText(self):
        """
        Return the string of the current TEXT event, or an empty string for
        any other event.
        """
        if self.m_name == -1 or self.m_event != const.TEXT:
            return u''

        return self.sb.getString(self.m_name)

    def getNamespacePrefix(self, pos):
        """
        Return the prefix string of the open namespace at position `pos`.

        :param pos: index into the list of currently open namespaces
        """
        prefix = self.m_prefixuriL[pos][0]
        return self.sb.getString(prefix)

    def getNamespaceUri(self, pos):
        """
        Return the URI string of the open namespace at position `pos`.

        :param pos: index into the list of currently open namespaces
        """
        uri = self.m_prefixuriL[pos][1]
        return self.sb.getString(uri)

    def getXMLNS(self):
        """
        Return the ``xmlns`` declarations which were not returned before.

        One line is produced per registered prefix; the (uri, prefix) pair
        is then remembered in ``visited_ns``. An empty namespace URI sets
        ``packerwarning`` and emits a warning.
        """
        declarations = []
        for prefix, uri in self.m_prefixuri.items():
            if (uri, prefix) not in self.visited_ns:
                prefix_str = self.sb.getString(prefix)
                prefix_uri = self.sb.getString(uri)
                # FIXME Packers like Liapp use empty uri to fool XML Parser
                # FIXME they also mess around with the Manifest, thus it can not be parsed easily
                if prefix_uri == '':
                    warn("Empty Namespace URI for Namespace {}.".format(
                        prefix_str))
                    self.packerwarning = True

                # if prefix is (null), which is indicated by an empty str, then do not print :
                if prefix_str != '':
                    prefix_str = ":" + prefix_str
                declarations.append(
                    'xmlns{}="{}"\n'.format(prefix_str, prefix_uri))
                self.visited_ns.append((uri, prefix))
        return "".join(declarations)

    def getNamespaceCount(self, pos):
        """Not implemented; always returns None."""
        pass

    def getAttributeOffset(self, index):
        """
        Return the position of attribute `index` inside ``m_attributes``.

        Problems (wrong event, index out of range) are only reported with a
        warning; the computed offset is returned regardless.

        :param index: number of the attribute, starting at 0
        """
        # FIXME
        if self.m_event != const.START_TAG:
            warn("Current event is not START_TAG.")

        # Same stride which doNext uses to fill m_attributes
        offset = index * const.ATTRIBUTE_LENGHT
        # FIXME
        if offset >= len(self.m_attributes):
            warn("Invalid attribute index")

        return offset

    def getAttributeCount(self):
        """
        Return the number of attributes of the current START_TAG event, or
        -1 if the current event is not START_TAG.
        """
        if self.m_event != const.START_TAG:
            return -1

        return len(self.m_attributes) // const.ATTRIBUTE_LENGHT

    def getAttributePrefix(self, index):
        """
        Return the namespace prefix string of attribute `index`, or an
        empty string if no prefix is registered for its namespace URI.
        """
        offset = self.getAttributeOffset(index)
        uri = self.m_attributes[offset + const.ATTRIBUTE_IX_NAMESPACE_URI]

        prefix = self.getPrefixByUri(uri)

        if prefix == -1:
            return ""

        return self.sb.getString(prefix)

    def getAttributeName(self, index):
        """
        Return the name of attribute `index`.

        If the string block has no (non-empty) string for the name, the
        resource ID stored at the same index is looked up in the table of
        system attributes; IDs missing from that table yield a placeholder
        name which contains the ID in hex.
        """
        offset = self.getAttributeOffset(index)
        name = self.m_attributes[offset + const.ATTRIBUTE_IX_NAME]

        if name == -1:
            return ""

        res = self.sb.getString(name)
        # If the result is a (null) string, we need to look it up.
        if not res:
            attr = self.m_resourceIDs[name]
            if attr in public.SYSTEM_RESOURCES['attributes']['inverse']:
                res = 'android:' + public.SYSTEM_RESOURCES['attributes'][
                    'inverse'][attr]
            else:
                # Attach the HEX Number, so for multiple missing attributes we do not run
                # into problems.
                res = 'android:UNKNOWN_SYSTEM_ATTRIBUTE_{:08x}'.format(attr)

        return res

    def getAttributeValueType(self, index):
        """Return the value type field of attribute `index`."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE]

    def getAttributeValueData(self, index):
        """Return the value data field of attribute `index`."""
        offset = self.getAttributeOffset(index)
        return self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_DATA]

    def getAttributeValue(self, index):
        """
        This function is only used to look up strings
        All other work is made by format_value
        # FIXME should unite those functions
        :param index: number of the attribute, starting at 0
        :return: the string value for attributes of type
            ``const.TYPE_STRING``, an empty string for all other types
        """
        offset = self.getAttributeOffset(index)
        valueType = self.m_attributes[offset + const.ATTRIBUTE_IX_VALUE_TYPE]
        if valueType == const.TYPE_STRING:
            valueString = self.m_attributes[offset +
                                            const.ATTRIBUTE_IX_VALUE_STRING]
            return self.sb.getString(valueString)
        return ""
예제 #6
0
    def __init__(self, raw_buff):
        self.analyzed = False
        self._resolved_strings = None
        self.buff = bytecode.BuffHandle(raw_buff)

        self.header = ARSCHeader(self.buff)
        self.packageCount = unpack('<i', self.buff.read(4))[0]

        self.packages = {}
        self.values = {}
        self.resource_values = collections.defaultdict(collections.defaultdict)
        self.resource_configs = collections.defaultdict(
            lambda: collections.defaultdict(set))
        self.resource_keys = collections.defaultdict(
            lambda: collections.defaultdict(collections.defaultdict))
        self.stringpool_main = None

        # skip to the start of the first chunk
        self.buff.set_idx(self.header.start + self.header.header_size)

        data_end = self.header.start + self.header.size

        while self.buff.get_idx() <= data_end - ARSCHeader.SIZE:
            res_header = ARSCHeader(self.buff)

            if res_header.start + res_header.size > data_end:
                # this inner chunk crosses the boundary of the table chunk
                break

            if res_header.type == const.RES_STRING_POOL_TYPE and not self.stringpool_main:
                self.stringpool_main = StringBlock(self.buff, res_header)

            elif res_header.type == const.RES_TABLE_PACKAGE_TYPE:
                assert len(
                    self.packages
                ) < self.packageCount, "Got more packages than expected"

                current_package = ARSCResTablePackage(self.buff, res_header)
                package_name = current_package.get_name()
                package_data_end = res_header.start + res_header.size

                self.packages[package_name] = []

                # After the Header, we have the resource type symbol table
                self.buff.set_idx(current_package.header.start +
                                  current_package.typeStrings)
                type_sp_header = ARSCHeader(self.buff)
                assert type_sp_header.type == const.RES_STRING_POOL_TYPE, \
                    "Expected String Pool header, got %x" % type_sp_header.type
                mTableStrings = StringBlock(self.buff, type_sp_header)

                # Next, we should have the resource key symbol table
                self.buff.set_idx(current_package.header.start +
                                  current_package.keyStrings)
                key_sp_header = ARSCHeader(self.buff)
                assert key_sp_header.type == const.RES_STRING_POOL_TYPE, \
                    "Expected String Pool header, got %x" % key_sp_header.type
                mKeyStrings = StringBlock(self.buff, key_sp_header)

                # Add them to the dict of read packages
                self.packages[package_name].append(current_package)
                self.packages[package_name].append(mTableStrings)
                self.packages[package_name].append(mKeyStrings)

                pc = PackageContext(current_package, self.stringpool_main,
                                    mTableStrings, mKeyStrings)

                # skip to the first header in this table package chunk
                # FIXME is this correct? We have already read the first two sections!
                # self.buff.set_idx(res_header.start + res_header.header_size)
                # this looks more like we want: (???)
                self.buff.set_idx(res_header.start + res_header.header_size +
                                  type_sp_header.size + key_sp_header.size)

                # Read all other headers
                while self.buff.get_idx(
                ) <= package_data_end - ARSCHeader.SIZE:
                    pkg_chunk_header = ARSCHeader(self.buff)
                    log.debug("Found a header: {}".format(pkg_chunk_header))
                    if pkg_chunk_header.start + pkg_chunk_header.size > package_data_end:
                        # we are way off the package chunk; bail out
                        break

                    self.packages[package_name].append(pkg_chunk_header)

                    if pkg_chunk_header.type == const.RES_TABLE_TYPE_SPEC_TYPE:
                        self.packages[package_name].append(
                            ARSCResTypeSpec(self.buff, pc))

                    elif pkg_chunk_header.type == const.RES_TABLE_TYPE_TYPE:
                        a_res_type = ARSCResType(self.buff, pc)
                        self.packages[package_name].append(a_res_type)
                        self.resource_configs[package_name][a_res_type].add(
                            a_res_type.config)

                        log.debug("Config: {}".format(a_res_type.config))

                        entries = []
                        for i in range(0, a_res_type.entryCount):
                            current_package.mResId = current_package.mResId & 0xffff0000 | i
                            entries.append((unpack('<i', self.buff.read(4))[0],
                                            current_package.mResId))

                        self.packages[package_name].append(entries)

                        for entry, res_id in entries:
                            if self.buff.end():
                                break

                            if entry != -1:
                                ate = ARSCResTableEntry(self.buff, res_id, pc)
                                self.packages[package_name].append(ate)
                                if ate.is_weak():
                                    # FIXME we are not sure how to implement the FLAG_WEAk!
                                    # We saw the following: There is just a single Res_value after the ARSCResTableEntry
                                    # and then comes the next ARSCHeader.
                                    # Therefore we think this means all entries are somehow replicated?
                                    # So we do some kind of hack here. We set the idx to the entry again...
                                    # Now we will read all entries!
                                    # Not sure if this is a good solution though
                                    self.buff.set_idx(ate.start)
                    elif pkg_chunk_header.type == const.RES_TABLE_LIBRARY_TYPE:
                        log.warning(
                            "RES_TABLE_LIBRARY_TYPE chunk is not supported")
                    else:
                        # silently skip other chunk types
                        pass

                    # skip to the next chunk
                    self.buff.set_idx(pkg_chunk_header.start +
                                      pkg_chunk_header.size)

            # move to the next resource chunk
            self.buff.set_idx(res_header.start + res_header.size)
예제 #7
0
    def __init__(self, raw_buff):
        """
        Wrap ``raw_buff`` in a buffer handle and validate the leading chunks.

        The first resource header and the string pool header that follows it
        are checked in turn. The first hard failure is logged, clears
        ``self._valid`` and ends the validation early. Anomalies that can be
        worked around are only logged as warnings and set
        ``self.axml_tampered``.

        :param raw_buff: raw file content, handed to ``bytecode.BuffHandle``
        """

        def reject(message, *args):
            # Common failure path: report the problem, then flag the parser
            # as not valid.
            log.error(message, *args)
            self._valid = False

        self._reset()

        self._valid = True
        self.axml_tampered = False
        self.buff = bytecode.BuffHandle(raw_buff)

        raw_size = self.buff.size()

        # A single ARSCHeader is the bare minimum (and already a strange
        # edge case).
        if raw_size < 8:
            reject(
                "Filesize is too small to be a valid AXML file! Filesize: {}".
                format(raw_size))
            return

        # Chunk sizes are unsigned 4 byte integers, so nothing larger than
        # that can be a single valid chunk.
        if raw_size > 0xFFFFFFFF:
            reject(
                "Filesize is too large to be a valid AXML file! Filesize: {}".
                format(raw_size))
            return

        try:
            file_header = ARSCHeader(self.buff)
        except AssertionError as err:
            reject("Error parsing first resource header: %s", err)
            return

        self.filesize = declared_size = file_header.size

        header_length = file_header.header_size
        if header_length != 8:
            if header_length == 28024:
                # Typical for a plain (text) XML file, which usually starts
                # with '<?xm' / '3C 3F 78 6D'.
                log.warning(
                    "Header size is 28024! Are you trying to parse a plain XML file?"
                )
            reject("This does not look like an AXML file. "
                   "header size does not equal 8! header size = {}".format(
                       header_length))
            return

        real_size = self.buff.size()
        if declared_size != real_size:
            if declared_size > real_size:
                reject(
                    "This does not look like an AXML file. "
                    "Declared filesize does not match real size: {} vs {}".
                    format(declared_size, real_size))
                return

            # Trailing data after the declared end: everything up to the
            # declared end of the chunk can still be parsed.
            self.axml_tampered = True
            log.warning(
                "Declared filesize ({}) is smaller than total file size ({}). "
                "Was something appended to the file? Trying to parse it anyways."
                .format(declared_size, real_size))

        # A wrong resource type is not treated as fatal; plenty of files do
        # not set it correctly.
        chunk_type = file_header.type
        if chunk_type != const.RES_XML_TYPE:
            self.axml_tampered = True
            log.warning("AXML file has an unusual resource type! "
                        "Malware likes to to such stuff to anti androguard! "
                        "But we try to parse it anyways. "
                        "Resource Type: 0x{:04x}".format(chunk_type))

        # The string pool has to follow next.
        try:
            pool_header = ARSCHeader(self.buff)
        except AssertionError as err:
            reject("Error parsing resource header of string pool: %s", err)
            return

        pool_header_length = pool_header.header_size
        if pool_header_length != 0x1C:
            reject(
                "This does not look like an AXML file. String chunk header "
                "size does not equal 28! header size = {}".format(
                    pool_header_length))
            return

        pool_type = pool_header.type
        if pool_type != const.RES_STRING_POOL_TYPE:
            reject(
                "Expected String Pool header, got resource type 0x{:04x} "
                "instead".format(pool_type))
            return

        self.sb = StringBlock(self.buff, pool_header)

        # Resource ID mappings, if any
        self.m_resourceIDs = []

        # Prefix/uri mappings encountered so far
        self.namespaces = []