Example #1
0
 def _get_type_str(flag):
     """Translate an IDA flags value into the name of an unsigned C integer type.

     The width predicates of ``idc`` are tried in the order byte, word, dword,
     qword; the first one that accepts ``flag`` decides the result.

     :param flag: flags value handed to the ``idc.is_*`` width predicates
     :return: "unsigned char", "unsigned short", "unsigned int" or
              "unsigned long long"; "unknown" when no predicate matches
     """
     if idc.is_byte(flag):
         return "unsigned char"
     if idc.is_word(flag):
         return "unsigned short"
     if idc.is_dword(flag):
         return "unsigned int"
     if idc.is_qword(flag):
         return "unsigned long long"
     # none of the four integer widths applied
     return "unknown"
Example #2
0
    def _getDataDisasm(self):
        """
        Build the disassembly text of this data item.
        Enum operands and single byte/word/dword items are rendered here; anything else is assumed to be an
        array and is delegated to _getArrDisasm, because GetDisasm only yields the first line of an array.
        :return: the disassembly of the data item
        """
        itemFlags = idc.GetFlags(self.ea)
        # TODO: change this so it accounts for arrays also
        if idc.is_enum0(itemFlags):
            # enum operands render correctly through GetDisasm
            return self._filterComments(idc.GetDisasm(self.ea))

        # a "primitive" is a data item whose size matches its declared width exactly
        isPrimitive = idc.is_data(itemFlags) and (
            idc.is_byte(itemFlags) and idc.get_item_size(self.ea) == 1
            or idc.is_word(itemFlags) and idc.get_item_size(self.ea) == 2
            or idc.is_dword(itemFlags) and idc.get_item_size(self.ea) == 4)

        if not isPrimitive:
            # Assumed to be an array of unknown type: the directive and the element count per line are both
            # taken from the first disassembly line, with any trailing comment cut off first.
            line = idc.GetDisasm(self.ea)
            if ';' in line:
                line = line[:line.index(';')]
            tokens = [tok for tok in re.split('[ ,]', line) if tok]
            # tokens[0] is the directive (ex: DCB), the remaining tokens are the elements of the first line
            return self._getArrDisasm(len(tokens) - 1, tokens[0])

        value = self.getContent()
        if self.getXRefsFrom()[1] and self.isPointer(value):
            # the value is a real reference: IDA's own text already shows the label
            text = idc.GetDisasm(self.ea)
            target = Data(value)
            # A '.' before any comment marks a struct member: emit base+offset and keep IDA's text as a comment
            if '.' in text.partition(';')[0]:
                text = 'DCD %s+0x%X // %s' % (target.getName(),
                                              value - target.ea,
                                              text[len('DCD '):])
        elif ida_bytes.is_manual(itemFlags, 0):
            # a manual operand was entered in IDA (ex: a computation applied to the data), take it verbatim
            text = idc.GetDisasm(self.ea)
        else:
            # plain number that must not be shown as a symbol (ex: word_0): build the line by hand
            if idc.is_byte(itemFlags):
                mnemonic = 'DCB'
            elif idc.is_word(itemFlags):
                mnemonic = 'DCW'
            else:
                mnemonic = 'DCD'
            text = mnemonic + ' ' + '0x%X' % value

        return self._filterComments(text)
Example #3
0
def registerUncompFile(ea, force=True):
    # type: (int, bool) -> bool
    """
    Registers the LZ77 compressed data referenced by the dword at ea as its own file.
    The dword is expected to hold the address of the compressed data with bit 31 set. The referenced data is
    turned into a word-aligned array, named if it has no name yet, the pointer gets a manual operand
    expressing it as name + 1<<31, and the data range is added to the DisTerminal as a .lz77 file.
    :param ea: linear address of the pointer to the compressed data
    :param force: if True, an item at ea that is not a dword is redefined as one. If False, such an item
                  makes the function give up.
    :return: True if the file was registered. False if the item is not a dword and force is off, if bit 31
             of the pointer is clear, or if the compressed size could not be determined.
    """
    d = Data.Data(ea)
    compPtr = d.getContent()
    # a list as content means the item is an array, not a single dword
    if not idc.is_dword(d.getFlags()) or isinstance(compPtr, list):
        if not force: return False
        print('[%07X] -> dword' % (ea))
        forceItemOp(ea, idc.create_dword, ea)
        # re-read the item now that it has been redefined
        d = Data.Data(ea)
        compPtr = d.getContent()

    # compressed pointers have bit 31 set
    if not compPtr & (1 << 31):
        return False

    # strip the marker bit to obtain the actual address
    compPtr = compPtr - (1 << 31)

    #  make its content an array, and set a name for it, and a size
    compData = Data.Data(compPtr)
    if compData.ea != compPtr:
        # compPtr falls inside another item: undefine that item so one can start exactly at compPtr
        idc.del_items(compData.ea)
        compData = Data.Data(compPtr)
    compSize = getLZ77CompressedSize(compPtr)
    # size must have been identified
    if compSize == -1:
        return False
    if compSize % 4 != 0:
        compSize += 4 - (compSize % 4)  # must be word aligned

    if compData.getSize() != compSize:
        # clear the whole range; if that fails in one call, undefine it byte by byte
        if not idc.del_items(compPtr, compSize):
            for i in range(compPtr, compPtr + compSize):
                idc.del_items(i, 1)
        idc.make_array(compPtr, compSize)

    if not compData.getName():
        compData.setName('comp_%07X' % compData.ea)

    # show the pointer as a label plus the marker bit instead of a raw number
    idc.op_man(ea, 0, '%s + 1<<31' % compData.getName())

    # now register the compressed data as its own file
    filename = 'data/compressed/%s.lz77' % compData.getName()
    print('[%07X] addFile %s' % (ea, filename))
    dis = Terminal.DisTerminal()
    dis.addFile(filename, compPtr, compPtr + compSize)

    return True
Example #4
0
 def getTypeName(self):
     # type: () -> str
     """
     Describes the type of the item at self.ea as a short string.
     :return: "code" for instructions; "u8"/"u16"/"u32"/"void*" for single byte/word/dword items (a dword
     holding a pointer is "void*"); "u8[n]"/"u16[n]"/"u32[n]"/"void*[n]" for arrays, n being the element
     count; the declared type name for struct items; "u8" for undefined bytes; "INVALID" when the type
     could not be determined.
     """
     flags = idc.GetFlags(self.ea)
     typeName = "INVALID"
     if idc.isCode(flags):
         typeName = "code"
     elif idc.isStruct(flags):
         # Structs are tested before plain data: a struct item also satisfies isData, so testing it after
         # isData would never be reached (getContent orders its checks the same way).
         # NOTE(review): relies on idc.get_type() yielding the declared type name for struct items;
         # confirm against the database. Falls back to "INVALID" when IDA has no type information.
         typeName = idc.get_type(self.ea) or "INVALID"
     elif idc.isData(flags):
         size = self.getSize()
         if idc.is_byte(flags) and size == 1:
             typeName = "u8"
         elif idc.is_word(flags) and size == 2:
             typeName = "u16"
         elif idc.is_dword(flags) and size == 4:
             if self.isPointer(self.getContent()):
                 typeName = "void*"
             else:
                 typeName = "u32"
         else:  # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
             # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
             firstLineSplitDisasm = [
                 token for token in re.split('[ ,]', idc.GetDisasm(self.ea))
                 if token
             ]
             dataType = firstLineSplitDisasm[0]
             # floor division keeps the element count an integer on Python 3 as well
             if dataType == "DCB":
                 typeName = "u8[%d]" % size
             elif dataType == "DCW":
                 typeName = "u16[%d]" % (size // 2)
             elif dataType == "DCD":
                 if self.hasPointer():
                     typeName = "void*[%d]" % (size // 4)
                 else:
                     typeName = "u32[%d]" % (size // 4)
     elif idc.isUnknown(flags):
         # undefined bytes are reported as single bytes
         typeName = "u8"
     return typeName
def _read_struct_member_once(ea, flags, size, member_sid, member_size,
                             asobject):
    """Read one piece of a struct member on behalf of _read_struct_member.

    The kind of data is selected by testing ``flags`` with the ``idc.is_*``
    predicates, integers first. The result is a ``(value, length)`` pair:

    * byte/word/dword/qword/oword: ``read_word(ea, n)`` with n = 1/2/4/8/16,
      and length n
    * string literal: ``size`` raw bytes, and length ``size``
    * float / double: the decoded number, and length 4 / 8
    * struct: ``read_struct`` of ``member_sid``, and length ``member_size``
    * anything else: ``None``, and length ``size``
    """
    # Integer members: determine the width first, then issue one read.
    width = None
    if idc.is_byte(flags):
        width = 1
    elif idc.is_word(flags):
        width = 2
    elif idc.is_dword(flags):
        width = 4
    elif idc.is_qword(flags):
        width = 8
    elif idc.is_oword(flags):
        width = 16
    if width is not None:
        return read_word(ea, width), width

    if idc.is_strlit(flags):
        # the caller-supplied size is used as the length of the string
        return idc.GetManyBytes(ea, size), size
    if idc.is_float(flags):
        return idc.Float(ea), 4
    if idc.is_double(flags):
        return idc.Double(ea), 8
    if idc.is_struct(flags):
        nested = read_struct(ea, sid=member_sid, asobject=asobject)
        return nested, member_size

    # unsupported member kind
    return None, size
Example #6
0
    def _getDataDisasm(self, ea, elemsPerLine=-1):
        """
        You cannot get array data using getdisasm. The disassembly has to be extracted differently.
        This identifies the data in question, and gets its disassembly
        :param ea: the effective address of the item to get the disassembly of
        :param elemsPerLine: if 0, maximum will be used. if <0 (the default), it'll be parsed from the database.
                             otherwise, it's n.
        :return: the disassembly of the data item. Arrays are rendered one directive per line, lines after
                 the first being prefixed with a tab.
        """
        # First, do the easy cases that just work with GetDisasm
        flags = idc.GetFlags(ea)
        if idc.is_data(flags) and (
                idc.is_byte(flags) and idc.get_item_size(ea) == 1
                or idc.is_word(flags) and idc.get_item_size(ea) == 2
                or idc.is_dword(flags) and idc.get_item_size(ea) == 4):
            # normal case where an int is not misread as a reference
            data = Data(ea)
            content = data.getContent()
            if self.isPointer(content):
                disasm = idc.GetDisasm(ea)  # very simple, this works.
            else:
                # build the disassembly: this is for none-pointer symbols found in IDA (ex: word_0)
                if idc.is_byte(flags): op = 'DCB'
                elif idc.is_word(flags): op = 'DCW'
                else: op = 'DCD'
                disasm = op + ' ' + '0x%X' % content
            return self._filterComments(disasm)

        # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
        # It is assumed this is an array, but the type is unknown. Imply type based on disasm of first line!
        # analysis on the array is based on the very first line
        firstLineSplitDisasm = [
            token for token in re.split('[ ,]', idc.GetDisasm(ea)) if token
        ]
        dataType = firstLineSplitDisasm[0]

        # Grab all of the elements in the array
        # NOTE(review): content, comment and xrefs are read from self while the flags come from ea;
        # presumably callers always pass self.ea, confirm.
        arr = self.getContent()

        # determine the number of elements per line: negative (default) means parse it from the first line
        if elemsPerLine < 0:
            commentWords = len(
                [word for word in re.split('[ ,]', self.getComment()) if word])
            # -1 to not include type, ex: DCB, DCD... But comments can exist on the first line too!
            elemsPerLine = len(firstLineSplitDisasm) - 1 - commentWords
        elif elemsPerLine == 0:  # when specifying 0, all will show in one line!
            elemsPerLine = len(arr)
        # A count that is still not positive cannot be used to break lines (a count of 0 used to raise
        # ZeroDivisionError), so fall back to a single line.
        if elemsPerLine <= 0:
            elemsPerLine = max(len(arr), 1)

        # whether to display a name, or data, is determined by the xrefs from this item!
        xrefs = self.getXRefsFrom()

        # only bother to check for names if it's an array of words
        wordArray = dataType == 'DCD'

        # render every element first, lines are assembled afterwards
        rendered = []
        for elem in arr:
            # if it's a pointer and defined as an xref, display its label not just the number
            # isPointer is a bottleneck call, so it is evaluated last
            if wordArray and (elem in xrefs[1] or elem
                              in xrefs[0]) and self.isPointer(elem):
                # TODO: maybe you should get the name of Data.Data(elem) also, for +index
                elemEA = Data(elem).ea
                name = idc.Name(elemEA)
                if name:
                    # the element may point inside the named item: show that as name+offset
                    offset = elem - elemEA
                    if offset != 0:
                        rendered.append('%s+%d' % (name, offset))
                    else:
                        rendered.append(name)
                else:
                    rendered.append('0x%X' % elem)
            else:
                rendered.append('0x%X' % elem)

        # group the elements into lines of elemsPerLine, each line starting with the directive
        lines = []
        for start in range(0, len(rendered), elemsPerLine):
            indent = '\t' if lines else ''
            lines.append(indent + dataType + ' ' +
                         ', '.join(rendered[start:start + elemsPerLine]))

        if not lines:
            # empty array: only the directive is emitted
            return dataType + ' '
        return '\n'.join(lines)
Example #7
0
    def getContent(self, bin=False):
        """
        reads bytes at the EA of the data item and constructs its content representation based on its type
        :param bin: if True, array of bytes is always passed back
        :return: for code and for single byte/word/dword items, one integer combined from the item's bytes;
                 for arrays, the list produced by _combineBytes using the element width implied by the
                 disassembly directive (DCB/DCW/DCD, otherwise bytes); for undefined bytes, the byte value.
                 With bin=True these are lists of byte values instead. Struct items, and items matching no
                 category, yield -1 regardless of bin.
        """
        flags = idc.GetFlags(self.ea)
        output = -1

        if idc.isCode(flags):
            # an instruction is also data, its bytes are gathered and combined into one integer
            size = self.getSize()
            # bytearray yields integers on both Python 2 and 3, where ord() over the raw buffer would
            # raise TypeError on Python 3
            rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
            # either return one discrete instruction int, or an array of bytes representing it
            if bin:
                output = rawBytes
            else:
                output = self._combineBytes(rawBytes, size)[0]

        elif idc.isStruct(flags):
            # structs are not decoded, the -1 default is returned
            pass
        elif idc.isData(flags):
            size = self.getSize()
            # normal case, build up a u8, u16, or u32
            if (idc.is_byte(flags) and size == 1
                    or idc.is_word(flags) and size == 2
                    or idc.is_dword(flags) and size == 4):
                rawBytes = list(bytearray(idc.get_bytes(self.ea, size)))
                # either return one discrete primitive, or the array of bytes representing it
                if bin:
                    output = rawBytes
                else:
                    output = self._combineBytes(rawBytes, size)[0]
            # The weird case... an array. I don't know why it's weird. IDA doesn't like it!
            else:
                # Grab all of the bytes in the array
                rawBytes = list(
                    bytearray(
                        idc.get_bytes(self.ea, idc.get_item_size(self.ea))))

                if bin:
                    output = rawBytes
                else:
                    # It is assumed this is an array, but the type is unknown. Imply type based on disasm
                    # of first line!
                    firstLineSplitDisasm = [
                        token
                        for token in re.split('[ ,]', idc.GetDisasm(self.ea))
                        if token
                    ]
                    dataType = firstLineSplitDisasm[0]
                    # if type unknown, just show it as an array of bytes
                    bytesPerElem = {'DCB': 1, 'DCW': 2, 'DCD': 4}.get(dataType, 1)
                    # create new array with correct type
                    output = self._combineBytes(rawBytes, bytesPerElem)
        elif idc.isUnknown(flags):
            # unknown data elements are always 1 byte in size!
            output = bytearray(idc.get_bytes(self.ea, 1))[0]
            if bin: output = [output]
        return output