Esempio n. 1
0
    def find_template(self, inst, hi):
        """Look up the instruction template whose encoding matches ``inst``.

        Duplex instructions are searched in ``self.duplex_templates``; any other
        instruction is searched in the ``self.segmented_inst_templates`` entry
        selected by bits 31:28 of ``inst``. The selected list is scanned linearly.

        As a side effect, a matching template that is not already the first
        element of its list is swapped with its predecessor, so templates that
        match often drift, one position per hit, towards the front of the list.

        Args:
            inst (int): Actual instruction value.
            hi (HexagonInstruction): Instruction being disassembled. Its
                ``is_duplex`` attribute is read and, on a match, its ``template``
                attribute is set to the template found.

        Returns:
            bool: True if a template was found; False otherwise.

        TODOs:
            * Improve performance (the reordering is a one-swap-at-a-time heuristic).

        """
        candidates = (
            self.duplex_templates if hi.is_duplex
            else self.segmented_inst_templates[extract_bits(inst, 31, 28)]
        )  # type: List[InstructionTemplate]

        for position, candidate in enumerate(candidates):
            if inst & candidate.encoding.mask != candidate.encoding.value:
                continue

            # Found a template match.
            hi.template = candidate

            if position != 0:
                # Move the match one slot towards the front. Reordering the list
                # while enumerating it is harmless here because the loop is
                # abandoned right after the swap.
                previous = position - 1
                candidates[previous], candidates[position] = (
                    candidate, candidates[previous])

            return True

        return False
Esempio n. 2
0
    def find_template(self, inst, hi):
        """Find the template that matches an instruction value.

        The search pool is ``self.duplex_templates`` when ``hi.is_duplex`` is
        set; otherwise it is the ``self.segmented_inst_templates`` entry indexed
        by bits 31:28 of ``inst``. A template matches when the instruction,
        masked with the template's encoding mask, equals the encoding value.

        Args:
            inst (int): Actual instruction value.
            hi (HexagonInstruction): Instruction object where the ``template``
                attribute is set to the found (if any) template.

        Returns:
            bool: True if a template was found; False otherwise.

        TODOs:
            * Improve performance.

        """
        if hi.is_duplex:
            pool = self.duplex_templates  # type: List[InstructionTemplate]
        else:
            pool = self.segmented_inst_templates[extract_bits(inst, 31, 28)]

        for idx, tmpl in enumerate(pool):
            if inst & tmpl.encoding.mask == tmpl.encoding.value:
                hi.template = tmpl

                # Cheap "most found first" heuristic: bubble the match up by a
                # single position. Mutating the list is safe since the function
                # returns before the iteration continues.
                if idx > 0:
                    pool[idx - 1], pool[idx] = pool[idx], pool[idx - 1]

                return True

        return False
Esempio n. 3
0
    def process_constant_extender(self, hi):
        """Apply a constant extender from the previous instruction, if there is one.

        When the instruction before this one in the current packet carries an
        ``immext`` value, that value has to be merged into one of this
        instruction's immediate operands. Which operand depends on the type of
        instruction (Table 10-10 of the manual); instead of encoding that table
        here, two simplifications are used:

        * With a single immediate operand, that operand is the target.
        * With two, the ``imm_ext_op`` indicator of the instruction template
          (inferred by ``HexagonInstructionDecoder.resolve_constant_extender``
          from the ``apply_extension`` call in the instruction's behavior)
          names the target. If the indicator is missing, the first immediate
          operand is used arbitrarily.

        Note (from the manual): "When constant extenders are used, scaled immediates are
        not scaled by the processor. Instead, the assembler must encode the full 32-bit
        unscaled value."

        Args:
            hi (HexagonInstruction): Current instruction being disassembled.

        Returns:
            None: the extension is applied to the HexagonInstruction itself.

        Raises:
            UnknownInstructionException: If an extender is pending but the
                instruction has no immediate operands, or has more than two.

        """
        packet = self.curr_packet

        # An extension needs at least two instructions in the packet so far:
        # the ``immext`` itself and the instruction it applies to.
        if packet.n_inst() < 2:
            return

        extender = packet.get_before_last_inst().immext
        if extender is None:
            # Previous instruction was not a constant extender.
            return

        imm_count = len(hi.imm_ops)

        if imm_count == 0:
            raise UnknownInstructionException(
                "Previous instruction was an 'immext', but current instruction doesn't have "
                "any immediate operands to apply the extension to.")

        # TODO: Move this check to a more adequate function, maybe in the decoding stage.
        if imm_count > 2:
            raise UnknownInstructionException(
                "Instruction has more than 2 immediate operands ({:d}). No instruction "
                "studied so far has been observed to have more than that, this is probably "
                "an error from the parsing/decoding stages.".format(imm_count))

        if imm_count == 1 or not hi.template.imm_ext_op:
            # Either the only immediate operand, or (two operands, no decoder
            # indicator) an arbitrary pick of the first one. The latter is not
            # expected to happen.
            # TODO: Log the fallback case if it happens.
            target = hi.imm_ops[0]  # type: InstructionImmediate
        else:
            # Two imm. operands, rely on the `imm_ext_op` indicator generated by the decoder.
            target = hi.get_real_operand(hi.template.imm_ext_op)

        # Only the lower 6 bits of the original immediate survive; everything
        # above comes from the extender, which was already shifted left by 6.
        target.value = extender | extract_bits(target.field_value, 5, 0)
        target.is_extended = True
Esempio n. 4
0
    def disasm_one_inst(self, inst, addr=0):
        """Disassemble a single 32-bit instruction word.

        The word is registered in the current packet, classified as duplex,
        constant extender or regular instruction, and rendered to text. Packet
        delimiters (``{`` / ``}``) and the ``:endloop0`` suffix are added to the
        text here, so they are present even when the instruction body is unknown.

        Args:
            inst (int): Actual instruction value, as an unsigned 32-bit integer.
            addr (Optional[int]): Address of the instruction being disassembled (used for
                packet processing purposes).

        Returns:
            HexagonInstruction: disassembled instruction.

        Raises:
            UnexpectedException: If ``inst`` is not an ``int`` or does not fit
                in 32 unsigned bits.

        TODOs:
            * Define the input type, for now it's an unsigned int with the
              endianness (little endian) resolved.

        """
        if not isinstance(inst, int):
            raise UnexpectedException()
        if not 0 <= inst <= 0xFFFFFFFF:
            raise UnexpectedException()

        hi = HexagonInstruction()
        hi.addr = addr

        # Sets, among others, ``parse_bits``, ``start_packet`` and ``end_packet``.
        self.process_packet_info(hi, inst)

        hi.is_duplex = (hi.parse_bits == 0b00)

        if extract_bits(inst, 31, 28) == 0 and not hi.is_duplex:
            # Constant extender instruction. The extension value is made of
            # bits 27:16 and 13:0, joined and moved to the upper 26 bits.
            # TODO: Move to a separate function.
            upper = extract_bits(inst, 27, 16)
            lower = extract_bits(inst, 13, 0)
            hi.immext = ((upper << 14) | lower) << 6

        elif self.find_template(inst, hi):
            # Regular instruction with a known template.
            self.generate_instruction_operands(inst, hi)

        hi.text += '{ ' if hi.start_packet else '  '

        try:
            hi.text += self.generate_inst_text(hi)
        except UnknownInstructionException:
            # The braces are added outside this try so that an unknown
            # instruction can still be rendered as ``{ <unknown> }``.
            hi.text += "<unknown>"
            hi.is_unknown = True

        if hi.end_packet:
            # The parse bits analysis stands even for an unknown instruction.
            hi.text += ' }'

        self.process_endloops(hi)
        if 0 in hi.endloop:
            hi.text += ':endloop0'

        return hi
Esempio n. 5
0
    def process_packet_info(self, hi, inst):
        """Attach the instruction to a packet and set its packet-related flags.

        Instructions may depend on earlier ones of the same packet (e.g.,
        constant extenders), so the disassembler keeps track of the packet
        being processed in ``self.curr_packet``. It is meant to be used
        sequentially: all the instructions of a packet have to be disassembled
        in continuous address order. Disassembling an instruction from
        elsewhere in the middle of a packet discards the current packet
        information.

        A new packet is started when:

        1. No instruction has been disassembled yet (``curr_packet`` is None).
        2. The previous instruction is contiguous (``INST_SIZE`` bytes back)
           and was the end of its packet.
        3. The previous instruction is not contiguous. There is no way to know
           whether this instruction really is the first of its packet (the
           parse bits only mark the last one), so this is the safest bet.

        Otherwise the instruction is appended to the current packet.

        This sets ``start_packet``, ``packet``, ``parse_bits`` and
        ``end_packet`` on ``hi``.

        Args:
            hi (HexagonInstruction): Current instruction being disassembled.
            inst (int): Actual instruction value.

        Returns:
            None

        TODOs:
            * Review and move part of this docstring to the project documentation.

            * Remove the `inst` argument once it is added to the HexagonInstruction class.

        """
        packet = self.curr_packet

        if packet is None:
            starts_packet = True  # Case 1.
        else:
            previous = packet.get_last_inst()
            if hi.addr - INST_SIZE == previous.addr:
                # Contiguous use: a new packet starts only if the previous
                # instruction closed its own (case 2).
                starts_packet = True if previous.end_packet else False
            else:
                starts_packet = True  # Case 3.

        # The flag is set before the packet is touched, as the instruction
        # object is handed over to the packet right below.
        hi.start_packet = starts_packet

        if starts_packet:
            self.curr_packet = HexagonPacket(hi)
        else:
            self.curr_packet.add_next_inst(hi)

        # TODO: Maybe there's some overlapping here and I don't need `self.curr_packet`.
        hi.packet = self.curr_packet

        # The PP bits (15:14) mark the end of the packet when they are:
        #   '11': last instruction of the packet.
        #   '00': duplex instruction, which (from the manual) "must always
        #         appear as the last word in a packet."
        # TODO: Perform two different checks. The normal PP == 11, and `hi.is_duplex` in
        # another if (`is_duplex` has to be set first, which is not happening now).
        hi.parse_bits = extract_bits(inst, 15, 14)
        hi.end_packet = hi.parse_bits in [0b00, 0b11]
Esempio n. 6
0
    def fill_in_reg_info(self, reg, hi):
        """Resolve the printable name of a register operand.

        Three kinds of operand are handled, in this order:

        * Register pairs (``reg.template.is_register_pair``), e.g., ``R5:4``.
        * New-value operands (syntax name starting with ``N``), which take the
          name of the register produced by an earlier instruction of the
          current packet.
        * Plain single registers.

        Args:
            reg (InstructionRegister): Target register operand; its ``name``
                attribute is set.
            hi (HexagonInstruction): Current instruction being disassembled.

        Returns:
            None: the data is applied to the InstructionRegister itself.

        Raises:
            UnknownInstructionException: For a new-value operand that encodes
                the reserved distance 0, that has no instruction at the encoded
                distance in the current packet, or whose producer text has no
                ``rN =`` assignment.

        TODOs:
            * Split in two functions for register pair and single register.

            * And maybe also split in more functions regarding register type, particularly for New-value.

        """
        operand_template = reg.template
        field = reg.field_value

        # Register pair case.
        # -------------------

        if operand_template.is_register_pair:

            if hi.template.mult_inst is False:
                # TODO: It's not clear how the odd/even numbers of a register pair are specified.
                # Assumption: the field value names one register of the pair,
                # and the other one is chosen so that the pair reads
                # ``R_odd:even`` with odd == even + 1.
                even = field if field % 2 == 0 else field - 1
                odd = even + 1

            else:  # Duplex instruction.
                # TODO: Differentiate between duplex and mult_inst (that includes compound).
                # This case probably applies only to duplex, so that attribute
                # (and not `mult_inst`) should be tested instead.

                # Field value -> (odd, even) register numbers, as listed in
                # Table 10-3 of the manual.
                duplex_pairs = dict(enumerate([
                    (1, 0),
                    (3, 2),
                    (5, 4),
                    (7, 6),
                    (17, 16),
                    (19, 18),
                    (21, 20),
                    (23, 22),
                ]))
                odd, even = duplex_pairs[field]

            if self.objdump_compatible:
                pair_text = "{:d}:{:d}".format(odd, even)
            else:
                # Prefer full register names: "r7:r6" (instead of "r7:6"), to take
                # advantage of the IDA text highlighting feature.
                pair_text = "{:d}:{:s}{:d}".format(
                    odd, operand_template.syntax_name[0], even)

            reg.name = operand_template.syntax_name.replace(
                reg.field_char * 2, pair_text)
            return

        # New-value register case.
        # ------------------------

        if operand_template.syntax_name[0] == 'N':
            # From the manual, 10.11 New-value operands: "Instructions that include a new-value
            # register operand specify in their encodings which instruction in the
            # packet has its destination register accessed as the new-value register."
            #
            # "Producer" is understood here as the destination register of an
            # instruction with an assignment (a register to the left of '=').

            # From the manual:
            #     Nt[2:1] encodes the distance (in instructions) from the producer to
            #     the consumer, as follows:
            #         Nt[2:1] = 00     // reserved
            #         Nt[2:1] = 01     // producer is +1 instruction ahead of consumer
            #         Nt[2:1] = 10     // producer is +2 instructions ahead of consumer
            #         Nt[2:1] = 11     // producer is +3 instructions ahead of consumer
            producer_distance = extract_bits(field, 2, 1)  # type: int

            if producer_distance == 0:
                raise UnknownInstructionException(
                    "New-value operands with a (invalid) consumer distance of 0 (reserved value)"
                )

            # Walk the packet backwards, from the instruction right before the
            # current one to the first one, counting down the distance. The
            # manual says the distance is "not counting empty slots or constant
            # extenders"; real cases show that nops do count, so only constant
            # extenders are skipped.
            # TODO: avoid direct access to 'self.curr_packet.instructions'.
            producer_inst = None  # type: HexagonInstruction
            steps_left = producer_distance

            for earlier_inst in reversed(self.curr_packet.instructions[0:-1]):
                if earlier_inst.immext is not None:
                    continue

                steps_left -= 1
                if steps_left == 0:
                    producer_inst = earlier_inst
                    break

            if producer_inst is None:
                # This can happen if the disassembler is called for a random
                # instruction (i.e., not in sequential address order), so the
                # previous instructions of the packet are not available.
                # TODO: Is there a better way to handle it than to raise an exception?
                raise UnknownInstructionException(
                    "New-value register operand with a producer distance of {:d} "
                    "doesn't correspond to a producer instruction.".format(
                        producer_distance))

            # Capture the name of the produced register from the text of the
            # producer instruction.
            # TODO: There may be more than one assignment ('=' in the syntax), if there are multiple instructions.
            produced = re.search(
                r"""
                # Looking for something like: "R14 = ..."

                (             # Open a capture group for the reg. name.
                    r         # The producer register is supposed to be a general
                              # purpose one (Rx). The reg. name is in lowercase (hence
                              # the use of a lower 'r'), converted by populate_syntax.
                    \d{1,2}   # Register number (0-31).
                )             # End of the capture group, only care for the reg. name.
                \s  *
                .?            # Used to cover for cases of compound assignment (e.g.,
                              # '+=', '&=', etc.)
                =             # The producer register has to be the target of an assignment
                              # (i.e., to the left of the '=')
            """, producer_inst.text, re.X)

            if produced is None:
                raise UnknownInstructionException(
                    "New-value operand with a producer instruction that is not producing "
                    "a new register operand. The pattern 'Rx = ...' was not found."
                )

            # Replace the consumer placeholder (e.g., 'Nt' in 'Nt.new') with the
            # captured producer register name.
            reg.name = operand_template.syntax_name.replace(
                'N' + reg.field_char, produced.group(1))
            return

        # Single register (not a new-value register operand).
        # ---------------------------------------------------
        # TODO: The most common case ends up at the end of a very long function.

        reg_number = field

        # TODO: Check and replace `mult_inst` with `is_duplex`. Those are two different checks
        # (even though it is working like this for unknown reasons).
        if hi.template.mult_inst and reg_number > 7:
            # Instruction duplex, Table 10-3, single register case: field values
            # 0-7 are the register numbers 0-7, while field values 8-15 map to
            # registers 16-23 (the field value plus 8).
            reg_number += 8

        reg.name = operand_template.syntax_name.replace(
            reg.field_char, str(reg_number))
Esempio n. 7
0
    def process_constant_extender(self, hi):
        """Merge the previous instruction's constant extender into this instruction.

        Nothing is done unless the instruction right before this one in the
        current packet is a constant extender (its ``immext`` is not None).
        Otherwise one immediate operand of ``hi`` is chosen as the extension
        target. The manual (Table 10-10) defines the target per instruction
        type; this function uses a simplified rule instead:

        * One immediate operand: it is the target.
        * Two immediate operands: the target is the operand named by
          ``hi.template.imm_ext_op``, an indicator produced by
          ``HexagonInstructionDecoder.resolve_constant_extender`` out of the
          ``apply_extension`` call in the instruction's behavior. Without the
          indicator the first immediate operand is taken.

        Note (from the manual): "When constant extenders are used, scaled immediates are
        not scaled by the processor. Instead, the assembler must encode the full 32-bit
        unscaled value."

        Args:
            hi (HexagonInstruction): Current instruction being disassembled.

        Returns:
            None: the extension is applied to the HexagonInstruction itself.

        Raises:
            UnknownInstructionException: If the instruction to extend has no
                immediate operands or more than two of them.

        """
        current_packet = self.curr_packet

        if current_packet.n_inst() < 2:
            # Too few instructions so far: both the ``immext`` and the
            # instruction it applies to are needed.
            return

        immext_value = current_packet.get_before_last_inst().immext
        if immext_value is None:
            # Previous instruction was not a constant extender.
            return

        n_imm_ops = len(hi.imm_ops)

        if n_imm_ops == 0:
            raise UnknownInstructionException(
                "Previous instruction was an 'immext', but current instruction doesn't have "
                "any immediate operands to apply the extension to."
            )

        if n_imm_ops > 2:
            # TODO: Move this check to a more adequate function, maybe in the decoding stage.
            raise UnknownInstructionException(
                "Instruction has more than 2 immediate operands ({:d}). No instruction "
                "studied so far has been observed to have more than that, this is probably "
                "an error from the parsing/decoding stages.".format(n_imm_ops)
            )

        use_indicator = n_imm_ops == 2 and bool(hi.template.imm_ext_op)

        if use_indicator:
            # Two imm. operands, rely on the `imm_ext_op` indicator generated by the decoder.
            extension_target = hi.get_real_operand(
                hi.template.imm_ext_op)  # type: InstructionImmediate
        else:
            # Single operand, or two operands without indicator (not expected
            # to happen), in which case the first one is an arbitrary choice.
            # TODO: Log the no-indicator case if it happens.
            extension_target = hi.imm_ops[0]

        # The extender (already left shifted 6 positions) provides the upper
        # bits; only the lower 6 bits of the original immediate remain.
        kept_bits = extract_bits(extension_target.field_value, 5, 0)
        extension_target.value = immext_value | kept_bits
        extension_target.is_extended = True
Esempio n. 8
0
    def disasm_one_inst(self, inst, addr=0):
        """Disassemble one instruction value interpreted as an unsigned int.

        Steps: validate the value, create the instruction object and place it
        in a packet, decode it (constant extender value, or template and
        operands), and build its text with the packet braces and the
        ``:endloop0`` marker.

        Args:
            inst (int): Actual instruction value (0 to 0xFFFFFFFF).
            addr (Optional[int]): Address of the instruction being disassembled (used for
                packet processing purposes).

        Returns:
            HexagonInstruction: disassembled instruction. When the text can't
            be generated, its text holds ``<unknown>`` and ``is_unknown`` is set.

        Raises:
            UnexpectedException: If ``inst`` is not an ``int`` or is outside
                the unsigned 32-bit range.

        TODOs:
            * Define the input type, for now it's an unsigned int with the
              endianness (little endian) resolved.

        """
        if not isinstance(inst, int):
            raise UnexpectedException()

        out_of_range = inst < 0 or inst > 0xFFFFFFFF
        if out_of_range:
            raise UnexpectedException()

        hi = HexagonInstruction()
        hi.addr = addr

        self.process_packet_info(hi, inst)

        # Parse bits '00' identify a duplex instruction.
        hi.is_duplex = (hi.parse_bits == 0b00)

        is_immext = extract_bits(inst, 31, 28) == 0 and not hi.is_duplex

        if is_immext:
            # Constant extender instruction, extract the extension value:
            # bits 27:16 | 13:0, joined and moved to the upper 26 bits.
            # TODO: Move to a separate function.
            joined = (extract_bits(inst, 27, 16) << 14) | extract_bits(inst, 13, 0)
            hi.immext = joined << 6
        else:
            # Not a constant extender. Search available templates for a match.
            if self.find_template(inst, hi):
                self.generate_instruction_operands(inst, hi)

        if hi.start_packet:
            hi.text += '{ '
        else:
            hi.text += '  '

        # TODO: Move all str manipulation to `generate_inst_text`? Keeping the
        # braces out of it allows output such as ``{ <unknown> }`` when the
        # text generation fails.
        try:
            hi.text += self.generate_inst_text(hi)
        except UnknownInstructionException:
            hi.text += "<unknown>"
            hi.is_unknown = True

        if hi.end_packet:
            # Start/end packet settings stand even for unknown instructions.
            hi.text += ' }'

        self.process_endloops(hi)
        if 0 in hi.endloop:
            hi.text += ':endloop0'

        return hi
Esempio n. 9
0
    def process_packet_info(self, hi, inst):
        """Process packet information.

        Keeping track of all the instructions in the packet is necessary as many
        instructions depend on previous ones (e.g., constant extenders), and this
        dependency is only limited to the packet: all the information needed to
        correctly disassemble the instructions is in the packet itself.

        The disassembler is designed to be used in sequential mode, disassembling
        all the instructions in the same packet one after the other. A single instruction
        can't be correctly analyzed outside that scope (although IDA analysis sometimes
        does that).

        During a packet disassembly, if an instruction from a different packet is
        disassembled (calling `disasm_one_inst`) all the current packet information
        is lost. All the instructions of a single packet have to be disassembled in
        continuous order.

        Args:
            hi (HexagonInstruction): Current instruction being disassembled.
            inst (int): Actual instruction value.

        Returns:
            None

        TODOs:
            * Review and move part of this docstring to the project documentation.

            * Remove the `inst` argument once it is added to the HexagonInstruction class.

        """

        # Check if a new packet is being disassembled, either because:
        #   1. This is the first ever instruction being disassembled (i.e.,
        #       ``curr_packet`` is None).
        #   2. The previous (contiguous) instruction was the end of its packet,
        #       therefore this instruction has to start a new one.
        #   3. The previous disassembled instruction is not contiguous (an address
        #       that is not 4 bytes back), so it has to be assumed (for lack of any
        #       other information) that a new packet is being disassembled. There
        #       is no way to know for sure that this instruction is indeed the first one
        #       in the packet (the parse bits only indicate the last, but not the
        #       first instruction), so it's the safest bet (assuming the disassembler
        #       is being correctly used a jump to the middle of tha packet is not allowed).

        if self.curr_packet is None:
            hi.start_packet = True
            # Case 1.

        elif hi.addr - INST_SIZE == self.curr_packet.get_last_inst().addr:
            # There's a continuity in the disassembler use.

            if self.curr_packet.get_last_inst().end_packet:
                hi.start_packet = True
                # Case 2.

            else:
                hi.start_packet = False
                # The current packet continues with this instruction.

        else:
            hi.start_packet = True
            # Case 3.

        if hi.start_packet:
            self.curr_packet = HexagonPacket(hi)
            # If it is the first instruction in the packet it has to be new one.

        else:
            self.curr_packet.add_next_inst(hi)
            # This instruction continues the current packet so it's added to the list.

        hi.packet = self.curr_packet
        # TODO: Maybe there's some overlapping here and I don't need `self.curr_packet`.

        # Check if this instruction is the end of the packet, which is indicated by
        # the PP (parse) bits if their value is:
        #   1. '11' for a normal instruction, signals packet end.
        #   2. '00' signals a duplex instruction, and from the manual: "The duplex
        #       must always appear as the last word in a packet."

        hi.parse_bits = extract_bits(inst, 15, 14)
        if hi.parse_bits in [0b00, 0b11]:
            hi.end_packet = True
        else:
            hi.end_packet = False
        # TODO: Perform two different checks. The normal PP == 11, and `hi.is_duplex` in
        # another if (`is_duplex` has to be set first, which is not happening now).

        return
Esempio n. 10
0
    def fill_in_reg_info(self, reg, hi):
        """Resolve the printable name of a register operand and store it in ``reg.name``.

        Three kinds of operand are handled, tested in this order: register pairs,
        new-value registers (syntax name starting with 'N') and plain single registers.

        Args:
            reg (InstructionRegister): Register operand to fill in. Its ``template``,
                ``field_value`` and ``field_char`` attributes are read.
            hi (HexagonInstruction): Instruction being disassembled that owns the operand;
                only ``hi.template.mult_inst`` is consulted.

        Returns:
            None: the result is written to ``reg.name``.

        Raises:
            UnknownInstructionException: For a new-value operand whose producer distance is
                the reserved value 0, whose producer can't be located among the previous
                instructions of the current packet, or whose producer text doesn't contain
                a register assignment.

        TODOs:
            * Split in two functions for register pair and single register.

            * And maybe also split in more functions regarding register type, particularly for New-value.

        """
        operand_template = reg.template

        if operand_template.is_register_pair:

            # Register pair, e.g., R5:4. Both register numbers (the odd, higher one and the
            # even, lower one) are derived from the single field value.

            if hi.template.mult_inst is not False:
                # Duplex instruction.

                # TODO: Differentiate between duplex and mult_inst (that includes compound).
                # This case probably applies only to duplex, so that attribute (and not
                # `mult_inst`) should be the one tested here.

                # Field value -> (odd, even) register numbers, as listed in Table 10-3 of
                # the manual: values 0-3 select pairs starting at R0, R2, R4, R6 and
                # values 4-7 select pairs starting at R16, R18, R20, R22.
                duplex_pairs = {
                    code: (even_number + 1, even_number)
                    for code, even_number in enumerate((0, 2, 4, 6, 16, 18, 20, 22))
                }
                high, low = duplex_pairs[reg.field_value]

            else:
                # TODO: It's not clear how the odd/even numbers of a register pair are specified.
                # The assumption is that the field value names one member of the pair: an
                # even value is the lower register, an odd value is the higher one. The
                # pair is always written ``R_odd:even`` (odd > even), so the missing
                # member is the adjacent number that keeps that order.
                raw_value = reg.field_value
                low = raw_value if raw_value % 2 == 0 else raw_value - 1
                high = low + 1

            if self.objdump_compatible:
                pair_text = "{:d}:{:d}".format(high, low)
            else:
                # Prefer full register names: "r7:r6" (instead of "r7:6"), to take advantage
                # of the IDA text highlighting feature and easily spot register references.
                pair_text = "{:d}:{:s}{:d}".format(high, operand_template.syntax_name[0], low)

            # The pair placeholder in the syntax is the field character doubled (e.g., "dd").
            reg.name = operand_template.syntax_name.replace(reg.field_char * 2, pair_text)

        elif operand_template.syntax_name[0] == 'N':
            # New-value register operand.
            #
            # From the manual, 10.11 New-value operands: "Instructions that include a new-value
            # register operand specify in their encodings which instruction in the
            # packet has its destination register accessed as the new-value register."
            #
            # The manual uses the terms consumer and producer without a clear definition.
            # Here "producer" is taken to be the destination register of an instruction
            # with an assignment (a register to the left of '=').

            producer_distance = extract_bits(reg.field_value, 2, 1)  # type: int
            # From the manual:
            #     Nt[2:1] encodes the distance (in instructions) from the producer to
            #     the consumer, as follows:
            #         Nt[2:1] = 00     // reserved
            #         Nt[2:1] = 01     // producer is +1 instruction ahead of consumer
            #         Nt[2:1] = 10     // producer is +2 instructions ahead of consumer
            #         Nt[2:1] = 11     // producer is +3 instructions ahead of consumer

            if producer_distance == 0:
                raise UnknownInstructionException(
                    "New-value operands with a (invalid) consumer distance of 0 (reserved value)"
                )

            # Starting at the instruction holding the consumer ('Nt') register, look for the
            # producer 1-3 instructions behind it in the same packet, "not counting
            # empty slots or constant extenders" (from the manual).
            #
            # It's not clear what an "empty slot" is (maybe a nop), but real cases show
            # that nops do count towards the distance; only constant extenders are skipped.

            producer_inst = None  # type: HexagonInstruction
            steps_left = producer_distance

            # Walk backwards over every instruction of the packet except the last one
            # (the slice drops it; the current instruction is expected to be that one).
            # TODO: avoid direct access to 'self.curr_packet.instructions'.
            for candidate in reversed(self.curr_packet.instructions[0:-1]):
                if candidate.immext is not None:
                    # Constant extender: doesn't count towards the distance.
                    continue

                steps_left -= 1
                if steps_left == 0:
                    producer_inst = candidate
                    break

            if producer_inst is None:
                # This may happen when the disassembler is called for a random instruction
                # (i.e., not in sequential address order), so the previous instructions of
                # the packet aren't available to look for the producer.
                # TODO: Is there a better way to handle it than to raise an exception?
                raise UnknownInstructionException(
                    "New-value register operand with a producer distance of {:d} "
                    "doesn't correspond to a producer instruction.".format(producer_distance)
                )

            # With the producer instruction located, extract from its text the name of the
            # register being assigned.
            producer_pattern = r"""
                # Looking for something like: "R14 = ..."

                (             # Open a capture group for the reg. name.
                    r         # The producer register is supposed to be a general
                              # purpose one (Rx). The reg. name is in lowercase (hence
                              # the use of a lower 'r'), converted by populate_syntax.
                    \d{1,2}   # Register number (0-31).
                )             # End of the capture group, only care for the reg. name.
                \s  *
                .?            # Used to cover for cases of compound assignment (e.g.,
                              # '+=', '&=', etc.)
                =             # The producer register has to be the target of an assignment
                              # (i.e., to the left of the '=')
            """
            producer_match = re.search(producer_pattern, producer_inst.text, re.X)

            # TODO: There may be more than one assignment ('=' in the syntax), if there are multiple instructions.

            if producer_match is None:
                raise UnknownInstructionException(
                    "New-value operand with a producer instruction that is not producing "
                    "a new register operand. The pattern 'Rx = ...' was not found.")

            # Swap the consumer placeholder ('N' plus the field character, e.g., the 'Nt' in
            # 'Nt.new') for the captured producer register, giving e.g. 'r14.new'.
            reg.name = operand_template.syntax_name.replace(
                'N' + reg.field_char, producer_match.group(1))

        else:
            # Plain single register (not a new-value register operand).
            # TODO: The most common case ends up at the end of a very long function.

            register_number = reg.field_value

            # TODO: Check and replace `mult_inst` with `is_duplex`. Those are two different checks
            # (even though it is working like this for unknown reasons).
            #
            # Instruction duplex. Table 10-3: single register case. Field values 0-7 map
            # directly to reg. numbers 0-7, while field values 8-15 map to the consecutive
            # range 16-23, i.e., the field value plus 8.
            if hi.template.mult_inst and register_number > 7:
                register_number += 8

            reg.name = operand_template.syntax_name.replace(reg.field_char, str(register_number))