Ejemplo n.º 1
0
def _codegen_jmp(context: Context, op: Op) -> Tuple[Context, Op]:
    """JMP pseudoinstruction: emit one of three encodings chosen by operand.

    The single operand may be an address literal (4 bytes of output), a
    register dereference, or a dereferenced low-memory address (2 bytes each).

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: context advanced past the generated (or anticipated) code.
      op: `op` with freshly parsed args; once the operand is fully parsed,
          `hex` is filled in and `todo` is cleared.
    """
    accepted = Type.ADDRESS | Type.DEREF_REGISTER | Type.DEREF_ADDRESS
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, accepted)
    op = op._replace(args=parsed)

    # The output length depends on the operand kind, so when parsing is still
    # incomplete we reserve space based on the kind alone and try again later.
    if not all_args_parsed(op.args):
        is_literal = op.args[0].argtype & Type.ADDRESS
        return context.advance_by_bytes(4 if is_literal else 2), op

    target = op.args[0]
    if target.argtype & Type.ADDRESS:
        # Branch to an address literal.
        _jmpdestcheck(target)
        encoded = 'D001{:04X}'.format(target.integer)
    elif target.argtype & Type.DEREF_REGISTER:
        # Branch to an address stored at the memory location a register points
        # to. (To branch to an address held in a register itself, use RET.)
        _regderefcheck(target, postcrem_from=0, postcrem_to=0)
        encoded = 'D0{:X}8'.format(target.integer)
    else:
        # Branch to an address stored at a low memory location.
        _lowwordaddrcheck(target)
        encoded = '20{:02X}'.format(target.integer // 2)

    op = op._replace(todo=None, hex=encoded)
    return context.advance(op.hex), op
Ejemplo n.º 2
0
 def codegen_immed_to_reg(context: Context, op: Op) -> Tuple[Context, Op]:
     """Generate a 2-byte "opcode nybble, register, immediate byte" halfword.

     The leading hex digit comes from `nybble` in the enclosing scope; the
     arguments are a register followed by a numeric literal.
     """
     parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                 Type.REGISTER, Type.NUMBER)
     op = op._replace(args=parsed)

     # The instruction is always 2 bytes, so pos advances even if the args
     # could not be fully parsed on this pass.
     if not all_args_parsed(op.args):
         return context.advance_by_bytes(2), op

     _regcheck(op.args[0])
     _bytecheck(op.args[1])
     # The literal is reduced modulo 256 before being written as two digits.
     encoded = '{:X}{:X}{:02X}'.format(
         nybble, op.args[0].integer, op.args[1].integer % 256)
     op = op._replace(todo=None, hex=encoded)
     return context.advance_by_bytes(2), op
Ejemplo n.º 3
0
def _codegen_org(context: Context, op: Op) -> Tuple[Context, Op]:
  """ORG pseudoinstruction: set current output stream position.

  Emits no code. If the single address argument cannot be parsed yet, both
  `context` and the op's `todo` are left alone so the op is retried later.
  """
  parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
  op = op._replace(args=parsed)
  if not all_args_parsed(op.args):
    return context, op

  # Parsed successfully: move the output position and mark the op as done.
  new_pos = op.args[0].integer
  return context._replace(pos=new_pos), op._replace(hex='', todo=None)
Ejemplo n.º 4
0
 def codegen_onereg(context: Context, op: Op) -> Tuple[Context, Op]:
     """Generate a 2-byte instruction that takes a single register argument.

     `template`, `nybble_1` and `nybble_2` come from the enclosing scope; the
     register number is formatted between the two nybbles.
     """
     parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.REGISTER)
     op = op._replace(args=parsed)

     # The instruction is always 2 bytes, so pos advances even if the
     # argument could not be fully parsed on this pass.
     if not all_args_parsed(op.args):
         return context.advance_by_bytes(2), op

     _regcheck(*op.args)
     encoded = template.format(nybble_1, op.args[0].integer, nybble_2)
     op = op._replace(todo=None, hex=encoded)
     return context.advance_by_bytes(2), op
Ejemplo n.º 5
0
def asmpass_codegen(context: Context, op: Op) -> Tuple[Context, Op]:
    """Attempts to generate binary code from a partially-parsed source line.

    Apart from the built-in CPU/ARCH pseudo-opcodes, code generation is
    delegated to the opcode-specific handler found in `context.codegen`. A
    handler should do its best to complete `op` and advance the output
    position (`context.pos`); when it manages both, it returns an `op` whose
    `todo` is None. This function re-arms `op.todo` with itself whenever the
    line's labels are not all bound or the output position is unknown.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: updated assembler context.
      op: updated source code line data structure.

    Raises:
      ValueError: a CPU/ARCH pseudo-opcode did not get exactly one argument.
      Error: the opcode has no handler in `context.codegen`.
    """
    # Bind labels first, before an `org` statement can move us to another
    # binary location.
    for name in op.labels:
        context = context.bind_label(name)

    assert op.args is not None
    arch_switches = ('cpu', '.cpu', 'arch', '.arch')

    if op.opcode in arch_switches:
        # "Built-in" opcode: switch the target architecture.
        if len(op.args) != 1:
            raise ValueError('The {} pseudo-opcode takes one argument'.format(
                op.opcode.upper()))
        op = op._replace(todo=None)
        context = _switch_arch(op.lineno, op.line, context,
                               op.args[0].stripped)
    elif op.opcode in context.codegen:
        # Everything else belongs to the architecture-specific generators.
        handler = context.codegen[op.opcode]
        context, op = handler(context, op)
    else:
        raise Error(
            op.lineno, op.line,
            'Opcode "{}" not recognised for architecture {}'.format(
                op.opcode, context.arch))

    # Regardless of what the handler decided, another pass is required if any
    # label on this line is still unbound, or if we have no output location
    # for the hex data generated from this line.
    labels_bound = all(name in context.labels for name in op.labels)
    if not labels_bound or context.pos is None:
        op = op._replace(todo=asmpass_codegen)

    return context, op
Ejemplo n.º 6
0
def _codegen_ctrl(context: Context, op: Op) -> Tuple[Context, Op]:
    """CTRL instruction: an address argument followed by a numeric literal.

    The generated halfword is the digit 1, the first argument as one hex
    digit, and the second argument (modulo 256) as two hex digits.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.NUMBER)
    op = op._replace(args=parsed)

    # The instruction is always 2 bytes, so pos advances even if the args
    # could not be fully parsed on this pass.
    if not all_args_parsed(op.args):
        return context.advance_by_bytes(2), op

    _devcheck(op.args[0])
    _bytecheck(op.args[1])
    encoded = '1{:X}{:02X}'.format(op.args[0].integer,
                                   op.args[1].integer % 256)
    op = op._replace(todo=None, hex=encoded)
    return context.advance_by_bytes(2), op
Ejemplo n.º 7
0
def _codegen_putb(context: Context, op: Op) -> Tuple[Context, Op]:
    """PUTB instruction: an address argument and a register dereference.

    The generated halfword is the digit 4, the first argument, the
    dereferenced register, and a modifier digit derived from the
    dereference's postcrement (allowed range -4..4).
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.DEREF_REGISTER)
    op = op._replace(args=parsed)

    # The instruction is always 2 bytes, so pos advances even if the args
    # could not be fully parsed on this pass.
    if not all_args_parsed(op.args):
        return context.advance_by_bytes(2), op

    _devcheck(op.args[0])
    _regderefcheck(op.args[1], postcrem_from=-4, postcrem_to=4)
    modifier = _postcrement_to_modifier(op.args[1].postcrement)
    encoded = '4{:X}{:X}{:X}'.format(op.args[0].integer, op.args[1].integer,
                                     modifier)
    op = op._replace(todo=None, hex=encoded)
    return context.advance_by_bytes(2), op
Ejemplo n.º 8
0
def _codegen_lwi(context: Context, op: Op) -> Tuple[Context, Op]:
    """LWI pseudoinstruction: MOVE Rx, (R0)+; DW i.

    Args:
      context: current assembler context.
      op: source code line data structure; its arguments are a register and
          a halfword literal.

    Returns:
      context: context advanced by the 4 bytes this pseudoinstruction uses.
      op: `op` with parsed args; `hex` is set and `todo` cleared once both
          arguments are fully parsed.

    Raises:
      ValueError: the literal lies outside -32768..65535, i.e. it fits
          neither a signed nor an unsigned 16-bit halfword.
    """
    op = op._replace(args=parse_args_if_able(_PARSE_OPTIONS, context, op,
                                             Type.REGISTER, Type.NUMBER))
    if all_args_parsed(op.args):
        _regcheck(op.args[0])
        # Accept the full signed range too: -32768 is a valid halfword and
        # maps to 0x8000 under the modulo below.
        if not -32768 <= op.args[1].integer <= 65535:
            raise ValueError(
                'Halfword literal {} not in range -32768..65535'.format(
                    op.args[1].stripped))
        # Negative literals wrap to their two's-complement representation.
        digits = (op.args[0].integer, op.args[1].integer % 65536)
        op = op._replace(todo=None, hex='D{:X}01{:04X}'.format(*digits))
    # We can still update pos whether we've parsed all args or not.
    return context.advance_by_bytes(4), op
Ejemplo n.º 9
0
def _codegen_call(context: Context, op: Op) -> Tuple[Context, Op]:
    """CALL pseudoinstruction: emit one of four encodings chosen by operand.

    The first argument is the call target (address literal, register,
    register dereference, or dereferenced low-memory address); the second is
    a register. An address-literal target yields 6 bytes of code, every other
    form yields 4.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: context advanced past the generated (or anticipated) code.
      op: `op` with parsed args; `hex` is set and `todo` cleared once all
          arguments are fully parsed.

    Raises:
      ValueError: a dereferenced-address target carries a pre- or
          post-crement.
    """
    target_types = (Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER
                    | Type.DEREF_ADDRESS)
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, target_types,
                                Type.REGISTER)
    op = op._replace(args=parsed)

    # The output length depends on the target kind, so when parsing is still
    # incomplete we reserve space based on the kind alone and try again later.
    if not all_args_parsed(op.args):
        is_literal = op.args[0].argtype & Type.ADDRESS
        return context.advance_by_bytes(6 if is_literal else 4), op

    target = op.args[0]
    kind = target.argtype

    if kind & Type.ADDRESS:
        # Calling an address literal. There is a way to do this in two
        # halfwords: for that, use the RCALL pseudoinstruction.
        _jmpdestcheck(target)
        reg = op.args[1]
        _regcheck(reg)
        encoded = '0{:X}03D0{:X}1{:04X}'.format(reg.integer, reg.integer,
                                                target.integer)
    elif kind & Type.REGISTER:
        # Calling an address stored inside a register.
        reg = op.args[1]
        _callregcheck(target, reg)
        encoded = '0{:X}0300{:X}4'.format(reg.integer, target.integer)
    elif kind & Type.DEREF_REGISTER:
        # Calling an address stored at the memory location a register points
        # to. Crements are counted in words, hence the doubling below.
        reg = op.args[1]
        _callregcheck(target, reg)
        _regderefcheck(target, postcrem_from=-2, postcrem_to=2)
        modifier = _postcrement_to_modifier(2 * target.postcrement)
        encoded = '0{:X}03D0{:X}{:X}'.format(reg.integer, target.integer,
                                             modifier)
    else:
        # Calling an address stored at a low memory location.
        reg = op.args[1]
        _regcheck(reg)
        _lowwordaddrcheck(target)
        assert op.opcode is not None  # mypy...
        if target.precrement or target.postcrement:
            raise ValueError(
                'No (in/de)crementation is allowed for address dereference arguments '
                'to {}'.format(op.opcode.upper()))
        encoded = '0{:X}0320{:02X}'.format(reg.integer, target.integer // 2)

    op = op._replace(todo=None, hex=encoded)
    return context.advance(op.hex), op
Ejemplo n.º 10
0
def _codegen_move(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVE instruction: halfword move involving at least one register.

    Three forms are generated, each 2 bytes long:
      * register <-> register;
      * register <-> low-memory address;
      * register <-> memory location pointed to by a register.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: context advanced by the 2 bytes this instruction occupies.
      op: `op` with parsed args; `hex` is set and `todo` cleared once both
          arguments are fully parsed.

    Raises:
      ValueError: neither argument is a register.
    """
    op = op._replace(
        args=parse_args_if_able(  # Note fractional crements enabled.
            _PARSE_OPTIONS._replace(
                fractional_crements=True), context, op, Type.ADDRESS
            | Type.REGISTER | Type.DEREF_REGISTER, Type.ADDRESS | Type.REGISTER
            | Type.DEREF_REGISTER))
    if all_args_parsed(op.args):

        if not any(arg.argtype & Type.REGISTER for arg in op.args):
            raise ValueError(
                'At least one argument to MOVE must be a register')

        # This is a move between registers.
        elif op.args[0].argtype == op.args[1].argtype == Type.REGISTER:
            _regcheck(*op.args)
            digits_r = (op.args[0].integer, op.args[1].integer)
            op = op._replace(todo=None, hex='0{:X}{:X}4'.format(*digits_r))

        # This is a move from/to an address found at a specified memory location.
        elif any(arg.argtype == Type.ADDRESS for arg in op.args):
            if op.args[0].argtype & Type.REGISTER:
                nybble, argaddr, argreg = 2, op.args[1], op.args[0]
            else:
                nybble, argaddr, argreg = 3, op.args[0], op.args[1]
            _regcheck(argreg)
            _lowwordaddrcheck(argaddr)
            digits_a = (nybble, argreg.integer, argaddr.integer // 2)
            op = op._replace(todo=None, hex='{:X}{:X}{:02X}'.format(*digits_a))

        # This is a move from/to an address found in a register.
        else:
            if op.args[0].argtype & Type.DEREF_REGISTER:
                nybble, argderef, argreg = 5, op.args[0], op.args[1]
            else:
                nybble, argderef, argreg = 0xD, op.args[1], op.args[0]
            _regcheck(argreg)
            _regderefcheck(argderef, postcrem_from=-2, postcrem_to=2)  # Words.
            modifier = _postcrement_to_modifier(2 * argderef.postcrement)
            # Both directions encode as: opcode nybble, plain register,
            # dereferenced register, modifier. Selecting the digits by role
            # (rather than by argument position) keeps the 0xD form in
            # agreement with the other generators that emit it, e.g. LWI's
            # 'D{:X}01' for MOVE Rx, (R0)+.
            digits_d = (nybble, argreg.integer, argderef.integer, modifier)
            op = op._replace(todo=None,
                             hex='{:X}{:X}{:X}{:X}'.format(*digits_d))

    # We can still update pos whether we've parsed all args or not.
    return context.advance_by_bytes(2), op
Ejemplo n.º 11
0
def read_source(source_input: TextIO) -> Tuple[List[Op], Tuple[Text, ...]]:
    """Load and preprocess source code from the input. Some lexing, too.

    Each line has its trailing newline characters removed, its comment
    stripped and its code tokenised. A leading `label:` token is peeled off
    and attached to the next line that contains any code.

    Args:
      source_input: Source code input. All lines of this input are consumed.

    Returns:
      A 2-tuple with these entries:
      [0]: Source-code lines loaded from the file, as Op objects. Only the
           `lineno`, `line`, `labels`, `tokens`, and `todo` fields are
           specified in these Ops; the `todo` field directs that the line's
           next step is lexing.
      [1]: Every line of the original file, with newlines removed.

    Raises:
      Error: the same label is defined on more than one line.
    """
    ops: List[Op] = []  # Ops for lines that contain code.
    lines: List[Text] = []  # Every source line, newline-stripped.
    pending_labels: Set[Text] = set()  # Labels awaiting the next code line.
    label_lines: Dict[Text, int] = {}  # Each seen label -> defining line.

    for lineno, raw in enumerate(source_input):
        line = raw.rstrip('\r\n')
        lines.append(line)

        # Drop the comment part, then tokenise what is left.
        code_match = _RE_CODE.match(line)  # Guaranteed to match.
        assert code_match is not None  # Although mypy doesn't believe it.
        tokens = tuple(_RE_TOKEN.findall(code_match[0]))

        # A first token of the form `name:` with a valid name is a label.
        starts_with_label = (tokens and tokens[0].endswith(':')
                             and LABEL_RE.match(tokens[0][:-1]))
        if starts_with_label:
            label = tokens[0][:-1]
            tokens = tokens[1:]
            if label in label_lines:
                raise Error(
                    lineno, line,
                    'The label {} was already used on line {}'.format(
                        label, label_lines[label]))
            pending_labels.add(label)
            label_lines[label] = lineno

        # A label-only or empty line produces no Op; its labels stay pending.
        if not tokens:
            continue

        # Whatever remains is treated as code: record it with the pending
        # labels and schedule it for lexing.
        ops.append(
            Op(lineno=lineno,
               line=line,
               tokens=tokens,
               labels=tuple(sorted(pending_labels)),
               todo=asmpass_lexer))
        pending_labels.clear()

    return ops, tuple(lines)
Ejemplo n.º 12
0
def _codegen_bra(context: Context, op: Op) -> Tuple[Context, Op]:
    """BRA pseudoinstruction: ADD/SUB R0,#<dist>.

    Code is produced only once the destination is fully parsed and the
    current output position is known. A zero offset cannot be encoded this
    way, so a NOP is emitted (with a warning) instead.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
    op = op._replace(args=parsed)

    ready = all_args_parsed(op.args) and context.pos is not None
    if ready:
        _jmpdestcheck(op.args[0])
        offset = _reljmpoffset(context, op.args[0])
        if offset == 0:
            logging.warning(
                'Line %d: A BRA of +2 bytes (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            encoded = '0004'
        elif offset > 0:
            # Forward: leading digit A, distance stored minus one.
            encoded = '{:X}0{:02X}'.format(0xA, offset - 1)
        else:
            # Backward: leading digit F, magnitude stored minus one.
            encoded = '{:X}0{:02X}'.format(0xF, -offset - 1)
        op = op._replace(todo=None, hex=encoded)

    # The instruction is always 2 bytes, parsed or not.
    return context.advance_by_bytes(2), op
Ejemplo n.º 13
0
def _codegen_movb(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVB instruction: one register and one register dereference.

    The leading hex digit is 6 when the first argument is the register and 7
    when it is the dereference; the dereference may carry a postcrement in
    the range -4..4.

    Raises:
      ValueError: both arguments have the same argument type.
    """
    operand_types = Type.REGISTER | Type.DEREF_REGISTER
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, operand_types,
                                operand_types)
    op = op._replace(args=parsed)

    first, second = op.args[0], op.args[1]
    # This check runs before we know whether parsing completed.
    if first.argtype == second.argtype:
        raise ValueError(
            'One MOVB argument should be a register, and the other should be a '
            'register dereference')

    if all_args_parsed(op.args):
        if first.argtype & Type.REGISTER:
            nybble, argderef, argreg = 6, second, first
        else:
            nybble, argderef, argreg = 7, first, second
        _regcheck(argreg)
        _regderefcheck(argderef, postcrem_from=-4, postcrem_to=4)
        modifier = _postcrement_to_modifier(argderef.postcrement)
        encoded = '{:X}{:X}{:X}{:X}'.format(nybble, argreg.integer,
                                            argderef.integer, modifier)
        op = op._replace(todo=None, hex=encoded)

    # The instruction is always 2 bytes, parsed or not.
    return context.advance_by_bytes(2), op
Ejemplo n.º 14
0
    def codegen_add_or_sub(context: Context, op: Op) -> Tuple[Context, Op]:
        """Generate a 2-byte ADD or SUB, chosen by `add_or_sub` in the
        enclosing scope.

        The first argument is a register; the second is either a literal in
        0..256 or another register. A literal of 0 cannot be encoded, so a NOP
        is emitted (with a warning) instead.

        Raises:
          ValueError: the literal lies outside 0..256.
        """
        parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                    Type.REGISTER,
                                    Type.NUMBER | Type.REGISTER)
        op = op._replace(args=parsed)

        # The instruction is always 2 bytes, so pos advances even if the args
        # could not be fully parsed on this pass.
        if not all_args_parsed(op.args):
            return context.advance_by_bytes(2), op

        _regcheck(op.args[0])

        if op.args[1].argtype & Type.NUMBER:
            # Immediate operand.
            amount = op.args[1].integer
            if not 0 <= amount <= 256:
                raise ValueError('Literal {!r} not in range 0..256'.format(
                    op.args[1].stripped))
            if amount == 0:
                assert op.opcode is not None  # mypy...
                logging.warning(
                    'Line %d: A #0 literal argument to %s is not supported by the %s '
                    'instruction; generating a NOP (MOVE R0, R0) instead',
                    op.lineno, op.opcode.upper(), op.opcode.upper())
                encoded = '0004'
            else:
                # The literal is stored minus one, so 1..256 fit in one byte.
                lead = 0xA if add_or_sub == 'add' else 0xF
                encoded = '{:X}{:X}{:02X}'.format(lead, op.args[0].integer,
                                                  (amount - 1) % 256)
        else:
            # Register operand: add/subtract the LSB of one register to/from
            # another register.
            _regcheck(op.args[1])
            tail = 8 if add_or_sub == 'add' else 9
            encoded = '0{:X}{:X}{:X}'.format(op.args[0].integer,
                                             op.args[1].integer, tail)

        op = op._replace(todo=None, hex=encoded)
        return context.advance_by_bytes(2), op
Ejemplo n.º 15
0
def _codegen_rcall(context: Context, op: Op) -> Tuple[Context, Op]:
    """RCALL (R=relocatable) pseudoinstruction: INC2 Rx,R0; BRA <addr>.

    Code is produced only once both arguments are fully parsed and the
    current output position is known. A zero offset cannot be encoded as a
    relative jump, so the second halfword becomes a NOP (with a warning).
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.REGISTER)
    op = op._replace(args=parsed)
    assert op.args is not None

    ready = all_args_parsed(op.args) and context.pos is not None
    if ready:
        _jmpdestcheck(op.args[0])
        _regcheck(op.args[1])
        offset = _reljmpoffset(context, op.args[0])
        reg = op.args[1].integer
        if offset == 0:
            logging.warning(
                'Line %d: A +2-byte RCALL (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            encoded = '0{:X}030004'.format(reg)
        elif offset > 0:
            # Forward: digit A, distance stored minus one.
            encoded = '0{:X}03{:X}0{:02X}'.format(reg, 0xA, offset - 1)
        else:
            # Backward: digit F, magnitude stored minus one.
            encoded = '0{:X}03{:X}0{:02X}'.format(reg, 0xF, -offset - 1)
        op = op._replace(todo=None, hex=encoded)

    # The pseudoinstruction is always 4 bytes, parsed or not.
    return context.advance_by_bytes(4), op
Ejemplo n.º 16
0
  def codegen_data(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate hex for a data statement made of strings, integers or labels.

    `align`, `element_size`, `endianity` and `parse_options` come from the
    enclosing scope. Every value is encoded as `element_size` bytes. If any
    label argument is not yet bound, a placeholder 0 is emitted for it and
    `op.todo` is left unchanged so the statement is retried later.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: context advanced past the generated hex (padding included).
      op: `op` with `hex` set, and `todo` cleared if all values are final.

    Raises:
      ValueError: alignment is requested but the output position is unknown.
    """
    # Accumulate hex code here, and track whether we have its final value worked
    # out, or if we're still waiting on labels.
    hexparts = []
    all_hex_ok = True

    # Align the data to match the data quantum, if directed.
    if align and element_size != 1:
      if context.pos is None:
        raise ValueError(
            'Unresolved labels above this line (or other factors) make it '
            'impossible to know how to align this data statement. Consider '
            "an ORG statement to make this data's memory location explicit.")
      # Pad with as many zero bytes as it takes to reach the next multiple of
      # element_size. Note that `pos % element_size` is the distance *past*
      # the previous boundary, which only equals the required padding when
      # element_size is 2.
      hexparts.append('00' * ((-context.pos) % element_size))

    # Generate data for each arg. Unlike nearly all other statements, we do most
    # of the parsing ourselves.
    for arg in op.args:
      # Is the argument a string?
      if arg.stripped.startswith('"') or arg.stripped.startswith("'"):
        hexparts.append(''.join(
            val.to_bytes(element_size, endianity).hex().upper()
            for val in parse_string(parse_options, context, arg.stripped)))

      # No, it must be a single integer value.
      else:
        # Does the argument look like a label? If so, try to resolve it and take
        # its value. If not, let's parse the argument as an integer value.
        if LABEL_RE.fullmatch(arg.stripped):
          all_hex_ok &= arg.stripped in context.labels
          val = context.labels[arg.stripped] if all_hex_ok else 0
        else:
          val = parse_integer(parse_options, context, arg.stripped)
        # Now encode the value as hex.
        hexparts.append(val.to_bytes(element_size, endianity).hex().upper())

    # Package the hex data from all the args and, if appropriate, mark our job
    # as complete.
    op = op._replace(todo=None if all_hex_ok else op.todo,
                     hex=''.join(hexparts))
    return context.advance(op.hex), op
Ejemplo n.º 17
0
def asmpass_lexer(context: Context, op: Op) -> Tuple[Context, Op]:
    """Perform lexical analysis on a source code line.

    Splits the line's tokens into an opcode (the first token, case-folded)
    and its arguments (every remaining token, whitespace-stripped), storing
    them in the `opcode` and `args` fields of `op`.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: the assembler context, unchanged.
      op: `op` with `opcode` and `args` set and code generation scheduled.
    """
    # The first token is the opcode, canonicalised to a case-folded form.
    opcode = op.tokens[0].casefold()
    args = tuple(Arg(stripped=text.strip()) for text in op.tokens[1:])
    # Arguments are only wrapped here; parsing them is left to code
    # generation so that symbols can be bound as late as possible.
    return context, op._replace(opcode=opcode, args=args,
                                todo=asmpass_codegen)
Ejemplo n.º 18
0
def _codegen_nop(context: Context, op: Op) -> Tuple[Context, Op]:
    """NOP pseudoinstruction: MOVE R0, R0.

    Takes no arguments; the parse call is still made so that any stray
    arguments are handled by the parser.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op)
    op = op._replace(args=parsed, todo=None, hex='0004')
    return context.advance(op.hex), op