Example #1
0
def _codegen_jmp(context: Context, op: Op) -> Tuple[Context, Op]:
    """JMP pseudoinstruction: several underlying variants.

    The single argument may be an address literal, a register dereference or
    an address dereference; each form is encoded differently.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context with the output position advanced.
      op: the op with parsed args and, once the argument is fully parsed,
          its hex filled in and `todo` cleared.
    """
    accepted = Type.ADDRESS | Type.DEREF_REGISTER | Type.DEREF_ADDRESS
    op = op._replace(
        args=parse_args_if_able(_PARSE_OPTIONS, context, op, accepted))
    target = op.args[0]

    # The generated code differs in length between variants, so when the
    # argument is not fully parsed yet we must still advance by the right
    # amount: four bytes for the address-literal form, two otherwise.
    if not all_args_parsed(op.args):
        if target.argtype & Type.ADDRESS:
            return context.advance_by_bytes(4), op
        return context.advance_by_bytes(2), op

    if target.argtype & Type.ADDRESS:
        # Branch to an address literal.
        _jmpdestcheck(target)
        code = 'D001{:04X}'.format(target.integer)
    elif target.argtype & Type.DEREF_REGISTER:
        # Branch to an address stored at the memory location a register points
        # to. (To branch to an address held in a register itself, use RET.)
        _regderefcheck(target, postcrem_from=0, postcrem_to=0)
        code = 'D0{:X}8'.format(target.integer)
    else:
        # Branch to an address stored at a low memory location; the encoding
        # holds the location divided by two.
        _lowwordaddrcheck(target)
        code = '20{:02X}'.format(target.integer // 2)

    op = op._replace(todo=None, hex=code)
    return context.advance(op.hex), op
Example #2
0
def _codegen_call(context: Context, op: Op) -> Tuple[Context, Op]:
    """CALL pseudoinstruction: several underlying variants.

    The first argument is the call target (address literal, register,
    register dereference or address dereference); the second is a register.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context with the output position advanced.
      op: the op with parsed args and, once all args are parsed, its hex
          filled in and `todo` cleared.

    Raises:
      ValueError: if an address-dereference target carries any
          (in/de)crementation.
    """
    target_types = (Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER
                    | Type.DEREF_ADDRESS)
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op, target_types, Type.REGISTER))

    # Variants differ in length, so an op that cannot be finished yet still
    # has to advance by the right amount: six bytes for the address-literal
    # form, four for every other form.
    if not all_args_parsed(op.args):
        size = 6 if op.args[0].argtype & Type.ADDRESS else 4
        return context.advance_by_bytes(size), op

    target, link = op.args[0], op.args[1]

    if target.argtype & Type.ADDRESS:
        # Call an address literal. There is a way to do this in two
        # halfwords: for that, use the RCALL pseudoinstruction.
        _jmpdestcheck(target)
        _regcheck(link)
        code = '0{:X}03D0{:X}1{:04X}'.format(
            link.integer, link.integer, target.integer)

    elif target.argtype & Type.REGISTER:
        # Call an address held in a register.
        _callregcheck(target, link)
        code = '0{:X}0300{:X}4'.format(link.integer, target.integer)

    elif target.argtype & Type.DEREF_REGISTER:
        # Call an address stored at the memory location a register points to.
        _callregcheck(target, link)
        _regderefcheck(target, postcrem_from=-2, postcrem_to=2)  # Words.
        modifier = _postcrement_to_modifier(2 * target.postcrement)
        code = '0{:X}03D0{:X}{:X}'.format(
            link.integer, target.integer, modifier)

    else:
        # Call an address stored at a low memory location.
        _regcheck(link)
        _lowwordaddrcheck(target)
        assert op.opcode is not None  # mypy...
        if target.precrement or target.postcrement:
            raise ValueError(
                'No (in/de)crementation is allowed for address dereference arguments '
                'to {}'.format(op.opcode.upper()))
        code = '0{:X}0320{:02X}'.format(link.integer, target.integer // 2)

    op = op._replace(todo=None, hex=code)
    return context.advance(op.hex), op
Example #3
0
 def codegen_immed_to_reg(context: Context, op: Op) -> Tuple[Context, Op]:
     """Generate a two-byte "register, immediate byte" instruction.

     The leading opcode digit `nybble` is taken from the enclosing scope.
     """
     parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                 Type.REGISTER, Type.NUMBER)
     op = op._replace(args=parsed)
     if all_args_parsed(op.args):
         register = op.args[0]
         _regcheck(register)
         immediate = op.args[1]
         _bytecheck(immediate)
         # The immediate is reduced modulo 256 so it fits two hex digits.
         code = '{:X}{:X}{:02X}'.format(
             nybble, register.integer, immediate.integer % 256)
         op = op._replace(todo=None, hex=code)
     # The instruction is always two bytes, so the position can be advanced
     # whether or not the args were fully parsed.
     return context.advance_by_bytes(2), op
Example #4
0
def _codegen_org(context: Context, op: Op) -> Tuple[Context, Op]:
  """ORG pseudoinstruction: set current output stream position.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: context whose `pos` is the parsed address, or the unchanged
        context if the argument could not be parsed yet.
    op: the op with parsed args; `todo` is cleared and `hex` set to the empty
        string only when the argument was parsed.
  """
  parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
  op = op._replace(args=parsed)

  # Argument not resolved yet: `todo` stays as it was so this op is retried.
  if not all_args_parsed(op.args):
    return context, op

  # ORG emits no bytes; it only moves the output position.
  destination = op.args[0].integer
  op = op._replace(hex='', todo=None)
  return context._replace(pos=destination), op
Example #5
0
 def codegen_onereg(context: Context, op: Op) -> Tuple[Context, Op]:
     """Generate a two-byte instruction that takes one register argument.

     `nybble_1`, `nybble_2` and the hex `template` come from the enclosing
     scope.
     """
     # The single register argument to this opcode should be parseable.
     parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.REGISTER)
     op = op._replace(args=parsed)
     if all_args_parsed(op.args):
         _regcheck(*op.args)
         code = template.format(nybble_1, op.args[0].integer, nybble_2)
         op = op._replace(todo=None, hex=code)
     # The instruction is always two bytes, so the position can be advanced
     # whether or not the args were fully parsed.
     return context.advance_by_bytes(2), op
Example #6
0
def _codegen_ctrl(context: Context, op: Op) -> Tuple[Context, Op]:
    """CTRL instruction.

    Takes an address argument (checked with `_devcheck`) and a numeric
    argument (checked with `_bytecheck`) and encodes them as `1<dev><byte>`.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.NUMBER)
    op = op._replace(args=parsed)
    if all_args_parsed(op.args):
        device = op.args[0]
        _devcheck(device)
        value = op.args[1]
        _bytecheck(value)
        # The value is reduced modulo 256 so it fits two hex digits.
        code = '1{:X}{:02X}'.format(device.integer, value.integer % 256)
        op = op._replace(todo=None, hex=code)
    # The instruction is always two bytes, so the position can be advanced
    # whether or not the args were fully parsed.
    return context.advance_by_bytes(2), op
Example #7
0
def asmpass_codegen(context: Context, op: Op) -> Tuple[Context, Op]:
    """Attempts to generate binary code from a partially-parsed source line.

    Apart from the built-in architecture-switching pseudo-opcodes, code
    generation is delegated to the opcode-specific handler registered in
    `context.codegen`. Handlers are expected to complete as much of `op` as
    they can and to advance `context.pos`; when they manage both they return
    an `op` whose `todo` is None. This function then re-arms `op.todo` with
    itself if any of the op's labels is still unbound or if the output
    position is unknown.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: updated assembler context.
      op: updated source code line data structure.

    Raises:
      ValueError: if an architecture pseudo-opcode does not have exactly one
          argument.
      Error: if the opcode has no handler for the current architecture.
    """
    # Bind labels first, before an `org` statement can move us to some other
    # binary location.
    for label in op.labels:
        context = context.bind_label(label)

    assert op.args is not None
    arch_opcodes = ('cpu', '.cpu', 'arch', '.arch')

    if op.opcode in arch_opcodes:
        # Built-in: switch the target architecture.
        if len(op.args) != 1:
            raise ValueError('The {} pseudo-opcode takes one argument'.format(
                op.opcode.upper()))
        op = op._replace(todo=None)
        context = _switch_arch(op.lineno, op.line, context,
                               op.args[0].stripped)
    elif op.opcode in context.codegen:
        # Everything else goes to the architecture-specific code generator.
        handler = context.codegen[op.opcode]
        context, op = handler(context, op)
    else:
        raise Error(
            op.lineno, op.line,
            'Opcode "{}" not recognised for architecture {}'.format(
                op.opcode, context.arch))

    # Regardless of what the handler decided, this line needs another pass if
    # any of its labels is still unbound, or if we have no output location for
    # the hex data it generated.
    labels_pending = any(label not in context.labels for label in op.labels)
    if labels_pending or context.pos is None:
        op = op._replace(todo=asmpass_codegen)

    return context, op
Example #8
0
def _codegen_putb(context: Context, op: Op) -> Tuple[Context, Op]:
    """PUTB instruction.

    Takes an address argument (checked with `_devcheck`) and a register
    dereference whose postcrement must lie within -4..4, and encodes them as
    `4<dev><reg><modifier>`.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.DEREF_REGISTER)
    op = op._replace(args=parsed)
    if all_args_parsed(op.args):
        device = op.args[0]
        _devcheck(device)
        pointer = op.args[1]
        _regderefcheck(pointer, postcrem_from=-4, postcrem_to=4)
        modifier = _postcrement_to_modifier(pointer.postcrement)
        code = '4{:X}{:X}{:X}'.format(
            device.integer, pointer.integer, modifier)
        op = op._replace(todo=None, hex=code)
    # The instruction is always two bytes, so the position can be advanced
    # whether or not the args were fully parsed.
    return context.advance_by_bytes(2), op
Example #9
0
def main(FLAGS: argparse.Namespace):
    """Main function.

    Builds an assembler context for the architecture named in `FLAGS.arch`,
    then assembles `FLAGS.input_file` into `FLAGS.output_file` (with an
    optional listing in `FLAGS.listing`). An `Error` raised during assembly is
    printed to stderr.
    """
    # Start from an empty context with an ASCII string encoder, and let
    # _switch_arch load the code generators for the chosen architecture.
    blank_context = Context(
        arch='', codegen={}, encode_str=lambda s: bytes(s, 'ascii'))
    pseudo_line = '--arch={}'.format(FLAGS.arch)
    context = _switch_arch(-1, pseudo_line, blank_context, FLAGS.arch)

    # Run the assembler.
    try:
        assemble(context, FLAGS.input_file, FLAGS.output_file, FLAGS.listing)
    except Error as error:
        print(error, file=sys.stderr)
Example #10
0
def _codegen_lwi(context: Context, op: Op) -> Tuple[Context, Op]:
    """LWI pseudoinstruction: MOVE RX, (R0)+; DW i.

    Loads a 16-bit literal into a register. The literal may be written either
    as a signed value (-32768..32767) or as an unsigned one (0..65535); it is
    stored modulo 65536.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced by four bytes.
      op: the op with parsed args and, once all args are parsed, its hex
          filled in and `todo` cleared.

    Raises:
      ValueError: if the literal is outside -32768..65535.
    """
    op = op._replace(args=parse_args_if_able(_PARSE_OPTIONS, context, op,
                                             Type.REGISTER, Type.NUMBER))
    if all_args_parsed(op.args):
        _regcheck(op.args[0])
        # The lower bound is -32768 (the most negative 16-bit two's-complement
        # value), matching the range quoted in the error message.
        if not -32768 <= op.args[1].integer <= 65535:
            raise ValueError(
                'Halfword literal {} not in range -32768..65535'.format(
                    op.args[1].stripped))
        # Negative literals wrap to their two's-complement representation.
        digits = (op.args[0].integer, op.args[1].integer % 65536)
        op = op._replace(todo=None, hex='D{:X}01{:04X}'.format(*digits))
    # We can still update pos whether we've parsed all args or not.
    return context.advance_by_bytes(4), op
Example #11
0
def _codegen_move(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVE instruction.

    Handles three forms, selected by the argument types:
      * register to register: `0<arg0><arg1>4`;
      * register from/to a low memory address: `2<reg><addr/2>` when the first
        argument is the register, `3<reg><addr/2>` otherwise;
      * register from/to the location a register points to:
        `D<reg><ptr><modifier>` when the first argument is the register,
        `5<reg><ptr><modifier>` when the first argument is the dereference.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced by two bytes.
      op: the op with parsed args and, once all args are parsed, its hex
          filled in and `todo` cleared.

    Raises:
      ValueError: if neither argument is a register.
    """
    operand_types = Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER
    op = op._replace(
        args=parse_args_if_able(  # Note fractional crements enabled.
            _PARSE_OPTIONS._replace(fractional_crements=True), context, op,
            operand_types, operand_types))
    if all_args_parsed(op.args):

        if not any(arg.argtype & Type.REGISTER for arg in op.args):
            raise ValueError(
                'At least one argument to MOVE must be a register')

        # This is a move between registers.
        elif op.args[0].argtype == op.args[1].argtype == Type.REGISTER:
            _regcheck(*op.args)
            digits_r = (op.args[0].integer, op.args[1].integer)
            op = op._replace(todo=None, hex='0{:X}{:X}4'.format(*digits_r))

        # This is a move from/to an address found at a specified memory location.
        elif any(arg.argtype == Type.ADDRESS for arg in op.args):
            if op.args[0].argtype & Type.REGISTER:
                nybble, argaddr, argreg = 2, op.args[1], op.args[0]
            else:
                nybble, argaddr, argreg = 3, op.args[0], op.args[1]
            _regcheck(argreg)
            _lowwordaddrcheck(argaddr)
            digits_a = (nybble, argreg.integer, argaddr.integer // 2)
            op = op._replace(todo=None, hex='{:X}{:X}{:02X}'.format(*digits_a))

        # This is a move from/to an address found in a register.
        else:
            if op.args[0].argtype & Type.DEREF_REGISTER:
                nybble, argderef, argreg = 5, op.args[0], op.args[1]
            else:
                nybble, argderef, argreg = 0xD, op.args[1], op.args[0]
            _regcheck(argreg)
            _regderefcheck(argderef, postcrem_from=-2, postcrem_to=2)  # Words.
            modifier = _postcrement_to_modifier(2 * argderef.postcrement)
            # The encoding is always <nybble><register><pointer><modifier>, in
            # both directions, so the digits must come from argreg/argderef
            # rather than from argument position. Taking them positionally
            # swapped the two register nybbles for the load form.
            digits_d = (nybble, argreg.integer, argderef.integer, modifier)
            op = op._replace(todo=None,
                             hex='{:X}{:X}{:X}{:X}'.format(*digits_d))

    # We can still update pos whether we've parsed all args or not.
    return context.advance_by_bytes(2), op
Example #12
0
def _switch_arch(
    lineno: int,
    line: Text,
    context: Context,
    arch: Text,
) -> Context:
    """Switch the architecture we're generating code for.

    Imports the module `tsasm.codegen.<arch>` and installs the result of its
    `get_codegen()` and its `encode_str` attribute in the context.

    Args:
      lineno: line number to report if loading fails.
      line: source line text to report if loading fails.
      context: current assembler context.
      arch: name of the architecture to switch to.

    Returns:
      A context updated with the new architecture's name, code generators and
      string encoder.

    Raises:
      Error: if the module cannot be found or an attribute lookup fails while
          loading it.
    """
    try:
        module = importlib.import_module('.' + arch, 'tsasm.codegen')
        generators = module.get_codegen()
        encoder = module.encode_str
        return context._replace(
            arch=arch, codegen=generators, encode_str=encoder)
    except (ModuleNotFoundError, AttributeError):
        raise Error(
            lineno, line,
            'Failed to load a code-generation library for architecture '
            '{!r}'.format(arch))
Example #13
0
def _codegen_bra(context: Context, op: Op) -> Tuple[Context, Op]:
    """BRA pseudoinstruction: ADD/SUB R0,#<dist>.

    Code is generated only once the target address is parsed and the current
    output position is known. A zero offset (as computed by `_reljmpoffset`)
    cannot be encoded, so a NOP is emitted with a warning instead.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args) and context.pos is not None:
        destination = op.args[0]
        _jmpdestcheck(destination)
        offset = _reljmpoffset(context, destination)
        if offset == 0:
            logging.warning(
                'Line %d: A BRA of +2 bytes (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            code = '0004'
        elif offset > 0:
            # Forward branch: leading digit A, operand is the offset less one.
            code = '{:X}0{:02X}'.format(0xA, offset - 1)
        else:
            # Backward branch: leading digit F, operand is the magnitude of
            # the offset less one.
            code = '{:X}0{:02X}'.format(0xF, -offset - 1)
        op = op._replace(todo=None, hex=code)

    # The instruction is always two bytes, so the position can be advanced
    # whether or not code was generated.
    return context.advance_by_bytes(2), op
Example #14
0
  def codegen_data(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate hex for a data statement.

    `align`, `element_size`, `endianity` and `parse_options` come from the
    enclosing scope. Each argument is either a quoted string (every value
    yielded by `parse_string` becomes one element) or a single value: a label,
    or an integer parsed by `parse_integer`. Labels not yet in
    `context.labels` are emitted as 0 for now and leave `op.todo` untouched so
    the statement is retried on a later pass.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced past the generated hex.
      op: the op with its hex filled in; `todo` is cleared only if every
          label was resolved.

    Raises:
      ValueError: if alignment is requested but the output position is
          unknown.
    """
    # Accumulate hex code here, and track whether we have its final value worked
    # out, or if we're still waiting on labels.
    hexparts = []
    all_hex_ok = True

    # Align the data to match the data quantum, if directed.
    if align and element_size != 1:
      if context.pos is None:
        raise ValueError(
            'Unresolved labels above this line (or other factors) make it '
            'impossible to know how to align this data statement. Consider '
            "an ORG statement to make this data's memory location explicit.")
      # Pad up to the next multiple of element_size. `pos % element_size`
      # is only the right amount for sizes 1 and 2; `-pos % element_size` is
      # the distance to the next boundary for any size.
      hexparts.append('00' * ((-context.pos) % element_size))

    # Generate data for each arg. Unlike nearly all other statements, we do most
    # of the parsing ourselves.
    for arg in op.args:
      text = arg.stripped

      # Is the argument a string?
      if text.startswith(('"', "'")):
        hexparts.append(''.join(
            val.to_bytes(element_size, endianity).hex().upper()
            for val in parse_string(parse_options, context, text)))
        continue

      # No, it must be a single integer value. Does the argument look like a
      # label? If so, try to resolve it and take its value. If not, let's
      # parse the argument as an integer value.
      if LABEL_RE.fullmatch(text):
        all_hex_ok = all_hex_ok and (text in context.labels)
        # Once any label is unresolved the hex is only a placeholder of the
        # right length, so 0 stands in for the value.
        val = context.labels[text] if all_hex_ok else 0
      else:
        val = parse_integer(parse_options, context, text)
      # Now encode the value as hex.
      hexparts.append(val.to_bytes(element_size, endianity).hex().upper())

    # Package the hex data from all the args and, if appropriate, mark our job
    # as complete.
    op = op._replace(todo=None if all_hex_ok else op.todo,
                     hex=''.join(hexparts))
    return context.advance(op.hex), op
Example #15
0
def _codegen_movb(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVB instruction.

    Exactly one argument must be a register and the other a register
    dereference whose postcrement lies within -4..4. The encoding is
    `6<reg><ptr><modifier>` when the first argument is the register and
    `7<reg><ptr><modifier>` otherwise.

    Raises:
      ValueError: if both arguments have the same type.
    """
    either = Type.REGISTER | Type.DEREF_REGISTER
    op = op._replace(
        args=parse_args_if_able(_PARSE_OPTIONS, context, op, either, either))

    # Both arguments to this opcode should be parseable, so their types can be
    # compared before checking that parsing completed.
    first, second = op.args[0], op.args[1]
    if first.argtype == second.argtype:
        raise ValueError(
            'One MOVB argument should be a register, and the other should be a '
            'register dereference')

    if all_args_parsed(op.args):
        if first.argtype & Type.REGISTER:
            nybble, pointer, register = 6, second, first
        else:
            nybble, pointer, register = 7, first, second
        _regcheck(register)
        _regderefcheck(pointer, postcrem_from=-4, postcrem_to=4)
        modifier = _postcrement_to_modifier(pointer.postcrement)
        code = '{:X}{:X}{:X}{:X}'.format(
            nybble, register.integer, pointer.integer, modifier)
        op = op._replace(todo=None, hex=code)

    # The instruction is always two bytes, so the position can be advanced
    # whether or not the args were fully parsed.
    return context.advance_by_bytes(2), op
Example #16
0
    def codegen_add_or_sub(context: Context, op: Op) -> Tuple[Context, Op]:
        """Generate a two-byte add or subtract instruction.

        `add_or_sub` comes from the enclosing scope; the value 'add' selects
        addition and anything else selects subtraction. The second argument is
        either an immediate in 0..256 or a register.

        Raises:
          ValueError: if an immediate operand is outside 0..256.
        """
        operand_types = Type.NUMBER | Type.REGISTER
        op = op._replace(args=parse_args_if_able(
            _PARSE_OPTIONS, context, op, Type.REGISTER, operand_types))

        # The instruction is always two bytes, so the position is advanced
        # whether or not the args could be parsed.
        if not all_args_parsed(op.args):
            return context.advance_by_bytes(2), op

        target = op.args[0]
        _regcheck(target)
        operand = op.args[1]

        if operand.argtype & Type.NUMBER:
            # Adding/subtracting an immediate value to/from a register.
            amount = operand.integer
            if not 0 <= amount <= 256:
                raise ValueError('Literal {!r} not in range 0..256'.format(
                    operand.stripped))
            if amount == 0:
                # The encoding stores the amount less one, so zero cannot be
                # expressed; emit a NOP and tell the user.
                assert op.opcode is not None  # mypy...
                mnemonic = op.opcode.upper()
                logging.warning(
                    'Line %d: A #0 literal argument to %s is not supported by the %s '
                    'instruction; generating a NOP (MOVE R0, R0) instead',
                    op.lineno, mnemonic, mnemonic)
                code = '0004'
            else:
                lead = 0xA if add_or_sub == 'add' else 0xF
                code = '{:X}{:X}{:02X}'.format(
                    lead, target.integer, (amount - 1) % 256)
        else:
            # Adding/subtracting the LSB of one register to/from another
            # register.
            _regcheck(operand)
            tail = 8 if add_or_sub == 'add' else 9
            code = '0{:X}{:X}{:X}'.format(
                target.integer, operand.integer, tail)

        op = op._replace(todo=None, hex=code)
        return context.advance_by_bytes(2), op
Example #17
0
def _codegen_rcall(context: Context, op: Op) -> Tuple[Context, Op]:
    """RCALL (R=relocatable) pseudoinstruction: INC2 Rx,R0; BRA <addr>.

    Code is generated only once both arguments are parsed and the current
    output position is known. A zero offset (as computed by `_reljmpoffset`)
    cannot be encoded as a relative jump, so a NOP takes the branch's place
    and a warning is logged.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op,
                                Type.ADDRESS, Type.REGISTER)
    op = op._replace(args=parsed)
    assert op.args is not None

    if all_args_parsed(op.args) and context.pos is not None:
        destination = op.args[0]
        _jmpdestcheck(destination)
        link = op.args[1]
        _regcheck(link)
        offset = _reljmpoffset(context, destination)
        if offset == 0:
            logging.warning(
                'Line %d: A +2-byte RCALL (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            code = '0{:X}030004'.format(link.integer)
        elif offset > 0:
            # Forward branch: digit A, operand is the offset less one.
            code = '0{:X}03{:X}0{:02X}'.format(link.integer, 0xA, offset - 1)
        else:
            # Backward branch: digit F, operand is the magnitude less one.
            code = '0{:X}03{:X}0{:02X}'.format(link.integer, 0xF, -offset - 1)
        op = op._replace(todo=None, hex=code)

    # The pseudoinstruction is always four bytes, so the position can be
    # advanced whether or not code was generated.
    return context.advance_by_bytes(4), op
Example #18
0
def assemble(
    context: Context,
    input_file: TextIO,
    output_file: BinaryIO,
    listing_file: Optional[TextIO],
):
    """Assemble source code from a file.

    Reads all ops from the input, then repeatedly runs each op's pending
    `todo` handler over the whole program until a pass leaves the number of
    ops still waiting on `asmpass_codegen` unchanged. If any such ops remain,
    assembly fails; otherwise the binary (and optional listing) is written.

    Args:
      context: an assembler context.
      input_file: handle for file containing input source code.
      output_file: handle for file receiving binary output.
      listing_file: optional handle for file receiving a text listing.

    Raises:
      Error: if any error is encountered.
    """
    # Load source code from the input.
    ops, lines = read_source(input_file)
    if not ops: raise Error(-1, '<EOF>', 'No code to compile in the input?')

    # Track where each op will commit its hex data to RAM. Entries are None when
    # we don't know that yet.
    addrs: List[Optional[int]] = [None] * len(ops)

    # Keep making passes through all of the ops until the number of pending
    # invocations of `asmpass_codegen` stops changing.
    num_ops_with_codegen_todos = None
    for pass_count in itertools.count(start=1):

        # At the beginning of the pass, reset the current output position to 0.
        context = context._replace(pos=0)

        # Perform a pass through the code. When catching errors, ValueErrors are
        # "normal" errors owing to bugs in user code; other types are "internal"
        # errors that are likely our fault.
        for i in range(len(ops)):
            # If the position of this op has already been calculated, that value is
            # authoritative. Otherwise, if we have new knowledge of this position,
            # and we're at or after code generation, save it.
            if addrs[i] is not None:
                context = context._replace(pos=addrs[i])
            elif context.pos is not None and ops[i].todo is not asmpass_lexer:
                addrs[i] = context.pos

            # If this op has a `todo`, execute it and apply some checks.
            if ops[i].todo is not None:
                try:
                    context, ops[i] = ops[i].todo(context, ops[i])
                    # Hex is written out as whole bytes, so an odd number of
                    # hex digits indicates a code generator bug.
                    if ops[i].hex and len(ops[i].hex) % 2:
                        raise Error(ops[i].lineno, ops[i].line,
                                    'Extra nybble in generated hex.')
                except ValueError as error:
                    raise Error(ops[i].lineno, ops[i].line, str(error))
                # NOTE(review): unless Error derives from ValueError, any Error
                # raised inside the try block (including the "Extra nybble"
                # one just above) lands here and is re-wrapped as an internal
                # error -- confirm that this is intended.
                except Exception as error:
                    raise Error(ops[i].lineno, ops[i].line,
                                'Internal error, sorry!\n  {}'.format(error))

        # With the pass complete, see if it's time to stop.
        ops_with_codegen_todos = tuple(op for op in ops
                                       if op.todo == asmpass_codegen)
        if num_ops_with_codegen_todos == len(ops_with_codegen_todos): break
        num_ops_with_codegen_todos = len(ops_with_codegen_todos)

    # See if compilation was successful.
    if ops_with_codegen_todos:
        raise Error(
            ops[-1].lineno + 1, '<EOF>',
            'After {} passes, {} statements still have unresolved labels or other '
            'issues preventing full assembly. These statements are:\n'
            '  {}\n'.format(
                pass_count, len(ops_with_codegen_todos),
                '\n  '.join('{:>5}: {}'.format(op.lineno, op.line)
                            for op in ops_with_codegen_todos)))

    # Construct a mapping from memory addresses to ops whose binary data will
    # start at those addresses. Complain if multiple ops that actually generate
    # binary data attempt to start in the same location.
    # NOTE(review): `setdefault` keeps whichever op was stored first for an
    # address; a later op at the same address is never stored, even though the
    # warning below says "replacing". This also applies when the first op has
    # empty hex, in which case no warning is logged -- confirm that
    # _emit_binary/_emit_listing do not depend on the later op being present.
    addr_to_op: Dict[int, Op] = {}
    for addr, op in zip(addrs, ops):
        maybe_old_op = addr_to_op.setdefault(addr, op)
        if maybe_old_op is not op and maybe_old_op.hex and op.hex:
            logging.warning(
                'At memory location $%X: replacing previously-generated code.\n'
                '   old - %5d: %s\n   new - %5d: %s', addr,
                maybe_old_op.lineno, maybe_old_op.line, op.lineno, op.line)

    # Write binary output.
    _emit_binary(output_file, addr_to_op)

    # Write listing.
    if listing_file: _emit_listing(listing_file, lines, addr_to_op)
Example #19
0
def _codegen_nop(context: Context, op: Op) -> Tuple[Context, Op]:
    """NOP pseudoinstruction: MOVE R0, R0.

    Always emits the fixed hex `0004` and clears `op.todo`.
    """
    # NOP takes no arguments; the parser is still run (with no argument types
    # given) to make sure there are none.
    no_args = parse_args_if_able(_PARSE_OPTIONS, context, op)
    op = op._replace(args=no_args, todo=None, hex='0004')
    return context.advance(op.hex), op