def _codegen_jmp(context: Context, op: Op) -> Tuple[Context, Op]:
    """JMP pseudoinstruction: several underlying variants.

    The single argument may be an address literal, a register dereference, or
    an address dereference. The address-literal form assembles to four bytes;
    the other two forms assemble to two.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context with its output position advanced.
      op: updated op; `todo` is cleared and `hex` set once the argument has
          been fully parsed.
    """
    accepted = Type.ADDRESS | Type.DEREF_REGISTER | Type.DEREF_ADDRESS
    op = op._replace(
        args=parse_args_if_able(_PARSE_OPTIONS, context, op, accepted))

    # The code length depends on the argument's type, so when parsing is still
    # incomplete we advance by the length that type implies and try again on
    # a later pass.
    if not all_args_parsed(op.args):
        nbytes = 4 if op.args[0].argtype & Type.ADDRESS else 2
        return context.advance_by_bytes(nbytes), op

    target = op.args[0]
    if target.argtype & Type.ADDRESS:
        # Branch to an address literal.
        _jmpdestcheck(target)
        code = 'D001{:04X}'.format(target.integer)
    elif target.argtype & Type.DEREF_REGISTER:
        # Branch to an address stored at a memory location in a register.
        # (To branch to an address inside a register, use RET.)
        _regderefcheck(target, postcrem_from=0, postcrem_to=0)
        code = 'D0{:X}8'.format(target.integer)
    else:
        # Branch to an address stored at a low memory location.
        _lowwordaddrcheck(target)
        code = '20{:02X}'.format(target.integer // 2)

    op = op._replace(todo=None, hex=code)
    return context.advance(op.hex), op
def _codegen_call(context: Context, op: Op) -> Tuple[Context, Op]:
    """CALL pseudoinstruction: several underlying variants.

    The first argument is the call destination (address literal, register,
    register dereference or address dereference); the second is a register.
    The address-literal form assembles to six bytes, all other forms to four.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context with its output position advanced.
      op: updated op; `todo` is cleared and `hex` set once both arguments
          have been fully parsed.

    Raises:
      ValueError: if an address-dereference destination carries a pre- or
          postcrement (the argument check helpers may raise as well).
    """
    dest_types = (Type.ADDRESS | Type.REGISTER |
                  Type.DEREF_REGISTER | Type.DEREF_ADDRESS)
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op, dest_types, Type.REGISTER))

    # The code length depends on the destination's type, so when parsing is
    # still incomplete we advance by the length that type implies.
    if not all_args_parsed(op.args):
        nbytes = 6 if op.args[0].argtype & Type.ADDRESS else 4
        return context.advance_by_bytes(nbytes), op

    dest = op.args[0]
    if dest.argtype & Type.ADDRESS:
        # Call an address literal. Note that there is a way to do this in two
        # halfwords: for that, use the RCALL pseudoinstruction.
        _jmpdestcheck(dest)
        _regcheck(op.args[1])
        code = '0{:X}03D0{:X}1{:04X}'.format(
            op.args[1].integer, op.args[1].integer, dest.integer)
    elif dest.argtype & Type.REGISTER:
        # Call an address stored inside a register.
        _callregcheck(dest, op.args[1])
        code = '0{:X}0300{:X}4'.format(op.args[1].integer, dest.integer)
    elif dest.argtype & Type.DEREF_REGISTER:
        # Call an address stored at a memory location in a register.
        _callregcheck(dest, op.args[1])
        _regderefcheck(dest, postcrem_from=-2, postcrem_to=2)  # Words.
        modifier = _postcrement_to_modifier(2 * dest.postcrement)
        code = '0{:X}03D0{:X}{:X}'.format(
            op.args[1].integer, dest.integer, modifier)
    else:
        # Call an address stored at a low memory location.
        _regcheck(op.args[1])
        _lowwordaddrcheck(dest)
        assert op.opcode is not None  # mypy...
        if dest.precrement or dest.postcrement:
            raise ValueError(
                'No (in/de)crementation is allowed for address dereference arguments '
                'to {}'.format(op.opcode.upper()))
        code = '0{:X}0320{:02X}'.format(op.args[1].integer, dest.integer // 2)

    op = op._replace(todo=None, hex=code)
    return context.advance(op.hex), op
def codegen_immed_to_reg(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate code for an instruction that takes a register and a byte.

    NOTE(review): `nybble` is not defined in this function; presumably it is
    bound by an enclosing scope that supplies the leading opcode digit.
    """
    parsed = parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.REGISTER, Type.NUMBER)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args):
        _regcheck(op.args[0])
        _bytecheck(op.args[1])
        # The literal is reduced modulo 256 so it occupies two hex digits.
        code = '{:X}{:X}{:02X}'.format(
            nybble, op.args[0].integer, op.args[1].integer % 256)
        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def _codegen_org(context: Context, op: Op) -> Tuple[Context, Op]:
    """ORG pseudoinstruction: set current output stream position.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context, with `pos` set to the argument's value if
          the argument could be parsed.
      op: updated op; it generates no hex data of its own.
    """
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.ADDRESS))

    # If the argument can't be parsed yet, the op's `todo` is left unchanged
    # so that another attempt is made later.
    if not all_args_parsed(op.args):
        return context, op

    op = op._replace(hex='', todo=None)
    return context._replace(pos=op.args[0].integer), op
def codegen_onereg(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate code for an instruction that takes a single register.

    NOTE(review): `nybble_1`, `nybble_2` and `template` are not defined in
    this function; presumably they are bound by an enclosing scope.
    """
    # The one register argument to this opcode should be parseable.
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.REGISTER)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args):
        _regcheck(*op.args)
        code = template.format(nybble_1, op.args[0].integer, nybble_2)
        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the argument was fully parsed.
    return context.advance_by_bytes(2), op
def _codegen_ctrl(context: Context, op: Op) -> Tuple[Context, Op]:
    """CTRL instruction.

    Takes an address argument (validated by `_devcheck`) and a numeric
    argument (validated by `_bytecheck`).
    """
    parsed = parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.NUMBER)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args):
        _devcheck(op.args[0])
        _bytecheck(op.args[1])
        # The literal is reduced modulo 256 so it occupies two hex digits.
        code = '1{:X}{:02X}'.format(
            op.args[0].integer, op.args[1].integer % 256)
        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def asmpass_codegen(context: Context, op: Op) -> Tuple[Context, Op]:
    """Attempts to generate binary code from a partially-parsed source line.

    Apart from the built-in architecture-switching pseudo-opcodes, the work is
    delegated to the opcode-specific handler found in `context.codegen`. A
    handler should do its best to complete the information in `op` and to
    advance the current output position (`context.pos`). When it manages
    both, it returns an `op` whose `todo` is None; otherwise `op.todo` should
    remain this function so that another attempt is made.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: updated assembler context.
      op: updated source code line data structure.

    Raises:
      ValueError: if an architecture-switching pseudo-opcode does not have
          exactly one argument.
      Error: if the opcode has no handler for the current architecture.
    """
    arch_switch_opcodes = ('cpu', '.cpu', 'arch', '.arch')

    # Bind any labels first, before an `org` statement can send us packing to
    # another binary location.
    for label in op.labels:
        context = context.bind_label(label)

    assert op.args is not None
    if op.opcode in arch_switch_opcodes:
        # "Built-in" opcodes: switch the target architecture.
        if len(op.args) != 1:
            raise ValueError('The {} pseudo-opcode takes one argument'.format(
                op.opcode.upper()))
        op = op._replace(todo=None)
        context = _switch_arch(
            op.lineno, op.line, context, op.args[0].stripped)
    else:
        # Everything else goes to the "architecture specific" generators.
        if op.opcode not in context.codegen:
            raise Error(
                op.lineno, op.line,
                'Opcode "{}" not recognised for architecture {}'.format(
                    op.opcode, context.arch))
        context, op = context.codegen[op.opcode](context, op)

    # Regardless of what the handler thinks, this line has to be generated
    # again if any of its labels is still unbound, or if there is no output
    # location for its hex data.
    has_unbound_label = any(
        label not in context.labels for label in op.labels)
    if has_unbound_label or context.pos is None:
        op = op._replace(todo=asmpass_codegen)

    return context, op
def _codegen_putb(context: Context, op: Op) -> Tuple[Context, Op]:
    """PUTB instruction.

    Takes an address argument (validated by `_devcheck`) and a register
    dereference whose postcrement must lie in -4..4.
    """
    parsed = parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.DEREF_REGISTER)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args):
        _devcheck(op.args[0])
        _regderefcheck(op.args[1], postcrem_from=-4, postcrem_to=4)
        modifier = _postcrement_to_modifier(op.args[1].postcrement)
        code = '4{:X}{:X}{:X}'.format(
            op.args[0].integer, op.args[1].integer, modifier)
        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def main(FLAGS: argparse.Namespace):
    """Main function.

    Builds an assembler context for the architecture named by `FLAGS.arch`,
    then assembles `FLAGS.input_file` into `FLAGS.output_file`, with an
    optional listing to `FLAGS.listing`. Assembly errors are printed to
    standard error.
    """
    # Start from an empty context and load the code generators for the chosen
    # architecture into it.
    blank_context = Context(
        arch='', codegen={}, encode_str=lambda s: bytes(s, 'ascii'))
    pseudo_line = '--arch={}'.format(FLAGS.arch)
    context = _switch_arch(-1, pseudo_line, blank_context, FLAGS.arch)

    # Run the assembler.
    try:
        assemble(context, FLAGS.input_file, FLAGS.output_file, FLAGS.listing)
    except Error as error:
        print(error, file=sys.stderr)
def _codegen_lwi(context: Context, op: Op) -> Tuple[Context, Op]:
    """LWI pseudoinstruction: MOVE RX, (RO)+; DW i.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced by four bytes.
      op: updated op; `todo` is cleared and `hex` set once both arguments
          have been fully parsed.

    Raises:
      ValueError: if the literal is outside -32768..65535 (the register check
          helper may raise as well).
    """
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.REGISTER, Type.NUMBER))

    if all_args_parsed(op.args):
        _regcheck(op.args[0])
        # Accept the full signed and unsigned 16-bit ranges. The lower bound
        # is -32768, matching the error message below.
        if not -32768 <= op.args[1].integer <= 65535:
            raise ValueError(
                'Halfword literal {} not in range -32768..65535'.format(
                    op.args[1].stripped))
        # Negative literals wrap to their two's-complement halfword.
        digits = (op.args[0].integer, op.args[1].integer % 65536)
        op = op._replace(todo=None, hex='D{:X}01{:04X}'.format(*digits))

    # This pseudoinstruction is always four bytes, so the output position can
    # be advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(4), op
def _codegen_move(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVE instruction.

    Supports register-to-register moves, moves between a register and an
    address, and moves between a register and a register dereference. At
    least one of the two arguments must be a register.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced by two bytes.
      op: updated op; `todo` is cleared and `hex` set once both arguments
          have been fully parsed.

    Raises:
      ValueError: if neither argument is a register (the argument check
          helpers may raise as well).
    """
    operand_types = Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER
    op = op._replace(
        args=parse_args_if_able(
            # Note fractional crements enabled.
            _PARSE_OPTIONS._replace(fractional_crements=True),
            context, op, operand_types, operand_types))

    if all_args_parsed(op.args):
        if not any(arg.argtype & Type.REGISTER for arg in op.args):
            raise ValueError(
                'At least one argument to MOVE must be a register')

        # This is a move between registers.
        elif op.args[0].argtype == op.args[1].argtype == Type.REGISTER:
            _regcheck(*op.args)
            digits_r = (op.args[0].integer, op.args[1].integer)
            op = op._replace(todo=None, hex='0{:X}{:X}4'.format(*digits_r))

        # This is a move from/to a specified memory location.
        elif any(arg.argtype == Type.ADDRESS for arg in op.args):
            nybble, argaddr, argreg = (
                (2, op.args[1], op.args[0])
                if op.args[0].argtype & Type.REGISTER
                else (3, op.args[0], op.args[1]))
            _regcheck(argreg)
            _lowwordaddrcheck(argaddr)
            digits_a = (nybble, argreg.integer, argaddr.integer // 2)
            op = op._replace(
                todo=None, hex='{:X}{:X}{:02X}'.format(*digits_a))

        # This is a move from/to an address found in a register.
        else:
            nybble, argderef, argreg = (
                (5, op.args[0], op.args[1])
                if op.args[0].argtype & Type.DEREF_REGISTER
                else (0xD, op.args[1], op.args[0]))
            _regcheck(argreg)
            _regderefcheck(argderef, postcrem_from=-2, postcrem_to=2)  # Words.
            modifier = _postcrement_to_modifier(2 * argderef.postcrement)
            # The register digit always precedes the dereferenced-register
            # digit, whichever way the data moves. Emitting the arguments in
            # source order here would swap the two digits for the 0xD form.
            digits_d = (nybble, argreg.integer, argderef.integer, modifier)
            op = op._replace(
                todo=None, hex='{:X}{:X}{:X}{:X}'.format(*digits_d))

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def _switch_arch(
    lineno: int,
    line: Text,
    context: Context,
    arch: Text,
) -> Context:
    """Switch the architecture we're generating code for.

    Args:
      lineno: source line number to report if the switch fails.
      line: source line text to report if the switch fails.
      context: current assembler context.
      arch: name of the architecture; a module of this name is imported from
          the `tsasm.codegen` package.

    Returns:
      A context whose `arch`, `codegen` and `encode_str` fields come from the
      newly loaded module.

    Raises:
      Error: if the module can't be found or lacks an expected attribute.
    """
    module_name = '.' + arch
    try:
        module = importlib.import_module(module_name, 'tsasm.codegen')
        codegen_table = getattr(module, 'get_codegen')()
        string_encoder = getattr(module, 'encode_str')
        context = context._replace(
            arch=arch, codegen=codegen_table, encode_str=string_encoder)
    except (ModuleNotFoundError, AttributeError):
        raise Error(
            lineno, line,
            'Failed to load a code-generation library for architecture '
            '{!r}'.format(arch))
    return context
def _codegen_bra(context: Context, op: Op) -> Tuple[Context, Op]:
    """BRA pseudoinstruction: ADD/SUB R0,#<dist>.

    Code is only generated once the destination has been parsed and the
    current output position is known, since the branch is relative.
    """
    parsed = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
    op = op._replace(args=parsed)

    if all_args_parsed(op.args) and context.pos is not None:
        _jmpdestcheck(op.args[0])
        offset = _reljmpoffset(context, op.args[0])
        if offset == 0:
            logging.warning(
                'Line %d: A BRA of +2 bytes (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            code = '0004'
        else:
            # Forward branches use the 0xA form, backward ones the 0xF form;
            # either way the encoded distance is one less than its magnitude.
            if offset > 0:
                nybble, distance = 0xA, offset - 1
            else:
                nybble, distance = 0xF, -offset - 1
            code = '{:X}0{:02X}'.format(nybble, distance)
        op = op._replace(todo=None, hex=code)

    # This pseudoinstruction is always two bytes, so the output position can
    # be advanced whether or not the argument was fully parsed.
    return context.advance_by_bytes(2), op
def codegen_data(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate hex data for a data statement.

    Each argument is a quoted string, a label, or an integer. Every value is
    encoded as `element_size` bytes in `endianity` byte order.

    NOTE(review): `align`, `element_size`, `endianity` and `parse_options`
    are not defined in this function; presumably they are bound by an
    enclosing scope.

    Args:
      context: current assembler context.
      op: source code line data structure.

    Returns:
      context: assembler context advanced past the generated data.
      op: updated op; `todo` is cleared only if every label was resolved.

    Raises:
      ValueError: if alignment is required but the output position is not
          yet known.
    """
    # Accumulate hex code here, and track whether we have its final value
    # worked out, or if we're still waiting on labels.
    hexparts = []
    all_hex_ok = True

    # Align the data to match the data quantum, if directed.
    if align and element_size != 1:
        if context.pos is None:
            raise ValueError(
                'Unresolved labels above this line (or other factors) make it '
                'impossible to know how to align this data statement. Consider '
                "an ORG statement to make this data's memory location explicit.")
        # Pad up to the next multiple of element_size. `(-pos) % size` is the
        # distance to that boundary; `pos % size` only coincides with it for
        # element sizes of 1 and 2.
        hexparts.append('00' * ((-context.pos) % element_size))

    # Generate data for each arg. Unlike nearly all other statements, we do
    # most of the parsing ourselves.
    for arg in op.args:
        text = arg.stripped

        # Is the argument a string?
        if text.startswith(('"', "'")):
            hexparts.append(''.join(
                val.to_bytes(element_size, endianity).hex().upper()
                for val in parse_string(parse_options, context, text)))
            continue

        # No, it must be a single integer value. If it looks like a label, try
        # to resolve it and take its value (using 0 as a placeholder while any
        # label is still unresolved); otherwise parse it as an integer.
        if LABEL_RE.fullmatch(text):
            all_hex_ok &= text in context.labels
            val = context.labels[text] if all_hex_ok else 0
        else:
            val = parse_integer(parse_options, context, text)
        hexparts.append(val.to_bytes(element_size, endianity).hex().upper())

    # Package the hex data from all the args and, if appropriate, mark our job
    # as complete.
    op = op._replace(todo=None if all_hex_ok else op.todo,
                     hex=''.join(hexparts))
    return context.advance(op.hex), op
def _codegen_movb(context: Context, op: Op) -> Tuple[Context, Op]:
    """MOVB instruction.

    One argument must be a register and the other a register dereference
    whose postcrement lies in -4..4.

    Raises:
      ValueError: if both arguments have the same type (the argument check
          helpers may raise as well).
    """
    operand_types = Type.REGISTER | Type.DEREF_REGISTER
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op, operand_types, operand_types))

    # Both arguments to this opcode should be parseable, so their types can be
    # compared straight away.
    if op.args[0].argtype == op.args[1].argtype:
        raise ValueError(
            'One MOVB argument should be a register, and the other should be a '
            'register dereference')

    if all_args_parsed(op.args):
        # The opcode nybble depends on which side the plain register is on.
        if op.args[0].argtype & Type.REGISTER:
            nybble, argderef, argreg = 6, op.args[1], op.args[0]
        else:
            nybble, argderef, argreg = 7, op.args[0], op.args[1]
        _regcheck(argreg)
        _regderefcheck(argderef, postcrem_from=-4, postcrem_to=4)
        modifier = _postcrement_to_modifier(argderef.postcrement)
        code = '{:X}{:X}{:X}{:X}'.format(
            nybble, argreg.integer, argderef.integer, modifier)
        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def codegen_add_or_sub(context: Context, op: Op) -> Tuple[Context, Op]:
    """Generate code for adding to or subtracting from a register.

    The second argument is either a literal in 0..256 or another register.

    NOTE(review): `add_or_sub` is not defined in this function; presumably it
    is bound by an enclosing scope to 'add' or to some other value meaning
    subtraction.

    Raises:
      ValueError: if a literal argument is outside 0..256 (the register check
          helper may raise as well).
    """
    op = op._replace(args=parse_args_if_able(
        _PARSE_OPTIONS, context, op,
        Type.REGISTER, Type.NUMBER | Type.REGISTER))

    if all_args_parsed(op.args):
        _regcheck(op.args[0])

        if not op.args[1].argtype & Type.NUMBER:
            # Adding/subtracting the LSB of one register to/from another.
            _regcheck(op.args[1])
            code = '0{:X}{:X}{:X}'.format(
                op.args[0].integer, op.args[1].integer,
                8 if add_or_sub == 'add' else 9)
        else:
            # Adding/subtracting an immediate value to/from a register.
            if not 0 <= op.args[1].integer <= 256:
                raise ValueError('Literal {!r} not in range 0..256'.format(
                    op.args[1].stripped))
            if op.args[1].integer == 0:
                assert op.opcode is not None  # mypy...
                logging.warning(
                    'Line %d: A #0 literal argument to %s is not supported by the %s '
                    'instruction; generating a NOP (MOVE R0, R0) instead',
                    op.lineno, op.opcode.upper(), op.opcode.upper())
                code = '0004'
            else:
                # The encoded byte is one less than the literal.
                code = '{:X}{:X}{:02X}'.format(
                    0xA if add_or_sub == 'add' else 0xF,
                    op.args[0].integer,
                    (op.args[1].integer - 1) % 256)

        op = op._replace(todo=None, hex=code)

    # This instruction is always two bytes, so the output position can be
    # advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(2), op
def _codegen_rcall(context: Context, op: Op) -> Tuple[Context, Op]:
    """RCALL (R=relocatable) pseudoinstruction: INC2 Rx,R0; BRA <addr>.

    Code is only generated once both arguments have been parsed and the
    current output position is known, since the branch is relative.
    """
    parsed = parse_args_if_able(
        _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.REGISTER)
    op = op._replace(args=parsed)
    assert op.args is not None

    if all_args_parsed(op.args) and context.pos is not None:
        _jmpdestcheck(op.args[0])
        _regcheck(op.args[1])
        offset = _reljmpoffset(context, op.args[0])
        if offset == 0:
            logging.warning(
                'Line %d: A +2-byte RCALL (so, an ordinary PC increment) is not '
                'supported by the usual relative jump techniques; generating a NOP '
                '(MOVE R0, R0) instead', op.lineno)
            code = '0{:X}030004'.format(op.args[1].integer)
        else:
            # Forward branches use the 0xA form, backward ones the 0xF form;
            # either way the encoded distance is one less than its magnitude.
            if offset > 0:
                nybble, distance = 0xA, offset - 1
            else:
                nybble, distance = 0xF, -offset - 1
            code = '0{:X}03{:X}0{:02X}'.format(
                op.args[1].integer, nybble, distance)
        op = op._replace(todo=None, hex=code)

    # This pseudoinstruction is always four bytes, so the output position can
    # be advanced whether or not the arguments were fully parsed.
    return context.advance_by_bytes(4), op
def assemble(
    context: Context,
    input_file: TextIO,
    output_file: BinaryIO,
    listing_file: Optional[TextIO],
):
    """Assemble source code from a file.

    Passes are made over all of the ops until the number of ops still waiting
    on `asmpass_codegen` stops changing. If any remain at that point, assembly
    fails; otherwise the binary (and optionally a listing) is written.

    Args:
      context: an assembler context.
      input_file: handle for file containing input source code.
      output_file: handle for file receiving binary output.
      listing_file: optional handle for file receiving a text listing.

    Raises:
      Error: if any error is encountered.
    """
    # Load source code from the input.
    ops, lines = read_source(input_file)
    if not ops:
        raise Error(-1, '<EOF>', 'No code to compile in the input?')

    # Track where each op will commit its hex data to RAM. Entries are None
    # when we don't know that yet.
    addrs: List[Optional[int]] = [None] * len(ops)

    # Keep making passes through all of the ops until the number of pending
    # invocations of `asmpass_codegen` stops changing.
    num_ops_with_codegen_todos = None
    for pass_count in itertools.count(start=1):
        # At the beginning of the pass, reset the current output position.
        context = context._replace(pos=0)

        for i in range(len(ops)):
            # If the position of this op has already been calculated, that
            # value is authoritative. Otherwise, if we have new knowledge of
            # this position, and we're at or after code generation, save it.
            if addrs[i] is not None:
                context = context._replace(pos=addrs[i])
            elif context.pos is not None and ops[i].todo is not asmpass_lexer:
                addrs[i] = context.pos

            if ops[i].todo is None:
                continue

            # Execute the op's `todo` and apply some checks. ValueErrors are
            # "normal" errors owing to bugs in user code; other types are
            # "internal" errors that are likely our fault.
            try:
                context, ops[i] = ops[i].todo(context, ops[i])
                if ops[i].hex and len(ops[i].hex) % 2:
                    raise Error(ops[i].lineno, ops[i].line,
                                'Extra nybble in generated hex.')
            except Error:
                # Already a fully-formed assembler error: pass it through
                # rather than re-wrapping it as an "internal" error.
                raise
            except ValueError as error:
                raise Error(ops[i].lineno, ops[i].line, str(error)) from error
            except Exception as error:
                raise Error(
                    ops[i].lineno, ops[i].line,
                    'Internal error, sorry!\n {}'.format(error)) from error

        # With the pass complete, see if it's time to stop.
        ops_with_codegen_todos = tuple(
            op for op in ops if op.todo == asmpass_codegen)
        if num_ops_with_codegen_todos == len(ops_with_codegen_todos):
            break
        num_ops_with_codegen_todos = len(ops_with_codegen_todos)

    # See if compilation was successful.
    if ops_with_codegen_todos:
        raise Error(
            ops[-1].lineno + 1, '<EOF>',
            'After {} passes, {} statements still have unresolved labels or other '
            'issues preventing full assembly. These statements are:\n'
            ' {}\n'.format(
                pass_count, len(ops_with_codegen_todos),
                '\n '.join('{:>5}: {}'.format(op.lineno, op.line)
                           for op in ops_with_codegen_todos)))

    # Construct a mapping from memory addresses to ops whose binary data will
    # start at those addresses. Complain if multiple ops that actually
    # generate binary data attempt to start in the same location.
    addr_to_op: Dict[int, Op] = {}
    for addr, op in zip(addrs, ops):
        maybe_old_op = addr_to_op.setdefault(addr, op)
        if maybe_old_op is op:
            continue
        if maybe_old_op.hex and op.hex:
            logging.warning(
                'At memory location $%X: replacing previously-generated code.\n'
                ' old - %5d: %s\n new - %5d: %s',
                addr, maybe_old_op.lineno, maybe_old_op.line,
                op.lineno, op.line)
        # An op that generates data takes over the address. This makes the
        # warning above true, and stops an earlier op without data at the
        # same address from hiding a later op that has data.
        if op.hex:
            addr_to_op[addr] = op

    # Write binary output.
    _emit_binary(output_file, addr_to_op)

    # Write listing.
    if listing_file:
        _emit_listing(listing_file, lines, addr_to_op)
def _codegen_nop(context: Context, op: Op) -> Tuple[Context, Op]:
    """NOP pseudoinstruction: MOVE R0, R0."""
    # NOP takes no arguments, but the parse is still performed so that any
    # stray arguments get noticed.
    no_args = parse_args_if_able(_PARSE_OPTIONS, context, op)
    op = op._replace(args=no_args)._replace(todo=None, hex='0004')
    return context.advance(op.hex), op