def _codegen_jmp(context: Context, op: Op) -> Tuple[Context, Op]:
  """JMP pseudoinstruction: expands to one of several encodings.

  The single argument may be an address literal, a register dereference, or
  a dereference of a low memory address. The address-literal form occupies
  four bytes; the other two forms occupy two.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op,
      Type.ADDRESS | Type.DEREF_REGISTER | Type.DEREF_ADDRESS)
  op = op._replace(args=parsed_args)

  fully_parsed = all_args_parsed(op.args)
  target = op.args[0]

  # The code we emit has a different length depending on the argument kind,
  # so when parsing is incomplete the position update must be derived from
  # the argument's type alone.
  if not fully_parsed:
    pending_size = 4 if target.argtype & Type.ADDRESS else 2
    return context.advance_by_bytes(pending_size), op

  if target.argtype & Type.ADDRESS:
    # Jump to an address literal.
    _jmpdestcheck(target)
    encoded = 'D001{:04X}'.format(target.integer)
  elif target.argtype & Type.DEREF_REGISTER:
    # Jump to an address held in the memory location a register points at.
    # (Jumping to an address held in the register itself is RET's job.)
    _regderefcheck(target, postcrem_from=0, postcrem_to=0)
    encoded = 'D0{:X}8'.format(target.integer)
  else:
    # Jump to an address held in a low memory location.
    _lowwordaddrcheck(target)
    encoded = '20{:02X}'.format(target.integer // 2)

  op = op._replace(todo=None, hex=encoded)
  return context.advance(op.hex), op
def _codegen_org(context: Context, op: Op) -> Tuple[Context, Op]:
  """ORG pseudoinstruction: set the current output stream position.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: context whose `pos` is the parsed address, or the unchanged
        context if the argument could not be parsed yet.
    op: op with empty `hex` and cleared `todo` on success; otherwise `todo`
        is left alone so that the line is attempted again later.
  """
  parsed_args = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
  op = op._replace(args=parsed_args)

  # Without a usable argument there is nothing more to do on this pass.
  if not all_args_parsed(op.args):
    return context, op

  # ORG emits no bytes of its own; it only relocates the output position.
  op = op._replace(hex='', todo=None)
  new_pos = op.args[0].integer
  return context._replace(pos=new_pos), op
def codegen_immed_to_reg(context: Context, op: Op) -> Tuple[Context, Op]:
  """Generates a two-byte instruction taking a register and a byte literal.

  The leading hex digit is the free variable `nybble`, which is not defined
  in this function (presumably supplied by an enclosing scope -- confirm).

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.REGISTER, Type.NUMBER)
  op = op._replace(args=parsed_args)

  if all_args_parsed(op.args):
    register = op.args[0]
    _regcheck(register)
    literal = op.args[1]
    _bytecheck(literal)
    # The literal is reduced modulo 256 before being encoded as two digits.
    encoded = '{:X}{:X}{:02X}'.format(
        nybble, register.integer, literal.integer % 256)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if the arguments are not parsed yet.
  return context.advance_by_bytes(2), op
def codegen_onereg(context: Context, op: Op) -> Tuple[Context, Op]:
  """Generates a two-byte instruction that takes a single register argument.

  The encoding is built by formatting the free variable `template` with
  `nybble_1`, the register number and `nybble_2`; none of those three names
  is defined in this function (presumably they come from an enclosing scope
  -- confirm).

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.REGISTER)
  op = op._replace(args=parsed_args)

  if all_args_parsed(op.args):
    # The opcode's one register argument is validated before encoding.
    _regcheck(*op.args)
    encoded = template.format(nybble_1, op.args[0].integer, nybble_2)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if the argument is not parsed yet.
  return context.advance_by_bytes(2), op
def asmpass_codegen(context: Context, op: Op) -> Tuple[Context, Op]:
  """Attempts to generate binary code from a partially-parsed source line.

  Apart from the built-in architecture-switching pseudo-opcodes, code
  generation is delegated to the opcode-specific handlers in
  `context.codegen`. A handler should do what it can to complete `op` and to
  advance the output position (`context.pos`). If it manages both, it should
  return an `op` whose `todo` is None; otherwise `todo` should point back at
  this function so that the line gets another attempt.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.

  Raises:
    ValueError: if an architecture-switching pseudo-opcode does not have
        exactly one argument.
    Error: if the opcode has no handler in `context.codegen`.
  """
  # Labels are bound before anything else so that an `org` statement on this
  # line cannot move us to another binary location first.
  for label in op.labels:
    context = context.bind_label(label)

  assert op.args is not None
  if op.opcode in ('cpu', '.cpu', 'arch', '.arch'):
    # Built-in pseudo-opcodes that select the target architecture.
    if len(op.args) != 1:
      raise ValueError('The {} pseudo-opcode takes one argument'.format(
          op.opcode.upper()))
    op = op._replace(todo=None)
    context = _switch_arch(op.lineno, op.line, context, op.args[0].stripped)
  elif op.opcode not in context.codegen:
    raise Error(
        op.lineno, op.line,
        'Opcode "{}" not recognised for architecture {}'.format(
            op.opcode, context.arch))
  else:
    # Everything else belongs to the architecture-specific code generators.
    handler = context.codegen[op.opcode]
    context, op = handler(context, op)

  # Regardless of what the handler decided, the line must be retried if any
  # of its labels is still unbound, or if there is no output location yet
  # for the hex data generated from it.
  has_unbound_label = any(label not in context.labels for label in op.labels)
  if has_unbound_label or context.pos is None:
    op = op._replace(todo=asmpass_codegen)

  return context, op
def _codegen_ctrl(context: Context, op: Op) -> Tuple[Context, Op]:
  """CTRL instruction.

  Takes an address argument, validated by `_devcheck`, and a numeric
  argument, validated by `_bytecheck`.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.NUMBER)
  op = op._replace(args=parsed_args)

  if all_args_parsed(op.args):
    device = op.args[0]
    _devcheck(device)
    value = op.args[1]
    _bytecheck(value)
    # The numeric argument is reduced modulo 256 before being encoded.
    encoded = '1{:X}{:02X}'.format(device.integer, value.integer % 256)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if the arguments are not parsed yet.
  return context.advance_by_bytes(2), op
def _codegen_putb(context: Context, op: Op) -> Tuple[Context, Op]:
  """PUTB instruction.

  Takes an address argument, validated by `_devcheck`, and a register
  dereference whose postcrement must lie within -4..4.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.DEREF_REGISTER)
  op = op._replace(args=parsed_args)

  if all_args_parsed(op.args):
    device = op.args[0]
    _devcheck(device)
    source = op.args[1]
    _regderefcheck(source, postcrem_from=-4, postcrem_to=4)
    modifier = _postcrement_to_modifier(source.postcrement)
    encoded = '4{:X}{:X}{:X}'.format(device.integer, source.integer, modifier)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if the arguments are not parsed yet.
  return context.advance_by_bytes(2), op
def _codegen_lwi(context: Context, op: Op) -> Tuple[Context, Op]:
  """LWI pseudoinstruction: MOVE RX, (R0)+; DW i.

  Loads a 16-bit literal into a register. The generated code is always four
  bytes long: the MOVE instruction followed by the literal itself.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure; `todo` is cleared and `hex`
        is filled in once both arguments have been parsed.

  Raises:
    ValueError: if the literal lies outside -32768..65535.
  """
  op = op._replace(args=parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.REGISTER, Type.NUMBER))
  if all_args_parsed(op.args):
    _regcheck(op.args[0])
    # Accept the whole signed and unsigned 16-bit ranges. -32768 (0x8000) is
    # a legal two's-complement halfword and is the lower bound advertised by
    # the error message, so it must not be rejected.
    if not -32768 <= op.args[1].integer <= 65535:
      raise ValueError(
          'Halfword literal {} not in range -32768..65535'.format(
              op.args[1].stripped))
    # Negative literals wrap around to their two's-complement encoding.
    digits = (op.args[0].integer, op.args[1].integer % 65536)
    op = op._replace(todo=None, hex='D{:X}01{:04X}'.format(*digits))
  # We can still update pos whether we've parsed all args or not.
  return context.advance_by_bytes(4), op
def _codegen_call(context: Context, op: Op) -> Tuple[Context, Op]:
  """CALL pseudoinstruction: expands to one of several encodings.

  The first argument is the call target and may be an address literal, a
  register, a register dereference, or a dereference of a low memory
  address. The second argument is a register. The address-literal form
  occupies six bytes; all other forms occupy four.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.

  Raises:
    ValueError: if a low-memory dereference target carries a precrement or
        postcrement.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op,
      Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER | Type.DEREF_ADDRESS,
      Type.REGISTER)
  op = op._replace(args=parsed_args)

  fully_parsed = all_args_parsed(op.args)
  target = op.args[0]

  # The code we emit has a different length depending on the target kind, so
  # when parsing is incomplete the position update must be derived from the
  # target's type alone.
  if not fully_parsed:
    pending_size = 6 if target.argtype & Type.ADDRESS else 4
    return context.advance_by_bytes(pending_size), op

  link = op.args[1]

  if target.argtype & Type.ADDRESS:
    # Call to an address literal. The RCALL pseudoinstruction offers a way
    # of doing this in only two halfwords.
    _jmpdestcheck(target)
    _regcheck(link)
    encoded = '0{:X}03D0{:X}1{:04X}'.format(
        link.integer, link.integer, target.integer)
  elif target.argtype & Type.REGISTER:
    # Call to an address held in a register.
    _callregcheck(target, link)
    encoded = '0{:X}0300{:X}4'.format(link.integer, target.integer)
  elif target.argtype & Type.DEREF_REGISTER:
    # Call to an address held in the memory location a register points at.
    _callregcheck(target, link)
    _regderefcheck(target, postcrem_from=-2, postcrem_to=2)  # Words.
    modifier = _postcrement_to_modifier(2 * target.postcrement)
    encoded = '0{:X}03D0{:X}{:X}'.format(
        link.integer, target.integer, modifier)
  else:
    # Call to an address held in a low memory location.
    _regcheck(link)
    _lowwordaddrcheck(target)
    assert op.opcode is not None  # mypy...
    if target.precrement or target.postcrement:
      raise ValueError(
          'No (in/de)crementation is allowed for address dereference arguments '
          'to {}'.format(op.opcode.upper()))
    encoded = '0{:X}0320{:02X}'.format(link.integer, target.integer // 2)

  op = op._replace(todo=None, hex=encoded)
  return context.advance(op.hex), op
def _codegen_move(context: Context, op: Op) -> Tuple[Context, Op]:
  """MOVE instruction.

  Three forms are supported, and at least one argument must be a register:
    * register to register;
    * register to/from a low memory address;
    * register to/from the memory location a register points at.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.

  Raises:
    ValueError: if neither argument is a register.
  """
  op = op._replace(
      args=parse_args_if_able(
          # Note fractional crements enabled.
          _PARSE_OPTIONS._replace(fractional_crements=True),
          context, op,
          Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER,
          Type.ADDRESS | Type.REGISTER | Type.DEREF_REGISTER))
  if all_args_parsed(op.args):
    if not any(arg.argtype & Type.REGISTER for arg in op.args):
      raise ValueError(
          'At least one argument to MOVE must be a register')

    # This is a move between registers.
    elif op.args[0].argtype == op.args[1].argtype == Type.REGISTER:
      _regcheck(*op.args)
      digits_r = (op.args[0].integer, op.args[1].integer)
      op = op._replace(todo=None, hex='0{:X}{:X}4'.format(*digits_r))

    # This is a move from/to an address found at a specified memory location.
    elif any(arg.argtype == Type.ADDRESS for arg in op.args):
      nybble, argaddr, argreg = ((2, op.args[1], op.args[0])
                                 if op.args[0].argtype & Type.REGISTER else
                                 (3, op.args[0], op.args[1]))
      _regcheck(argreg)
      _lowwordaddrcheck(argaddr)
      digits_a = (nybble, argreg.integer, argaddr.integer // 2)
      op = op._replace(todo=None, hex='{:X}{:X}{:02X}'.format(*digits_a))

    # This is a move from/to an address found in a register.
    else:
      nybble, argderef, argreg = ((5, op.args[0], op.args[1])
                                  if op.args[0].argtype & Type.DEREF_REGISTER
                                  else (0xD, op.args[1], op.args[0]))
      _regcheck(argreg)
      _regderefcheck(argderef, postcrem_from=-2, postcrem_to=2)  # Words.
      modifier = _postcrement_to_modifier(2 * argderef.postcrement)
      # In both directions the plain register comes before the dereferenced
      # register in the encoding. This is the layout the other handlers in
      # this file use for the same opcode (e.g. LWI's 'D{:X}01' is
      # MOVE RX, (R0)+), so the digits must follow the argument roles rather
      # than the order in which the arguments appear in the source.
      digits_d = (nybble, argreg.integer, argderef.integer, modifier)
      op = op._replace(todo=None, hex='{:X}{:X}{:X}{:X}'.format(*digits_d))

  # We can still update pos whether we've parsed all args or not.
  return context.advance_by_bytes(2), op
def _codegen_bra(context: Context, op: Op) -> Tuple[Context, Op]:
  """BRA pseudoinstruction: ADD/SUB R0,#<dist>.

  Code is generated only once the target argument is parsed and the current
  output position is known. A zero offset is emitted as a NOP, with a
  warning.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(_PARSE_OPTIONS, context, op, Type.ADDRESS)
  op = op._replace(args=parsed_args)

  if all_args_parsed(op.args) and context.pos is not None:
    target = op.args[0]
    _jmpdestcheck(target)
    offset = _reljmpoffset(context, target)
    if offset == 0:
      logging.warning(
          'Line %d: A BRA of +2 bytes (so, an ordinary PC increment) is not '
          'supported by the usual relative jump techniques; generating a NOP '
          '(MOVE R0, R0) instead', op.lineno)
      encoded = '0004'
    elif offset > 0:
      # Forward branch.
      encoded = '{:X}0{:02X}'.format(0xA, offset - 1)
    else:
      # Backward branch.
      encoded = '{:X}0{:02X}'.format(0xF, -offset - 1)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if code could not be generated yet.
  return context.advance_by_bytes(2), op
def _codegen_movb(context: Context, op: Op) -> Tuple[Context, Op]:
  """MOVB instruction.

  One argument must be a register and the other a register dereference
  whose postcrement lies within -4..4.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.

  Raises:
    ValueError: if both arguments have the same type.
  """
  operand_types = Type.REGISTER | Type.DEREF_REGISTER
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, operand_types, operand_types)
  op = op._replace(args=parsed_args)

  # Both arguments to this opcode should be parseable, so their types are
  # compared before checking whether parsing has completed.
  first, second = op.args[0], op.args[1]
  if first.argtype == second.argtype:
    raise ValueError(
        'One MOVB argument should be a register, and the other should be a '
        'register dereference')

  if all_args_parsed(op.args):
    if first.argtype & Type.REGISTER:
      nybble, argderef, argreg = 6, second, first
    else:
      nybble, argderef, argreg = 7, first, second
    _regcheck(argreg)
    _regderefcheck(argderef, postcrem_from=-4, postcrem_to=4)
    modifier = _postcrement_to_modifier(argderef.postcrement)
    encoded = '{:X}{:X}{:X}{:X}'.format(
        nybble, argreg.integer, argderef.integer, modifier)
    op = op._replace(todo=None, hex=encoded)

  # The instruction length is fixed, so the output position can move on even
  # if the arguments are not parsed yet.
  return context.advance_by_bytes(2), op
def codegen_add_or_sub(context: Context, op: Op) -> Tuple[Context, Op]:
  """Generates a two-byte ADD or SUB instruction.

  The operation is chosen by the free variable `add_or_sub`, which is not
  defined in this function (presumably supplied by an enclosing scope --
  confirm). The second argument is either a literal in 0..256 or a register.
  A literal of 0 is emitted as a NOP, with a warning.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.

  Raises:
    ValueError: if a literal second argument lies outside 0..256.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.REGISTER, Type.NUMBER | Type.REGISTER)
  op = op._replace(args=parsed_args)

  # The instruction length is fixed, so the output position can move on even
  # if the arguments are not parsed yet.
  if not all_args_parsed(op.args):
    return context.advance_by_bytes(2), op

  _regcheck(op.args[0])
  operand = op.args[1]

  if not operand.argtype & Type.NUMBER:
    # Add/subtract the LSB of one register to/from another register.
    _regcheck(operand)
    low_nybble = 8 if add_or_sub == 'add' else 9
    encoded = '0{:X}{:X}{:X}'.format(
        op.args[0].integer, operand.integer, low_nybble)
  elif not 0 <= operand.integer <= 256:
    raise ValueError('Literal {!r} not in range 0..256'.format(
        operand.stripped))
  elif operand.integer == 0:
    assert op.opcode is not None  # mypy...
    logging.warning(
        'Line %d: A #0 literal argument to %s is not supported by the %s '
        'instruction; generating a NOP (MOVE R0, R0) instead',
        op.lineno, op.opcode.upper(), op.opcode.upper())
    encoded = '0004'
  else:
    # Add/subtract an immediate value to/from a register. The literal is
    # stored as one less than its value.
    high_nybble = 0xA if add_or_sub == 'add' else 0xF
    encoded = '{:X}{:X}{:02X}'.format(
        high_nybble, op.args[0].integer, (operand.integer - 1) % 256)

  op = op._replace(todo=None, hex=encoded)
  return context.advance_by_bytes(2), op
def _codegen_rcall(context: Context, op: Op) -> Tuple[Context, Op]:
  """RCALL (R=relocatable) pseudoinstruction: INC2 Rx,R0; BRA <addr>.

  Code is generated only once both arguments are parsed and the current
  output position is known. A zero offset replaces the branch with a NOP,
  with a warning.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: updated assembler context.
    op: updated source code line data structure.
  """
  parsed_args = parse_args_if_able(
      _PARSE_OPTIONS, context, op, Type.ADDRESS, Type.REGISTER)
  op = op._replace(args=parsed_args)
  assert op.args is not None

  if all_args_parsed(op.args) and context.pos is not None:
    target = op.args[0]
    _jmpdestcheck(target)
    link = op.args[1]
    _regcheck(link)
    offset = _reljmpoffset(context, target)
    if offset == 0:
      logging.warning(
          'Line %d: A +2-byte RCALL (so, an ordinary PC increment) is not '
          'supported by the usual relative jump techniques; generating a NOP '
          '(MOVE R0, R0) instead', op.lineno)
      encoded = '0{:X}030004'.format(link.integer)
    elif offset > 0:
      # Forward branch.
      encoded = '0{:X}03{:X}0{:02X}'.format(link.integer, 0xA, offset - 1)
    else:
      # Backward branch.
      encoded = '0{:X}03{:X}0{:02X}'.format(link.integer, 0xF, -offset - 1)
    op = op._replace(todo=None, hex=encoded)

  # The code length is fixed, so the output position can move on even if
  # code could not be generated yet.
  return context.advance_by_bytes(4), op
def codegen_data(context: Context, op: Op) -> Tuple[Context, Op]:
  """Generates hex data for a data statement.

  Each argument is a quoted string, a label, or an integer literal, and every
  value is encoded in `element_size` bytes using byte order `endianity`. The
  names `align`, `element_size`, `endianity` and `parse_options` are free
  variables not defined in this function (presumably supplied by an
  enclosing scope -- confirm).

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: assembler context advanced past the generated data.
    op: updated op. `todo` is cleared only if every label argument could be
        resolved; an unresolved label is emitted as 0 for the time being.

  Raises:
    ValueError: if alignment is required but the output position is unknown.
  """
  # Accumulate hex code here, and track whether we have its final value
  # worked out, or if we're still waiting on labels.
  hexparts = []
  all_hex_ok = True

  # Align the data to match the data quantum, if directed.
  if align and element_size != 1:
    if context.pos is None:
      raise ValueError(
          'Unresolved labels above this line (or other factors) make it '
          'impossible to know how to align this data statement. Consider '
          "an ORG statement to make this data's memory location explicit.")
    # Pad up to the next multiple of element_size. The number of bytes
    # needed is the distance to that multiple, not the remainder pos %
    # element_size; the two only coincide for element sizes 1 and 2.
    hexparts.append('00' * ((-context.pos) % element_size))

  # Generate data for each arg. Unlike nearly all other statements, we do
  # most of the parsing ourselves.
  for arg in op.args:
    # Is the argument a string?
    if arg.stripped.startswith(('"', "'")):
      hexparts.append(''.join(
          val.to_bytes(element_size, endianity).hex().upper()
          for val in parse_string(parse_options, context, arg.stripped)))

    # No, it must be a single integer value.
    else:
      # Does the argument look like a label? If so, try to resolve it and
      # take its value. If not, let's parse the argument as an integer value.
      if LABEL_RE.fullmatch(arg.stripped):
        all_hex_ok &= arg.stripped in context.labels
        val = context.labels[arg.stripped] if all_hex_ok else 0
      else:
        val = parse_integer(parse_options, context, arg.stripped)
      # Now encode the value as hex.
      hexparts.append(val.to_bytes(element_size, endianity).hex().upper())

  # Package the hex data from all the args and, if appropriate, mark our job
  # as complete.
  op = op._replace(todo=None if all_hex_ok else op.todo,
                   hex=''.join(hexparts))
  return context.advance(op.hex), op
def asmpass_lexer(context: Context, op: Op) -> Tuple[Context, Op]:
  """Performs lexical analysis on a source code line.

  The line's tokens are split into an opcode (the first token) and its
  arguments (all remaining tokens), which are stored in the `opcode` and
  `args` fields of `op`.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: the assembler context, unchanged.
    op: updated source code line data structure.
  """
  # The first token is always present and names the opcode; whatever follows
  # is the argument list.
  raw_opcode = op.tokens[0]
  raw_args = op.tokens[1:]

  canonical_opcode = raw_opcode.casefold()
  stripped_args = tuple(Arg(stripped=raw.strip()) for raw in raw_args)

  # Code generation is scheduled for the next pass. Arguments are parsed
  # there rather than here so that symbols can be bound as late as possible.
  op = op._replace(
      opcode=canonical_opcode, args=stripped_args, todo=asmpass_codegen)
  return context, op
def _codegen_nop(context: Context, op: Op) -> Tuple[Context, Op]:
  """NOP pseudoinstruction: MOVE R0, R0.

  Args:
    context: current assembler context.
    op: source code line data structure.

  Returns:
    context: assembler context advanced past the generated instruction.
    op: op with its `hex` set to the NOP encoding and `todo` cleared.
  """
  # NOP has no arguments, but parsing is still run with an empty list of
  # expected types to make sure none were supplied.
  no_args = parse_args_if_able(_PARSE_OPTIONS, context, op)
  op = op._replace(args=no_args, todo=None, hex='0004')
  return context.advance(op.hex), op