def _encode_write_operands(self, add_dst=REGISTERS['null'],
                           mul_dst=REGISTERS['null']):
    """Encode the write side of an instruction from two destinations.

    ``add_dst`` and ``mul_dst`` are the destination registers of the add
    and mul ALUs.  Neither may carry unpack bits, and at most one of them
    may carry pack bits.

    Returns a tuple ``(waddr_add, waddr_mul, write_swap, pack_bits)``:

    * ``waddr_add`` / ``waddr_mul`` are ``add_dst.addr`` / ``mul_dst.addr``.
    * ``write_swap`` is ``False`` when ``add_dst.spec`` has the
      ``enc._REG_AW`` bit and ``mul_dst.spec`` has the ``enc._REG_BW``
      bit; it is ``True`` in the mirrored case, which is only tried when
      the first combination does not match.
    * ``pack_bits`` is the pack setting of whichever register has one
      (0 when neither does).

    Raises ``AssembleError`` when both registers request packing or when
    neither write combination is possible.
    """
    assert (add_dst.unpack_bits == 0)
    assert (mul_dst.unpack_bits == 0)

    add_pack = add_dst.pack_bits
    mul_pack = mul_dst.pack_bits
    if add_pack != 0 and mul_pack != 0:
        raise AssembleError(
            'Conflict packing of two registers: {} {}'.format(
                add_dst, mul_dst))
    pack_bits = add_pack or mul_pack

    # Try the straight assignment first, then the swapped one.
    if add_dst.spec & enc._REG_AW and mul_dst.spec & enc._REG_BW:
        write_swap = False
    elif mul_dst.spec & enc._REG_AW and add_dst.spec & enc._REG_BW:
        write_swap = True
    else:
        raise AssembleError(
            'Invalid combination of destination registers: {} {}'.format(
                add_dst, mul_dst))

    return add_dst.addr, mul_dst.addr, write_swap, pack_bits
def _emit(self, op_add, dst=REGISTERS['null'], opd1=REGISTERS['r0'],
          opd2=REGISTERS['r0'], sig='no signal', set_flags=True, **kwargs):
    """Emit an ALU instruction whose mul ALU slot is a ``nop``.

    ``op_add`` is the add-ALU opcode, ``dst`` its destination and
    ``opd1``/``opd2`` its source operands.  ``sig`` names the signal to
    encode; it is forced to ``'alu small imm'`` when an operand is a small
    immediate.  ``set_flags`` is forced to ``False`` for the add ``nop``.
    The only keyword consulted in ``kwargs`` is ``cond`` (default
    ``'always'``), the condition of the add ALU; the mul ALU condition is
    always ``'never'``.

    Returns a ``MulEmitter`` holding the add-ALU arguments so that a mul
    operation can be issued together with it.

    Raises ``AssembleError`` when ``sig`` cannot be combined with an
    immediate or when packing conflicts with unpacking.
    """
    muxes, raddr_a, raddr_b, use_imm, unpack, read_pm = \
        self._encode_read_operands(opd1, opd2)

    if use_imm:
        if sig not in ('no signal', 'alu small imm'):
            raise AssembleError(
                '\'{}\' can not be used with immediate'.format(sig))
        sig = 'alu small imm'
    sig_bits = enc._SIGNAL[sig]

    if op_add == enc._ADD_INSN['nop']:
        set_flags = False

    waddr_add, waddr_mul, write_swap, pack = \
        self._encode_write_operands(dst)

    # Packing requested through the destination register always uses
    # pm == 0 here, so an unpack that needs pm != 0 cannot coexist with it.
    if unpack and pack and read_pm != 0:
        raise AssembleError('Conflict of packing and unpacking')
    pm = read_pm if unpack else 0

    cond_add_str = kwargs.get('cond', 'always')
    cond_add = enc._COND[cond_add_str]
    cond_mul = enc._COND['never']

    add_in_a, add_in_b, mul_in_a, mul_in_b = \
        muxes[0], muxes[1], muxes[2], muxes[3]
    insn = enc.AluInsn(
        sig=sig_bits, unpack=unpack, pm=pm, pack=pack, sf=set_flags,
        ws=write_swap, cond_add=cond_add, cond_mul=cond_mul,
        op_add=op_add, op_mul=enc._MUL_INSN['nop'],
        waddr_add=waddr_add, waddr_mul=waddr_mul,
        raddr_a=raddr_a, raddr_b=raddr_b,
        add_a=add_in_a, add_b=add_in_b, mul_a=mul_in_a, mul_b=mul_in_b
    )
    if self.asm.sanity_check:
        insn.verbose = AddInstr(enc._ADD_INSN_REV[op_add], dst, opd1, opd2,
                                sig, set_flags, cond_add_str)
    self.asm._emit(insn)

    # Hand the add-ALU arguments to a MulEmitter so the caller can attach
    # a mul operation (dual issue).
    return MulEmitter(
        self.asm, op_add=op_add, add_dst=dst, add_opd1=opd1, add_opd2=opd2,
        cond_add=cond_add, sig=sig, set_flags=set_flags, increment=False)
def _emit(self, cond_br, target=None, reg=None, absolute=False,
          link=REGISTERS['null']):
    """Emit a branch instruction.

    ``cond_br`` is the encoded branch condition.  ``target`` may be
    ``None`` (immediate 0), an ``int`` used as the immediate as-is, or a
    ``Label``; for a label the immediate is left at 0 and a backpatch item
    is registered under the label's name.  ``reg``, when truthy, must be a
    general purpose register readable from regfile A and is encoded in
    ``raddr_a``.  The branch is relative unless ``absolute`` is true.
    ``link`` is the destination register passed to
    ``_encode_write_operands``; it must not request packing.

    Raises ``AssembleError`` for an unsupported ``target``, an unsuitable
    ``reg`` or a packed ``link`` register.
    """
    imm = 0
    if isinstance(target, Label):
        # The label is only referenced here, not defined: unpin it and
        # let the backpatch step fill the immediate in.
        target.pinned = False
        self.asm._add_backpatch_item(target.name)
    elif isinstance(target, int):
        imm = target
    elif target is not None:
        raise AssembleError('Invalid branch target: {}'.format(target))

    raddr_a = 0
    use_reg = False
    if reg:
        if not (reg.spec & enc._REG_AR
                and reg.name in enc.GENERAL_PURPOSE_REGISTERS):
            raise AssembleError(
                'Must be general purpose regfile A register {}'.format(
                    reg))
        assert (reg.addr < 32)
        raddr_a = reg.addr
        use_reg = True

    waddr_add, waddr_mul, write_swap, pack = \
        self._encode_write_operands(link)
    if pack:
        raise AssembleError('Packing is not available for link register')

    insn = enc.BranchInsn(sig=0xF, cond_br=cond_br, rel=not absolute,
                          reg=use_reg, raddr_a=raddr_a, ws=write_swap,
                          waddr_add=waddr_add, waddr_mul=waddr_mul,
                          immediate=imm)
    if self.asm.sanity_check:
        insn.verbose = BranchInstr(enc._COND_REV[cond_br], target, reg,
                                   absolute, link)
    self.asm._emit(insn)
def _encode_imm(self, val): if isinstance(val, float): return unpack('<L', pack('f', val))[0], 0 elif isinstance(val, numbers.Number): fmt = 'l' if val < 0 else 'L' return unpack('L', pack(fmt, val))[0], 0 elif isinstance(val, (list, tuple, numpy.ndarray)): return self._encode_per_element_imm(list(val)) raise AssembleError('Unsupported immediate value {}'.format(val))
def setup_dma_store_stride(asm, val, blockmode=False,
                           tmp_reg=REGISTERS['r0']):
    """Write a stride setup word to the ``vpmvcd_wr_setup`` register.

    The word is ``(3 << 30) | (blockmode << 16) | val``.  When ``val`` is
    an ``int`` the whole word is loaded with a single ``ldi``.  Otherwise
    ``val`` is treated as a register: the constant part is loaded into
    ``tmp_reg`` and OR-ed with ``val`` into the setup register.

    Raises ``AssembleError`` when ``val`` is a register with the same name
    as ``tmp_reg``, since ``tmp_reg`` is overwritten before ``val`` is
    read.
    """
    val_is_int = isinstance(val, int)
    if not val_is_int and val.name == tmp_reg.name:
        raise AssembleError('setup_dma_store_stride uses \'{}\' internally'
                            .format(tmp_reg.name))

    header = (3 << 30) | (blockmode << 16)
    if val_is_int:
        asm.ldi(REGISTERS['vpmvcd_wr_setup'], header | val)
        return
    asm.ldi(tmp_reg, header)
    asm.bor(REGISTERS['vpmvcd_wr_setup'], tmp_reg, val)
def setup_dma_load_stride(asm, val, tmp_reg=REGISTERS['r0']):
    """Write a stride setup word to the ``vpmvcd_rd_setup`` register.

    The word is ``(9 << 28) | val``.  When ``val`` is an ``int`` the whole
    word is loaded with a single ``ldi``.  Otherwise ``val`` is treated as
    a register: the constant part is loaded into ``tmp_reg`` and OR-ed
    with ``val`` into the setup register.

    Returns whatever the final ``asm.ldi`` / ``asm.bor`` call returns.

    Raises ``AssembleError`` when ``val`` is a register with the same name
    as ``tmp_reg``, since ``tmp_reg`` is overwritten before ``val`` is
    read.
    """
    if not isinstance(val, int) and val.name == tmp_reg.name:
        # The message previously named setup_dma_store_stride (copy-paste
        # slip), which pointed users at the wrong call site.
        raise AssembleError('setup_dma_load_stride uses \'{}\' internally'
                            .format(tmp_reg.name))
    if isinstance(val, int):
        return asm.ldi(REGISTERS['vpmvcd_rd_setup'], (9 << 28) | val)
    else:
        asm.ldi(tmp_reg, 9 << 28)
        return asm.bor(REGISTERS['vpmvcd_rd_setup'], tmp_reg, val)
def _emit(self, *args, **kwargs):
    """Load immediate.

    Store ``value`` to the register ``a``.

    >>> ldi(a, value)

    You can use two destination registers. ``value`` will be stored to
    both register ``a`` and ``b``.

    >>> ldi(a, b, value)

    Available immediate values:

    * signed and unsigned integers.
    * floating point numbers.
    * List of 2-bit signed or unsigned integers. Its maximum length is 16.

    The third behaves exceptionally. Values of the list will be stored to
    each SIMD element one by one. When the length of the list is shorter
    than 16, 0s are stored for the remaining elements.

    Keyword arguments: ``cond`` (default ``'always'``) is applied to both
    ALU conditions, ``set_flags`` defaults to ``False``, and ``sig`` may
    only be ``'load'``.

    Raises ``AssembleError`` when the number of positional arguments is
    not 2 or 3, or when a signal other than ``'load'`` is requested.
    """
    # Validate the call shape up front: one argument used to fail with an
    # IndexError and extra arguments were silently ignored.
    if len(args) == 2:
        reg1, imm = args
        reg2 = REGISTERS['null']
    elif len(args) == 3:
        reg1, reg2, imm = args
    else:
        raise AssembleError(
            'ldi takes 2 or 3 positional arguments ({} given)'.format(
                len(args)))

    sig = kwargs.get('sig', 'load')
    if sig != 'load':
        raise AssembleError('Conflict of signals')

    waddr_add, waddr_mul, write_swap, pack = \
        self._encode_write_operands(reg1, reg2)

    # From here on ``imm`` is the encoded 32-bit immediate field.
    imm, unpack = self._encode_imm(imm)

    cond_add = cond_mul = enc._COND[kwargs.get('cond', 'always')]
    set_flags = kwargs.get('set_flags', False)

    insn = enc.LoadInsn(sig=0xe, unpack=unpack, pm=0, pack=pack,
                        cond_add=cond_add, cond_mul=cond_mul, sf=set_flags,
                        ws=write_swap, waddr_add=waddr_add,
                        waddr_mul=waddr_mul, immediate=imm)
    if self.asm.sanity_check:
        insn.verbose = LoadImmInstr(reg1, reg2, imm)
    self.asm._emit(insn)
def _encode_per_element_imm(self, values): if len(values) > 16: raise AssembleError('Too many immediate values {}'.format(values)) values.extend([0] * (16 - len(values))) unsigned = all(map(lambda x: x >= 0, values)) high = 0 low = 0 for i in reversed(range(16)): high <<= 1 low <<= 1 v = values[i] if (not unsigned and (v >= 2 or v < -2)) or (unsigned and v >= 4): raise AssembleError('{} is not a 2-bit {}signed value'.format( v, ['', 'un'][unsigned])) high |= (v & 0x2) >> 1 low |= v & 0x1 return (high << 16) | low, 2 * unsigned + 1
def _fix_labels(self): new_labels = [] label_dict = {} for label, pc in self._labels: if not label.pinned: continue if label.name in label_dict: raise AssembleError('Duplicated label: {}'.format(label.name)) label_dict[label.name] = pc new_labels.append((label, pc)) self._labels = new_labels return label_dict
def _emit(self, sa, sema_id):
    """Emit a semaphore instruction.

    ``sa`` is passed through to the ``sa`` field of ``enc.SemaInsn`` and
    ``sema_id`` selects the semaphore; it must be in the range 0..15.
    Both write addresses are set to the address of the ``null`` register.

    Raises ``AssembleError`` when ``sema_id`` is out of range.
    """
    if not 0 <= sema_id <= 15:
        raise AssembleError('Semaphore id must be in range (0..15)')

    discard = REGISTERS['null'].addr
    insn = enc.SemaInsn(
        sig=0xE, unpack=4, pm=0, pack=0,
        cond_add=1, cond_mul=1, sf=0, ws=0,
        waddr_add=discard, waddr_mul=discard,
        sa=sa, semaphore=sema_id)
    if self.asm.sanity_check:
        insn.verbose = SemaInstr(sa, sema_id)
    self.asm._emit(insn)
def _backpatch(self): 'Backpatch immediates of branch _instructions' labels = self._fix_labels() for i, label in self._backpatch_list: if label not in labels: raise AssembleError('Undefined label {}'.format(label)) insn = self._instructions[i] assert (isinstance(insn, enc.BranchInsn)) assert (insn.rel) insn.immediate = labels[label] - 8 * (i + 4) self._backpatch_list = []
def qpu(f):
    """Decorator for writing QPU assembly language.

    To write a QPU assembly program, decorate a function which has a
    parameter ``asm`` as the first argument with @qpu like this::

        @qpu
        def code(asm):
            mov(r0, uniform)
            iadd(r0, r0, 1)

            ...

            exit()

    This code is equivalent to::

        def code(asm):
            asm.mov(asm.r0, asm.uniform)
            asm.iadd(asm.r0, asm.r0, 1)

            ...

            asm.exit()

    The wrapper injects the registers, the ``ra``/``rb`` register lists,
    every public attribute of the assembler, ``L`` and ``namespace`` into
    the globals of ``f`` before each call, then calls ``f`` and returns
    its result.

    Raises ``AssembleError`` when ``f`` has no parameter named ``asm``.
    """
    import functools

    # inspect.getargspec was removed in Python 3.11; prefer
    # getfullargspec and fall back only on interpreters that lack it.
    get_spec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    if 'asm' not in get_spec(f)[0]:
        raise AssembleError('Argument named \'asm\' is necessary')

    @functools.wraps(f)
    def decorated(asm, *args, **kwargs):
        g = f.__globals__
        # Expose every register under its own name.
        for reg in Assembler._REGISTERS:
            g[str(reg)] = asm._REGISTERS[str(reg)]
        g['ra'] = [g['ra{}'.format(i)] for i in range(32)]
        g['rb'] = [g['rb{}'.format(i)] for i in range(32)]
        # Expose the public assembler attributes bound to this instance.
        for i in dir(Assembler):
            if i[0] != '_':
                g[str(i)] = getattr(asm, str(i))
        g['L'] = asm.L
        g['namespace'] = asm.namespace
        return f(asm, *args, **kwargs)

    return decorated
def _encode_read_operands(self, add_a=REGISTERS['r0'],
                          add_b=REGISTERS['r0'], mul_a=REGISTERS['r0'],
                          mul_b=REGISTERS['r0']):
    """Encode input muxes, raddr_a, raddr_b and unpack settings from the
    given four source operands.

    ``add_a``/``add_b`` are the inputs of the add ALU and
    ``mul_a``/``mul_b`` those of the mul ALU.  An operand is either a
    ``Register`` or a value that is looked up in ``enc._SMALL_IMM`` by its
    ``repr()``.

    Operands are placed in several passes, most constrained first:
    accumulators, registers readable only from regfile B, small
    immediates (which occupy ``raddr_b``), registers readable only from
    regfile A, and finally registers readable from either file.

    Returns a list
    ``[muxes, raddr_a, raddr_b, use_small_imm, unpack_bits, pm_bit]``
    where ``muxes`` holds the four input mux codes in the order
    ``add_a, add_b, mul_a, mul_b``.  Unused read addresses are set to the
    address of the ``null`` register.

    Raises ``AssembleError`` when the operands request conflicting unpack
    modes or cannot all be placed.  A non-register operand that is not a
    key of ``enc._SMALL_IMM`` presumably surfaces as ``KeyError`` from the
    lookup -- confirm the type of that table.
    """
    operands = [add_a, add_b, mul_a, mul_b]
    muxes = [None, None, None, None]
    unpack_bits = 0
    pm_bit = 0
    raddr_a = None
    raddr_b = None
    small_imm = None

    # Encode unpacking.
    # The first operand with unpack bits decides the mode; every other
    # unpacking operand has to agree on both unpack_bits and pm_bit.
    for opd in operands:
        if not isinstance(opd, Register):
            continue
        if opd.unpack_bits:
            if unpack_bits == 0:
                unpack_bits = opd.unpack_bits
                pm_bit = opd.pm_bit
            elif (opd.unpack_bits != unpack_bits or
                  opd.pm_bit != pm_bit):
                raise AssembleError('Conflict of unpacking')

    # Assign input muxes for accumulators.
    for i, opd in enumerate(operands):
        if isinstance(opd, Register) and opd.name in enc.ACCUMULATORS:
            muxes[i] = enc._INPUT_MUXES[opd.name]
    # Fast path: when every operand is an accumulator no read address is
    # needed at all.
    if all(m is not None for m in muxes):
        null_addr = REGISTERS['null'].addr
        return [muxes, null_addr, null_addr, False, unpack_bits, pm_bit]

    # Locate operands which have to be regfile B register.
    # All such operands must share one address since only a single
    # raddr_b value is encoded.
    for i, opd in enumerate(operands):
        if muxes[i] is not None or not isinstance(opd, Register):
            continue
        if opd.spec & enc._REG_BR and not (opd.spec & enc._REG_AR):
            if raddr_b is None:
                raddr_b = opd.addr
                muxes[i] = enc._INPUT_MUXES['B']
            elif raddr_b == opd.addr:
                muxes[i] = enc._INPUT_MUXES['B']
            else:
                raise AssembleError('Too many regfile B source operand')

    # Locate small immediates.
    # Every non-register operand is treated as a small immediate; all of
    # them must encode to the same value.
    for i, opd in enumerate(operands):
        if muxes[i] is not None or isinstance(opd, Register):
            continue
        imm = enc._SMALL_IMM[repr(opd)]
        if small_imm is None:
            small_imm = imm
            muxes[i] = enc._INPUT_MUXES['B']
        elif small_imm == imm:
            muxes[i] = enc._INPUT_MUXES['B']
        else:
            raise AssembleError('Too many immediates')

    # Check of raddr_b conflict.
    # A small immediate is carried in raddr_b, so it cannot be combined
    # with a regfile B read.
    if small_imm is not None and raddr_b is not None:
        raise AssembleError(
            'Conflict of regfile B source operand and immedaite value')
    if small_imm is not None:
        raddr_b = small_imm

    # Locate operands which have to be regfile A register.
    # Only registers are left unplaced at this point, so opd.spec is safe
    # to access.
    for i, opd in enumerate(operands):
        if muxes[i] is not None:
            continue
        if opd.spec & enc._REG_AR and not (opd.spec & enc._REG_BR):
            if raddr_a is None:
                raddr_a = opd.addr
                muxes[i] = enc._INPUT_MUXES['A']
            elif raddr_a == opd.addr:
                muxes[i] = enc._INPUT_MUXES['A']
            else:
                raise AssembleError('Too many regfile A source operand')

    # Locate remaining operands.
    # These are readable from either file: prefer regfile A, and fall back
    # to regfile B only when raddr_b is still free (no small immediate and
    # no regfile B read) or already holds the same address.
    for i, opd in enumerate(operands):
        if muxes[i] is not None:
            continue

        if not (opd.spec & (enc._REG_AR | enc._REG_BR)):
            raise AssembleError('{} can not be a read operand'.format(opd))

        if raddr_a is None or raddr_a == opd.addr:
            raddr_a = opd.addr
            muxes[i] = enc._INPUT_MUXES['A']
        elif (small_imm is None and raddr_b is None) or \
                raddr_b == opd.addr:
            raddr_b = opd.addr
            muxes[i] = enc._INPUT_MUXES['B']
        else:
            raise AssembleError('Failed to locate operand {}'.format(opd))

    # Read addresses that ended up unused point at the null register.
    if raddr_a is None:
        raddr_a = REGISTERS['null'].addr
    if raddr_b is None:
        raddr_b = REGISTERS['null'].addr

    use_small_imm = (small_imm is not None)
    return [muxes, raddr_a, raddr_b, use_small_imm, unpack_bits, pm_bit]
def _emit_with_defaults(self, op_mul, mul_dst, mul_opd1, mul_opd2,
                        rotate=0, pack='nop', **kwargs):
    """Emit an ALU instruction that combines the stored add operation
    with a mul operation.

    The add-ALU side (``op_add``, ``add_dst``, ``add_opd1``, ``add_opd2``,
    ``cond_add``, ``sig``, ``set_flags``) is read from ``self``; the
    mul-ALU side comes from the arguments.

    ``op_mul`` is the mul-ALU opcode, ``mul_dst`` its destination and
    ``mul_opd1``/``mul_opd2`` its source operands.  ``rotate`` is either
    an integer or the ``r5`` register; any truthy value enables rotation.
    ``pack`` is a key of ``enc._MUL_PACK``.  ``kwargs`` may carry ``sig``
    (default ``'no signal'``) and ``cond`` (default ``'always'``), the
    condition of the mul ALU.

    Raises ``AssembleError`` on conflicting pack/unpack settings,
    conflicting signals, a signal that cannot be used with an immediate,
    or an invalid rotate combination.
    """
    mul_pack = enc._MUL_PACK[pack]

    muxes, raddr_a, raddr_b, use_imm, unpack, read_pm = \
        self._encode_read_operands(self.add_opd1, self.add_opd2,
                                   mul_opd1, mul_opd2)

    waddr_add, waddr_mul, write_swap, regA_pack = \
        self._encode_write_operands(self.add_dst, mul_dst)

    # Only one pack operation fits in an instruction: either the mul ALU
    # pack or the one requested through a destination register.
    if mul_pack and regA_pack:
        raise AssembleError('Multiple pack operationss')

    # The pack side wants pm set exactly when the mul ALU pack is used.
    write_pm = (mul_pack != 0)
    pack = mul_pack or regA_pack

    # pm is shared by pack and unpack, so both sides have to agree on it
    # when they are used together.
    pm = 0
    if unpack and pack:
        if read_pm != write_pm:
            raise AssembleError('Conflict of packing and unpacking')
        pm = read_pm
    elif unpack and not pack:
        pm = read_pm
    elif pack and not unpack:
        pm = write_pm

    # Merge the signal given here with the one stored for the add
    # operation; at most one of them may differ from 'no signal'.
    sig = kwargs.get('sig', 'no signal')
    if self.sig != 'no signal':
        if sig != 'no signal':
            raise AssembleError('Conflict of signals')
        sig = self.sig

    # Small immediates and rotation both need the 'alu small imm' signal.
    if use_imm or rotate:
        if sig != 'no signal' and sig != 'alu small imm':
            raise AssembleError(
                '\'{}\' can not be used with immediate'.format(sig))
        sig = 'alu small imm'
    sig_bits = enc._SIGNAL[sig]

    if rotate:
        # NOTE(review): 5 and 7 are presumably the input mux codes of r5
        # and regfile B (the message below excludes exactly those
        # sources) -- confirm against enc._INPUT_MUXES.
        if muxes[2] == 5 or muxes[2] == 7 or \
                muxes[3] == 5 or muxes[3] == 7:
            raise AssembleError('Rotate operation is only available when'
                                ' inputs are taken from r0-r4 or ra')
        if use_imm:
            # 'r5 rotate' represents -1.
            # 'n-upward rotate' represents n-16.
            # So these combinations can be used (1 <= n <= 15):
            # +-----------+--------+
            # | small imm | rotate |
            # +-----------+--------+
            # |    -16    |   r5   |
            # |    -n     |   -n   |
            # +-----------+--------+
            # c.f. https://vc4-notes.tumblr.com/post/153467713064/
            if rotate == REGISTERS['r5']:
                if raddr_b != enc._SMALL_IMM['-16']:
                    raise AssembleError(
                        'Conflict immediate value and r5 rotate')
            elif raddr_b != enc._SMALL_IMM[str(rotate % 16 - 16)]:
                raise AssembleError(
                    'Conflict immediate value and n rotate')
        # The rotation replaces raddr_b: 48 selects rotate-by-r5 and
        # 48 + n a rotation by n (n taken modulo 16).
        if rotate == REGISTERS['r5']:
            raddr_b = 48
        else:
            raddr_b = 48 + rotate % 16

    cond_add = self.cond_add
    cond_mul_str = kwargs.get('cond', 'always')
    cond_mul = enc._COND[cond_mul_str]

    insn = enc.AluInsn(sig=sig_bits, unpack=unpack, pm=pm, pack=pack,
                       sf=self.set_flags, ws=write_swap,
                       cond_add=cond_add, cond_mul=cond_mul,
                       op_add=self.op_add, op_mul=op_mul,
                       waddr_add=waddr_add, waddr_mul=waddr_mul,
                       raddr_a=raddr_a, raddr_b=raddr_b,
                       add_a=muxes[0], add_b=muxes[1],
                       mul_a=muxes[2], mul_b=muxes[3])
    if self.asm.sanity_check:
        insn.verbose = MulInstr(enc._MUL_INSN_REV[op_mul], mul_dst,
                                mul_opd1, mul_opd2, sig, self.set_flags,
                                cond_mul_str)
    # ``increment`` is forwarded from this emitter's own setting.
    self.asm._emit(insn, increment=self.increment)