def FLN_fc(family):
    """Return the ``FLN`` Peak class (natural logarithm) built for *family*.

    The returned class wires four PEs and one ROM together.  Per the
    original header comment the intended result is ``ln(in0)``: the ROM
    (filled from ``TLUT.ln_lut``) is indexed by the value produced by the
    ``fgetmant`` instruction, the value produced by ``fcnvexp2f`` is
    multiplied by a bfloat constant for ``ln(2)``, and the two terms are
    combined with ``fp_add``.

    Args:
        family: family object forwarded to ``PE_fc``.

    Returns:
        The ``FLN`` class (not an instance).
    """
    PE = PE_fc(family)

    class FLN(Peak):
        def __init__(self):
            # One PE per instruction issued in __call__, plus the lookup ROM.
            self.pe_get_mant = PE()
            self.pe_get_exp = PE()
            self.rom = MEM()
            self.pe_mult = PE()
            self.pe_add = PE()

        # result = ln(op_a)
        def __call__(self, in0: Data) -> Data:
            get_mant_inst = asm.fgetmant()
            get_exp_inst = asm.fcnvexp2f()
            mul_inst = asm.fp_mul()
            add_inst = asm.fp_add()

            # 128 LUT entries followed by zero padding up to the ROM depth.
            lut_entries = [TLUT.ln_lut(i) for i in range(0, 128)]
            rom_instr = mem_asm.rom(lut_entries + [0x0000] * (depth - 128))

            op_a = in0

            # ln(2) encoded through float2bfbin (binary string -> int -> Data).
            ln2 = math.log(2)
            ln2_bf = int(float2bfbin(ln2), 2)
            const_ln2 = Data(ln2_bf)

            mant, _, _ = self.pe_get_mant(get_mant_inst, op_a, Data(0))
            fexp, _, _ = self.pe_get_exp(get_exp_inst, op_a, Data(0))
            lookup_result = self.rom(rom_instr, mant, Data(0))
            mult, _, _ = self.pe_mult(mul_inst, fexp, const_ln2)
            # The add runs on its own PE; previously it re-used pe_mult and
            # left pe_add (created in __init__) unused.
            result, _, _ = self.pe_add(add_inst, lookup_result, mult)
            return result

    return FLN
def RoundToZero_fc(family):
    """Return the ``RoundToZero`` Peak class specialised for *family*.

    The class combines three PEs with one ``RoundToZeroBounded`` instance:
    the input is ANDed with an exponent mask, the masked value is compared
    (``asm.ult``) against a constant cutoff, and the comparison bit is fed,
    together with the bounded result and the raw input, to an ``asm.sel``
    instruction.

    Args:
        family: family object; supplies ``BitVector`` and is forwarded to
            ``PE_fc`` and ``RoundToZeroBounded_fc``.

    Returns:
        The ``RoundToZero`` class (not an instance).
    """
    Data = family.BitVector[16]
    PE = PE_fc(family)
    RoundToZeroBounded = RoundToZeroBounded_fc(family)

    class RoundToZero(Peak):
        def __init__(self):
            self.round_to_zero_bounded = RoundToZeroBounded()
            self.pe1 = PE()
            self.pe2 = PE()
            self.pe3 = PE()

        def __call__(self, in0: Data) -> Data:
            # Isolate the exponent field: mask of _EXPONENT_SIZE ones placed
            # just above the _MANTISSA_SIZE low bits.
            exponent_ones = (1 << _EXPONENT_SIZE) - 1
            exponent_mask = Data(exponent_ones << _MANTISSA_SIZE)
            and_inst = asm.and_(rb_mode=asm.Mode_t.CONST, rb_const=exponent_mask)
            masked_exp, _, _ = self.pe1(and_inst, in0)

            # Compare the (still shifted) exponent against
            # (_EXPONENT_BIAS + _MANTISSA_SIZE) aligned to the same position.
            threshold = Data((_EXPONENT_BIAS + _MANTISSA_SIZE) << _MANTISSA_SIZE)
            ult_inst = asm.ult(rb_mode=asm.Mode_t.CONST, rb_const=threshold)
            _, has_frac_bits, _ = self.pe2(ult_inst, masked_exp)

            # Bounded round-to-zero of the input, re-wrapped as Data.
            bounded = Data(self.round_to_zero_bounded(in0))

            # Select between the bounded result and the untouched input using
            # the comparison bit (exact polarity is defined by asm.sel).
            sel_inst = asm.sel()
            selected, _, _ = self.pe3(sel_inst, bounded, in0, has_frac_bits)
            return selected

    return RoundToZero
def FDiv_fc(family):
    """Return the ``FDiv`` Peak class (``in0 / in1``) built for *family*.

    The datapath uses three PEs and one ROM filled from ``TLUT.div_lut``:
    ``fgetmant`` is applied to the divisor, the result indexes the ROM,
    ``fsubexp`` combines the looked-up value with the divisor, and
    ``fp_mul`` multiplies that by the dividend.

    Args:
        family: family object forwarded to ``PE_fc``.

    Returns:
        The ``FDiv`` class (not an instance).
    """
    PE = PE_fc(family)

    class FDiv(Peak):
        def __init__(self):
            self.pe_get_mant = PE()
            self.rom = MEM()
            self.pe_scale_res = PE()
            self.pe_mult = PE()

        # result = op_a / op_b
        def __call__(self, in0: Data, in1: Data) -> Data:
            get_mant_inst = asm.fgetmant()
            sub_exp_inst = asm.fsubexp()
            mul_inst = asm.fp_mul()

            # 128 LUT entries, zero-padded up to the ROM depth.
            table = [TLUT.div_lut(i) for i in range(0, 128)]
            padding = [0x0000] * (depth - 128)
            rom_instr = mem_asm.rom(table + padding)

            dividend = in0
            divisor = in1

            divisor_mant, _, _ = self.pe_get_mant(get_mant_inst, divisor, Data(0))
            lut_value = self.rom(rom_instr, divisor_mant, Data(0))
            scaled, _, _ = self.pe_scale_res(sub_exp_inst, lut_value, divisor)
            quotient, _, _ = self.pe_mult(mul_inst, scaled, dividend)
            return quotient

    return FDiv
def RoundToZeroBounded_fc(family):
    """Return the ``RoundToZeroBounded`` Peak class built for *family*.

    The class chains two PEs: the first executes ``asm.fgetfint`` on the
    input and the second executes ``asm.fcnvsint2f`` on the first PE's
    result.

    Args:
        family: family object; supplies ``BitVector`` and is forwarded to
            ``PE_fc``.

    Returns:
        The ``RoundToZeroBounded`` class (not an instance).
    """
    Data = family.BitVector[16]
    PE = PE_fc(family)

    class RoundToZeroBounded(Peak):
        def __init__(self):
            self.pe1 = PE()
            self.pe2 = PE()

        def __call__(self, in0: Data) -> Data:
            float_to_int_inst = asm.fgetfint()
            int_to_float_inst = asm.fcnvsint2f()

            # Stage 1: float -> integer part; stage 2: integer -> float.
            as_int, _, _ = self.pe1(float_to_int_inst, in0)
            back_to_float, _, _ = self.pe2(int_to_float_inst, as_int)
            return back_to_float

    return RoundToZeroBounded
def Add32_fc(family):
    """Return the ``Add32`` Peak class (32-bit add from two PEs).

    The low 16 bits of both operands are added on one PE with the carry
    condition selected; that PE's second output is passed as ``bit0`` to a
    second PE executing ``asm.adc`` on the high 16 bits.  The two partial
    results are joined with ``Data32.concat``.

    Args:
        family: family object forwarded to ``PE_fc``.

    Returns:
        The ``Add32`` class (not an instance).
    """
    PE = PE_fc(family)
    # Data16 is not referenced below; only Data32 is used in the signature
    # and for the final concat.
    Data16 = BitVector[16]
    Data32 = BitVector[32]

    class Add32(Peak):
        def __init__(self):
            self.pe_lsb = PE()
            self.pe_msb = PE()

        def __call__(self, in0: Data32, in1: Data32) -> Data32:
            low_inst = asm.inst(asm.ALU_t.Add, cond=asm.Cond_t.C)
            high_inst = asm.adc()

            # Low half: plain add, carry-out taken from the second output.
            low_sum, carry, _ = self.pe_lsb(
                low_inst,
                data0=in0[:16],
                data1=in1[:16],
            )
            # High half: add-with-carry, consuming the low half's carry.
            high_sum, _, _ = self.pe_msb(
                high_inst,
                data0=in0[16:],
                data1=in1[16:],
                bit0=carry,
            )
            return Data32.concat(low_sum, high_sum)

    return Add32
def FMA_fc(family):
    """Return the ``FMA`` Peak class (multiply then add) built for *family*.

    Two PEs are chained: the first executes ``asm.smult0`` on ``in0`` and
    ``in1``; the second executes ``asm.add`` on that product and ``in2``.

    Args:
        family: family object forwarded to ``PE_fc``.

    Returns:
        The ``FMA`` class (not an instance).
    """
    Data = BitVector[DATAWIDTH]
    PE = PE_fc(family)

    class FMA(Peak):
        def __init__(self):
            self.pe1 = PE()
            self.pe2 = PE()

        def __call__(self, in0: Data, in1: Data, in2: Data) -> Data:
            mul_inst = asm.smult0()
            add_inst = asm.add()

            # product = in0 * in1, then accumulate in2 on the second PE.
            product, _, _ = self.pe1(mul_inst, in0, in1)
            accumulated, _, _ = self.pe2(add_inst, product, in2)
            return accumulated

    return FMA
def FExp_fc(family):
    """Return the ``FExp`` Peak class built for *family*.

    The datapath follows the in-code plan "op_a / ln(2)" then "2**x":
    the input is multiplied by a bfloat constant for ``1 / ln(2)``, the
    product is split with ``fgetfint`` / ``fgetffrac``, the fractional
    value ANDed with ``0xFF`` indexes a ROM filled from ``TLUT.exp_lut``,
    and ``faddiexp`` combines the ROM output with the integer part.

    Args:
        family: family object forwarded to ``PE_fc``.

    Returns:
        The ``FExp`` class (not an instance).
    """
    PE = PE_fc(family)

    class FExp(Peak):
        def __init__(self):
            self.pe_get_int = PE()
            self.pe_get_frac = PE()
            self.pe_rom_idx = PE()
            self.rom = MEM()
            self.pe_incr_exp = PE()
            self.pe_div_mult = PE()

        # result = exp(op_a), evaluated as 2 ** (op_a / ln(2))
        def __call__(self, in0: Data) -> Data:
            # Instruction for op_a / ln(2), done as a multiply by 1/ln(2).
            scale_inst = asm.fp_mul()
            # Instructions for the 2**x stage.
            get_int_inst = asm.fgetfint()
            get_frac_inst = asm.fgetffrac()
            mask_inst = asm.and_()
            add_exp_inst = asm.faddiexp()

            # ROM image: entries for 0..127, then entries for -128..-1,
            # then zero padding up to the ROM depth.
            non_negative = [TLUT.exp_lut(i) for i in range(0, 128)]
            negative = [TLUT.exp_lut(i) for i in range(-128, 0)]
            rom_instr = mem_asm.rom(
                non_negative + negative + [0x0000] * (depth - 256)
            )

            op_a = in0

            # 1/ln(2) encoded through float2bfbin (binary string -> int -> Data).
            inv_ln2 = 1.0 / math.log(2)
            inv_ln2_bits = int(float2bfbin(inv_ln2), 2)
            const_ln2_inv = Data(inv_ln2_bits)

            scaled, _, _ = self.pe_div_mult(scale_inst, const_ln2_inv, op_a)
            int_part, _, _ = self.pe_get_int(get_int_inst, scaled, Data(0))
            frac_part, _, _ = self.pe_get_frac(get_frac_inst, scaled, Data(0))
            # Keep the low 8 bits of the fraction as the ROM address.
            rom_index, _, _ = self.pe_rom_idx(mask_inst, frac_part, Data(0xFF))
            lut_value = self.rom(rom_instr, rom_index, Data(0))
            exp_result, _, _ = self.pe_incr_exp(add_exp_inst, lut_value, int_part)
            return exp_result

    return FExp
import pytest
from hwtypes import SIntVector, UIntVector, BitVector, Bit
from peak.family import PyFamily
import lassen.asm as asm
from lassen import PE_fc, Inst_fc
from lassen.common import DATAWIDTH, BFloat16_fc
from lassen.utils import float2bfbin, bfbin2float
from rtl_utils import rtl_tester

# Module-level fixtures: instruction type, PE class, one shared PE instance
# and the BFloat16 type, all built for the pure-Python family.
Inst = Inst_fc(PyFamily())
# Mode_t is whatever Inst exposes under the name `rega`.
Mode_t = Inst.rega
PE = PE_fc(PyFamily())
pe = PE()
BFloat16 = BFloat16_fc(PyFamily())
# Bit-vector type of width DATAWIDTH.
Data = BitVector[DATAWIDTH]

# Pairs an instruction with a function ("inst", "func").
# NOTE(review): `namedtuple` is not imported in the visible part of this
# file -- confirm `from collections import namedtuple` exists above.
op = namedtuple("op", ["inst", "func"])
NTESTS = 16

#container for a floating point value easily indexed by sign, exp, and frac
fpdata = namedtuple("fpdata", ["sign", "exp", "frac"])

def is_nan_or_inf(fpdata):
    """Return whether ``fpdata.exp`` equals ``BitVector[8](-1)``.

    The parameter shadows the module-level ``fpdata`` namedtuple; any
    object with an ``exp`` attribute comparable to an 8-bit BitVector
    works.  ``BitVector[8](-1)`` is presumably the all-ones exponent
    (0xFF) -- confirm against hwtypes.
    """
    return fpdata.exp==BitVector[8](-1)

#Convert fpdata to a BFloat value
import magma
from peak import wrap_with_disassembler
from peak.assembler import Assembler
from peak.family import PyFamily, MagmaFamily
from lassen import PE_fc, Inst_fc
from lassen.common import DATAWIDTH, BFloat16_fc


class HashableDict(dict):
    """A ``dict`` that can be hashed.

    The hash is computed from the sorted tuple of keys only; values do not
    contribute, and the keys must be mutually orderable for ``sorted``.
    """

    def __hash__(self):
        return hash(tuple(sorted(self.keys())))


# Instruction type, PE class and BFloat16 type for the pure-Python family.
Inst = Inst_fc(PyFamily())
# Mode_t is whatever Inst exposes under the name `rega`.
Mode_t = Inst.rega
PE_bv = PE_fc(PyFamily())
BFloat16 = BFloat16_fc(PyFamily())
# NOTE(review): `BitVector` is not imported in the visible part of this
# file -- confirm `from hwtypes import BitVector` exists above.
Data = BitVector[DATAWIDTH]

# create these variables in global space so that we can reuse them easily
inst_name = 'inst'
# Type of the PE input field named 'inst', used to build the assembler.
inst_type = PE_bv.input_t.field_dict[inst_name]
_assembler = Assembler(inst_type)
# Convenience aliases for the assembler's bound methods and attributes.
assembler = _assembler.assemble
disassembler = _assembler.disassemble
width = _assembler.width
layout = _assembler.layout
# Alternative construction kept for reference (passes use_assembler=True).
#PE_magma = PE_fc(MagmaFamily(), use_assembler=True)
# PE class built for the Magma family.
PE_magma = PE_fc(MagmaFamily())