def test_reduce_parallel():
    """Sum 13 parallel 11-bit inputs through a ReduceParallel adder tree.

    Feeds the values 0..12 to the 13 lanes and checks the reduced output
    equals sum(range(13)).
    """
    width = 11
    numIn = 13
    c = coreir.Context()
    cirb = CoreIRBackend(c)
    scope = Scope()
    inType = In(Array(numIn, Array(width, BitIn)))
    outType = Out(Array(width, Bit))
    args = ['I', inType, 'O', outType] + ClockInterface(False, False)
    testcircuit = DefineCircuit('Test_Reduce_Parallel', *args)
    reducePar = ReduceParallel(cirb, numIn, renameCircuitForReduce(DefineAdd(width)))
    # additive identity (0) fed to the reduce's identity port
    coreirConst = DefineCoreirConst(width, 0)()
    wire(reducePar.I.data, testcircuit.I)
    wire(reducePar.I.identity, coreirConst.out)
    wire(testcircuit.O, reducePar.out)
    EndCircuit()
    sim = CoreIRSimulator(testcircuit, testcircuit.CLK, context=cirb.context,
                          namespaces=["aetherlinglib", "commonlib", "mantle",
                                      "coreir", "global"])
    # drive lane i with the value i
    for i in range(numIn):
        sim.set_value(testcircuit.I[i], int2seq(i, width), scope)
    sim.evaluate()
    assert seq2int(sim.get_value(testcircuit.O, scope)) == sum(range(numIn))
def definition(cls):
    """Build a lane-wise shift: each output lane i is input lane (i - shift_amount) mod ni,
    delayed through a Shift_T when the shift wraps around the lane boundary.

    NOTE(review): relies on closure variables from the enclosing generator
    (ni, no, io, shift_amount, elem_t, has_valid, has_ce, has_reset) — confirm
    against the Define* wrapper that encloses this definition.
    """
    # enable is constant 1, optionally gated by valid_up and CE below
    enabled = DefineCoreirConst(1, 1)().O[0]
    if has_valid:
        enabled = cls.valid_up & enabled
        wire(cls.valid_up, cls.valid_down)
    if has_ce:
        enabled = bit(cls.CE) & enabled
    # don't need valid on these shift_t as they'll be getting it from the enable signal
    shift_t_xs = []
    for i in range(ni):
        # how many whole-element time steps lane i must be delayed;
        # 0 means the lane shift is purely spatial (a plain rewire)
        shift_amount_t = (ni - i + shift_amount - 1) // ni
        if shift_amount_t == 0:
            shift_t_xs.append(None)
        else:
            shift_t_xs.append(
                DefineShift_T(no, io, shift_amount_t, elem_t, True, has_reset, False)())
    for i in range(ni):
        if shift_t_xs[i] is None:
            # pure spatial shift: wire the rotated input lane straight through
            wire(cls.I[(i - shift_amount) % ni], cls.O[i])
        else:
            # temporal shift: route the rotated lane through its Shift_T
            wire(cls.I[(i - shift_amount) % ni], shift_t_xs[i].I)
            wire(shift_t_xs[i].O, cls.O[i])
            wire(enabled, shift_t_xs[i].CE)
            if has_reset:
                wire(cls.RESET, shift_t_xs[i].RESET)
def test_const():
    """Verify repr and compiled coreir output of a 4-bit constant with value 8."""
    const_def = DefineCoreirConst(width=4, value=8)
    expected_repr = ('coreir_const48 = DeclareCircuit('
                     '"coreir_const48", "O", Out(Bits(4)))')
    assert repr(const_def) == expected_repr
    compile("build/test_const", wrap(const_def), output="coreir")
    assert check_files_equal(__file__,
                             "build/test_const.json",
                             "gold/test_const.json")
def definition(cls):
    """Shift_TS-style delay: buffer shift_amount elements in a RAM, reading and
    writing the same address each element period, gated by an inner-TSeq valid.

    NOTE(review): relies on closure variables (elem_t, shift_amount, nis, iis,
    has_valid, has_ce, has_reset) from the enclosing generator — confirm there.
    """
    enabled = DefineCoreirConst(1, 1)().O[0]
    if has_valid:
        enabled = cls.valid_up & enabled
        wire(cls.valid_up, cls.valid_down)
    if has_ce:
        enabled = bit(cls.CE) & enabled
    value_store = DefineRAM_ST(elem_t, shift_amount, has_reset=has_reset)()
    # write and read from same location
    # will write on first iteration through element, write and read on later iterations
    # output for first iteration is undefined, so ok to read anything
    next_ram_addr = DefineNestedCounters(elem_t, has_ce=True, has_reset=has_reset)()
    # its fine that this doesn't account for the invalid clocks of outer TSeq
    # after the invalid clocks, the next iteration will start from
    # an index that is possibly not 0. That doesn't matter
    # as will just loop around
    ram_addr = AESizedCounterModM(shift_amount, has_ce=True, has_reset=has_reset)
    # this handles invalid clocks of inner TSeq: build a TSeq-of-TSeq type whose
    # nested counters assert valid only on the inner sequences' valid clocks
    inner_valid_t = ST_Int()
    for i in range(len(nis))[::-1]:
        inner_valid_t = ST_TSeq(nis[i], iis[i], inner_valid_t)
    inner_valid = DefineNestedCounters(inner_valid_t, has_last=False, has_ce=True,
                                       has_reset=has_reset, valid_when_ce_off=True)()
    wire(ram_addr.O, value_store.WADDR)
    wire(ram_addr.O, value_store.RADDR)
    # only write/read on valid inner clocks
    wire(enabled & inner_valid.valid, value_store.WE)
    wire(enabled & next_ram_addr.last, inner_valid.CE)
    #wire(inner_valid.valid, cls.inner_valid)
    wire(enabled & inner_valid.valid, value_store.RE)
    # advance the RAM address once per completed (and valid) element
    wire(enabled & next_ram_addr.last & inner_valid.valid, ram_addr.CE)
    wire(enabled, next_ram_addr.CE)
    # valid output of the element counter is unused; terminate it
    next_ram_addr_term = TermAnyType(Bit)
    wire(next_ram_addr.valid, next_ram_addr_term.I)
    wire(cls.I, value_store.WDATA)
    wire(value_store.RDATA, cls.O)
    if has_reset:
        wire(value_store.RESET, cls.RESET)
        wire(ram_addr.RESET, cls.RESET)
        wire(next_ram_addr.RESET, cls.RESET)
        wire(inner_valid.RESET, cls.RESET)
def definition(cls):
    """Wrap the external pipelined Verilog multiplier (pipelined/mul.v).

    Multiplies cls.I[0] * cls.I[1]; keeps the low 8 bits of the 16-bit product
    and terminates the high 8. The multiplier has a 2-cycle latency, so valid
    is delayed through two 1-bit registers when has_valid is set.
    NOTE(review): has_valid comes from the enclosing generator's closure.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    op = m.DefineFromVerilogFile(os.path.join(dir_path, "pipelined", "mul.v"),
                                 type_map={"CLK": m.In(m.Clock)})[0]()
    zero_const = DefineCoreirConst(1, 0)()
    one_const = DefineCoreirConst(1, 1)()
    # never reset, always clock-enabled
    wire(zero_const.O[0], op.rst)
    wire(one_const.O[0], op.ce)
    wire(cls.I[0], op.a)
    wire(cls.I[1], op.b)
    # low byte of the product is the result
    wire(op.p[0:8], cls.O)
    # terminate the unused high byte
    term = DefineTerm(8)()
    wire(op.p[8:16], term.I)
    if has_valid:
        # match the multiplier's 2-cycle pipeline latency on the valid path
        reg0 = DefineRegister(1)()
        reg1 = DefineRegister(1)()
        wire(cls.valid_up, reg0.I[0])
        wire(reg0.O, reg1.I)
        wire(reg1.O[0], cls.valid_down)
def definition(cls):
    """Hold the first element of each group: pass input through on the first
    element, store it in a 1-deep RAM, and replay the stored value for the rest.

    NOTE(review): relies on closure variables (elem_t, n, i, has_valid, has_ce,
    has_reset) from the enclosing generator — confirm there.
    """
    enabled = DefineCoreirConst(1, 1)().O[0]
    if has_valid:
        enabled = cls.valid_up & enabled
        wire(cls.valid_up, cls.valid_down)
    if has_ce:
        enabled = bit(cls.CE) & enabled
    value_store = DefineRAM_ST(elem_t, 1, has_reset=has_reset)()
    # write to value_store for first element, read for next
    element_time_counter = DefineNestedCounters(elem_t, has_ce=True, has_reset=has_reset)()
    # counts which element of the n + i group we are on
    element_idx_counter = AESizedCounterModM(n + i, has_ce=True, has_reset=has_reset)
    is_first_element = Decode(0, element_idx_counter.O.N)(element_idx_counter.O)
    # 1-deep RAM: always address 0
    zero_addr = DefineCoreirConst(1, 0)().O
    wire(zero_addr, value_store.WADDR)
    wire(zero_addr, value_store.RADDR)
    # only capture on the first element; read every enabled clock
    wire(enabled & is_first_element, value_store.WE)
    wire(enabled, value_store.RE)
    wire(enabled, element_time_counter.CE)
    # advance the element index when a whole element has gone by
    wire(enabled & element_time_counter.last, element_idx_counter.CE)
    # valid output of the per-element time counter is unused; terminate it
    element_time_counter_term = TermAnyType(Bit)
    wire(element_time_counter.valid, element_time_counter_term.I)
    wire(cls.I, value_store.WDATA)
    output_selector = DefineMuxAnyType(elem_t.magma_repr(), 2)()
    # on first element, send the input directly out. otherwise, use the register
    wire(is_first_element, output_selector.sel[0])
    wire(value_store.RDATA, output_selector.data[0])
    wire(cls.I, output_selector.data[1])
    wire(output_selector.out, cls.O)
    if has_reset:
        wire(value_store.RESET, cls.RESET)
        wire(element_time_counter.RESET, cls.RESET)
        wire(element_idx_counter.RESET, cls.RESET)
def test_const():
    """Verify repr and compiled coreir output of a 4-bit constant with value 8."""
    Const8 = DefineCoreirConst(width=4, value=8)
    # 8 = 0b1000, so only bit 3 is wired to 1
    assert repr(Const8) == """\
coreir_const48 = DefineCircuit("coreir_const48", "out", Out(Bits(4)))
wire(0, coreir_const48.out[0])
wire(0, coreir_const48.out[1])
wire(0, coreir_const48.out[2])
wire(1, coreir_const48.out[3])
EndCircuit()"""
    compile("build/test_const", Const8, output="coreir")
    assert check_files_equal(__file__, "build/test_const.json", "gold/test_const.json")
def test_term():
    """A terminated constant must not disturb the passthrough I -> O path."""
    num_bits = 11
    bus_t = Array[num_bits, BitIn]
    testcircuit = DefineCircuit(
        'Test_Term', *(['I', In(bus_t), 'O', Out(bus_t)]))
    # straight passthrough; the term/const pair below is wired off to the side
    wire(testcircuit.I, testcircuit.O)
    sink = TermAnyType(bus_t)
    zero_const = DefineCoreirConst(num_bits, 0)()
    wire(zero_const.O, sink.I)
    EndCircuit()
    checker = fault.Tester(testcircuit)
    checker.circuit.I = 2
    checker.eval()
    checker.circuit.O.expect(2)
    compile_and_run(checker)
def definition(cls):
    """Shift_T: delay the stream by shift_amount elements using a RAM that
    reads and writes the same rotating address.

    NOTE(review): relies on closure variables (elem_t, shift_amount, has_valid,
    has_ce, has_reset) from the enclosing generator — confirm there.
    """
    enabled = DefineCoreirConst(1, 1)().O[0]
    if has_valid:
        enabled = cls.valid_up & enabled
        wire(cls.valid_up, cls.valid_down)
    if has_ce:
        enabled = bit(cls.CE) & enabled
    value_store = DefineRAM_ST(elem_t, shift_amount, has_reset=has_reset)()
    # write and read from same location
    # will write on first iteration through element, write and read on later iterations
    # output for first iteration is undefined, so ok to read anything
    next_ram_addr = DefineNestedCounters(elem_t, has_ce=True, has_reset=has_reset)()
    # its fine that this doesn't account for the invalid clocks.
    # after the invalid clocks, the next iteration will start from
    # an index that is possibly not 0. That doesn't matter
    # as will just loop around
    ram_addr = AESizedCounterModM(shift_amount, has_ce=True, has_reset=has_reset)
    wire(ram_addr.O, value_store.WADDR)
    wire(ram_addr.O, value_store.RADDR)
    wire(enabled, value_store.WE)
    wire(enabled, value_store.RE)
    # advance the RAM address once per completed element
    wire(enabled & next_ram_addr.last, ram_addr.CE)
    wire(enabled, next_ram_addr.CE)
    # valid output of the element counter is unused; terminate it
    next_ram_addr_term = TermAnyType(Bit)
    wire(next_ram_addr.valid, next_ram_addr_term.I)
    wire(cls.I, value_store.WDATA)
    wire(value_store.RDATA, cls.O)
    if has_reset:
        wire(value_store.RESET, cls.RESET)
        wire(ram_addr.RESET, cls.RESET)
        wire(next_ram_addr.RESET, cls.RESET)
def definition(BitonicSort):
    """Sort n inputs by padding up to the next power of two with the max value
    and delegating to a power-of-2 bitonic sorting network.

    The max-value padding sorts to the end, so the first n outputs of the
    pow2 network are the sorted real inputs; padded outputs are terminated.
    NOTE(review): T, n, cmp_component come from the enclosing generator's closure.
    """
    # generate the max value (all 1's) and feed it to all inputs to
    # power 2 bitonic sorting network not used by inputs
    t_size = T.size()
    n_raised_to_nearest_pow2 = pow(2, ceil(log2(n)))
    if n_raised_to_nearest_pow2 > n:
        # all-ones flat constant, hydrated into the element type T
        max_const_flat = DefineCoreirConst(t_size, pow(2, t_size) - 1)()
        max_const = Hydrate(T)
        wire(max_const_flat.O, max_const.I)
    pow2_sort = DefineBitonicSortPow2(T, n_raised_to_nearest_pow2, cmp_component)()
    for i in range(n_raised_to_nearest_pow2):
        if i < n:
            # real lanes: connect straight through
            wire(BitonicSort.I[i], pow2_sort.I[i])
            wire(BitonicSort.O[i], pow2_sort.O[i])
        else:
            # padding lanes: feed max value in, terminate the sorted output
            wire(max_const.out, pow2_sort.I[i])
            term = TermAnyType(T)
            wire(term.I, pow2_sort.O[i])
def test_noop():
    """A Noop wrapper must pass its input through unchanged across a cycle."""
    expected = 21
    scope = Scope()
    circuit_args = (['I', Array[8, In(Bit)], 'O', Array[8, Out(Bit)]]
                    + ClockInterface(False, False))
    testcircuit = DefineCircuit('Test', *circuit_args)
    # wrap an arbitrary circuit (a const) in a Noop and route I -> O through it
    passthrough = DefineNoop(DefineCoreirConst(8, 0))()
    wire(passthrough.in_O, testcircuit.I)
    wire(testcircuit.O, passthrough.O)
    EndCircuit()
    sim = CoreIRSimulator(testcircuit, testcircuit.CLK)
    sim.set_value(testcircuit.I, int2seq(expected, 8), scope)
    sim.evaluate()
    sim.advance_cycle()
    sim.evaluate()
    observed = seq2int(sim.get_value(testcircuit.O, scope))
    assert observed == expected
def definition(cls):
    """Emit a fixed sequence of constants from a LUT, one per clock, looping
    over the type's time steps after an optional initial delay.

    NOTE(review): relies on closure variables (t, ts_values, delay, has_ce,
    has_reset, has_valid) from the enclosing generator — confirm there.
    """
    one_const = DefineCoreirConst(1, 1)().O[0]
    if delay == 0:
        enabled = one_const
    else:
        # hold output invalid for the first `delay` clocks
        delay_counter = InitialDelayCounter(delay)
        wire(delay_counter.CE, one_const)
        enabled = delay_counter.valid
    if has_ce:
        enabled = bit(cls.CE) & enabled
    # LUT holds one value per time step of t
    luts = DefineLUTAnyType(t.magma_repr(), t.time(), ts_arrays_to_bits(ts_values))()
    lut_position_counter = AESizedCounterModM(t.time(), has_ce=True, has_reset=has_reset)
    wire(lut_position_counter.O, luts.addr)
    wire(cls.O, luts.data)
    wire(enabled, lut_position_counter.CE)
    if has_reset:
        wire(cls.RESET, lut_position_counter.RESET)
    if has_valid:
        # constants are always internally valid; terminate valid_up and
        # report valid_down as soon as the initial delay has elapsed
        valid_up_term = TermAnyType(Bit)
        wire(cls.valid_up, valid_up_term.I)
        wire(enabled, cls.valid_down)
In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) downsample_256x256_to_32x32_16px_in_per_clk = DefineCircuit( 'downsample_256x256_to_32x32_16px_in_per_clk_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 16, 1, 2, 2, 256, 256, 2, 2, 0, 0)() magmaInstance1 = DefineNoop( DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 16, 1, 2, 2, 256, 256, 2, 2, 0, 0))() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance4 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineCoreirConst(8, 1)() magmaInstance6 = DefineCoreirConst(8, 2)() magmaInstance7 = DefineCoreirConst(8, 2)() magmaInstance8 = DefineCoreirConst(8, 2)() magmaInstance9 = DefineCoreirConst(8, 2)() magmaInstance13 = DefineCoreirConst(8, 3)() magmaInstance14 = DefineCoreirConst(8, 3)() magmaInstance15 = DefineCoreirConst(8, 3)() magmaInstance16 = DefineCoreirConst(8, 3)() magmaInstance17 = DefineCoreirConst(8, 4)() magmaInstance18 = DefineCoreirConst(8, 4)() magmaInstance19 = DefineCoreirConst(8, 4)() magmaInstance20 = DefineCoreirConst(8, 4)()
def definition(cls):
    """Delay the stream by `delay` clocks: a single register for delay == 1,
    otherwise a (delay - 1)-deep RAM FIFO followed by an output register.

    NOTE(review): relies on closure variables (t, delay, has_valid, has_ce,
    has_reset) from the enclosing generator — confirm there.
    """
    per_clock_type = t.magma_repr()
    if delay == 1:
        # single-cycle delay: one register on the data path
        reg = DefineRegisterAnyType(t.magma_repr(), has_ce=False, has_reset=has_reset)()
        wire(reg.I, cls.I)
        wire(reg.O, cls.O)
        if has_reset:
            wire(reg.RESET, cls.RESET)
        if has_valid:
            # valid delayed by the same single cycle
            valid_reg = DefineRegisterAnyType(Bit, has_ce=False, has_reset=has_reset)()
            wire(valid_reg.I, cls.valid_up)
            wire(valid_reg.O, cls.valid_down)
            if has_reset:
                wire(valid_reg.RESET, cls.RESET)
    else:
        enabled = DefineCoreirConst(1, 1)().O[0]
        if has_valid:
            enabled = cls.valid_up & enabled
        if has_ce:
            enabled = bit(cls.CE) & enabled
        # FIFO of depth delay - 1 plus the output register gives `delay` total
        read_counter = AESizedCounterModM(delay - 1, has_ce=True, has_reset=has_reset)
        write_counter = AESizedCounterModM(delay - 1, has_ce=True, has_reset=has_reset)
        fifo_buffer = DefineRAMAnyType(per_clock_type, delay - 1)()
        reg = DefineRegisterAnyType(t.magma_repr(), has_ce=False, has_reset=has_reset)()
        # delay read for delay clocks
        internal_delay_counter = DefineInitialDelayCounter(delay - 1)()
        advance_read_counter = internal_delay_counter.valid
        wire(enabled, internal_delay_counter.CE)
        # reads start only after the initial fill period
        wire(advance_read_counter & enabled, read_counter.CE)
        wire(enabled, write_counter.CE)
        if has_reset:
            wire(cls.RESET, read_counter.RESET)
            wire(cls.RESET, write_counter.RESET)
            wire(cls.RESET, internal_delay_counter.RESET)
        wire(fifo_buffer.WADDR, write_counter.O)
        wire(fifo_buffer.RADDR, read_counter.O)
        wire(fifo_buffer.WDATA, cls.I)
        wire(fifo_buffer.RDATA, reg.I)
        wire(reg.O, cls.O)
        wire(fifo_buffer.WE, enabled)
        if has_valid:
            # valid tracks when reads begin, registered to match the output reg
            valid_reg = DefineRegister(1)()
            wire(advance_read_counter, valid_reg.I[0])
            wire(valid_reg.O[0], cls.valid_down)
def definition(TSBankGenerator):
    """Generate per-lane bank and address selections for a TS-layout banked memory.

    For each of the ni lanes, computes
    bank = ((flat_idx % sseq_dim) + (flat_idx / lcm(no, ni))) % sseq_dim and
    addr = t (the time counter), mirroring the ST variant of this generator.

    Fixes vs. the previous revision:
    - lcm divider input used lane 0's index (with a spurious .O) for every lane;
      now uses lane i's index directly.
    - DefineCoreirConst width/value arguments were swapped for the lcm constant.
    - lane_flat_div_lcms collected the stale adder output instead of the
      divider output.
    - bank_mod's second operand was wired to I0 (already driven) instead of I1.
    NOTE(review): closure variables (no, ni, io, time_per_element, has_ce,
    has_reset) come from the enclosing generator — confirm there.
    """
    flat_idx_width = getRAMAddrWidth(no * ni)
    # next element each time_per_element clock
    if time_per_element > 1:
        index_in_cur_element = SizedCounterModM(time_per_element,
                                                has_ce=has_ce, has_reset=has_reset)
        next_element = Decode(time_per_element - 1, index_in_cur_element.O.N)(
            index_in_cur_element.O)
    else:
        next_element = DefineCoreirConst(1, 1)()
    # each element of the SSeq is a separate vector lane
    # (no trailing call: SizedCounterModM already returns an instance, matching
    # index_in_cur_element and time_counter above/below)
    first_lane_flat_idx = SizedCounterModM((no + io) * ni, incr=ni,
                                           has_ce=True, has_reset=has_reset)
    time_counter = SizedCounterModM(no + io, has_ce=True, has_reset=has_reset)
    wire(next_element.O[0], first_lane_flat_idx.CE)
    wire(next_element.O[0], time_counter.CE)
    if has_ce:
        wire(TSBankGenerator.CE, index_in_cur_element.CE)
    if has_reset:
        wire(TSBankGenerator.RESET, index_in_cur_element.RESET)
        wire(TSBankGenerator.RESET, first_lane_flat_idx.RESET)
        wire(TSBankGenerator.RESET, time_counter.RESET)
    lane_flat_idxs = [first_lane_flat_idx.O]
    # compute the current flat_idx for each lane
    for i in range(1, ni):
        cur_lane_flat_idx_adder = DefineAdd(flat_idx_width)()
        wire(cur_lane_flat_idx_adder.I0, first_lane_flat_idx.O)
        wire(cur_lane_flat_idx_adder.I1,
             DefineCoreirConst(flat_idx_width, i * no)().O)
        lane_flat_idxs += [cur_lane_flat_idx_adder.O]
    lane_flat_div_lcms = []
    # shared constant for the lcm divisor (hoisted out of the loop,
    # consistent with the ST bank generator)
    lcm_dim = DefineCoreirConst(flat_idx_width, lcm(no, ni))()
    # compute flat_idx / lcm_dim for each lane
    for i in range(ni):
        cur_lane_lcm_div = DefineUDiv(flat_idx_width)()
        wire(cur_lane_lcm_div.I0, lane_flat_idxs[i])
        wire(cur_lane_lcm_div.I1, lcm_dim.O)
        lane_flat_div_lcms += [cur_lane_lcm_div.O]
    # compute ((flat_idx % sseq_dim) + (flat_idx / lcm_dim)) % sseq_dim for each lane
    # note that s_ts == flat_idx % sseq_dim
    # only need to mod sseq_dim at end as that is same as also doing it flat_idx before addition
    for i in range(ni):
        pre_mod_add = DefineAdd(flat_idx_width)()
        wire(pre_mod_add.I0, lane_flat_idxs[i])
        wire(pre_mod_add.I1, lane_flat_div_lcms[i])
        bank_mod = DefineUMod(flat_idx_width)()
        wire(bank_mod.I0, pre_mod_add.O)
        wire(bank_mod.I1, DefineCoreirConst(flat_idx_width, ni)().O)
        wire(TSBankGenerator.bank[i], bank_mod.O[0:TSBankGenerator.bank_width])
        # terminate any bank_mod bits beyond bank_width (as the ST variant does)
        if len(bank_mod.O) > TSBankGenerator.bank_width:
            bits_to_term = len(bank_mod.O) - TSBankGenerator.bank_width
            term = TermAnyType(Array[bits_to_term, Bit])
            wire(bank_mod.O[TSBankGenerator.bank_width:], term.I)
    # compute t for each lane addr: in the TS layout every lane reads the same
    # time-step address
    for i in range(0, ni):
        wire(TSBankGenerator.addr[i], time_counter.O[0:TSBankGenerator.addr_width])
outType2 = m.Out(m.Array(16, Bit)) # Test circuit has line buffer's input and reduce's output args = ['I', inType, 'O', outType2, 'WE', BitIn, 'V', m.Out(m.Bit), 'L00', TOUT, 'L01', TOUT, 'L10', TOUT, 'L11', TOUT] + \ m.ClockInterface(False, False) testcircuit = m.DefineCircuit('STEN', *args) # Line buffer declaration lb = Linebuffer(cirb, inType, outType, imgType, True) m.wire(lb.I, testcircuit.I) m.wire(lb.wen, testcircuit.WE) # # Reduce declaration reducePar = ReduceParallel(cirb, 4, renameCircuitForReduce(DeclareAdd(16))) coreirConst = DefineCoreirConst(16, 0)() m.wire(reducePar.I.data[0], lb.out[0][0]) m.wire(reducePar.I.data[1], lb.out[0][1]) m.wire(reducePar.I.data[2], lb.out[1][0]) m.wire(reducePar.I.data[3], lb.out[1][1]) m.wire(reducePar.I.identity, coreirConst.O) m.wire(testcircuit.O, reducePar.out) m.wire(testcircuit.V, lb.valid) m.wire(lb.out[0][0], testcircuit.L00) m.wire(lb.out[0][1], testcircuit.L01) m.wire(lb.out[1][0], testcircuit.L10) m.wire(lb.out[1][1], testcircuit.L11) m.EndCircuit()
from mantle.coreir.compare import *
from mantle.coreir import DefineCoreirConst
from mantle.coreir.LUT import *
from aetherling.modules.upsample import *
from aetherling.modules.downsample import *
from aetherling.modules.reduce import *
from aetherling.modules.native_linebuffer.two_dimensional_native_linebuffer import DefineTwoDimensionalLineBuffer

# Auto-generated-style circuit: a 16x16 line buffer chained through a noop,
# with constant coefficient multiplies (the circuit continues past this excerpt).
c = coreir.Context()
cirb = CoreIRBackend(c)
args = ['I0', Array[8, In(Bit)], 'I1', Array[8, In(Bit)],
        'O0', Array[8, Out(Bit)],
        'valid_data_in', In(Bit), 'ready_data_in', Out(Bit),
        'valid_data_out', Out(Bit), 'ready_data_out', In(Bit),
        ] + ClockInterface(has_ce=True)
downsampleStencilChain1Per32 = DefineCircuit('downsampleStencilChain1Per32_Circuit', *args)
# line buffer producing 2x2 windows over a 16x16 image, 2 px in per clock
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 2, 1, 2, 2, 16, 16, 2, 2, 0, 0)()
magmaInstance1 = DefineNoop(DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 2, 1, 2, 2, 16, 16, 2, 2, 0, 0))()
# per-window-position multiplier coefficients
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 2)()
magmaInstance4 = DefineCoreirConst(8, 3)()
magmaInstance5 = DefineCoreirConst(8, 4)()
# route the line buffer's 2x2 window through the noop
wire(magmaInstance0.O[0][0][0], magmaInstance1.in_O[0][0][0])
wire(magmaInstance0.O[0][0][1], magmaInstance1.in_O[0][0][1])
wire(magmaInstance0.O[0][1][0], magmaInstance1.in_O[0][1][0])
wire(magmaInstance0.O[0][1][1], magmaInstance1.in_O[0][1][1])
magmaInstance6 = DefineCoreirMul(8)()
magmaInstance7 = DefineCoreirMul(8)()
magmaInstance8 = DefineCoreirMul(8)()
magmaInstance9 = DefineCoreirMul(8)()
# multiply each window element by its coefficient
wire(magmaInstance1.O[0][0][0], magmaInstance6.I0)
wire(magmaInstance2.O, magmaInstance6.I1)
wire(magmaInstance1.O[0][0][1], magmaInstance7.I0)
wire(magmaInstance3.O, magmaInstance7.I1)
def definition(STBankGenerator):
    """Generate per-lane bank and address selections for an ST-layout banked memory.

    For each of the no lanes, computes
    bank = ((flat_idx % sseq_dim) + (flat_idx / lcm(no, ni))) % sseq_dim and
    addr = flat_idx / sseq_dim, truncated to the declared port widths with any
    excess high bits terminated.

    Fixes vs. the previous revision:
    - the addr divider's dividend was wired to lane 0's flat index inside the
      per-lane loop; now uses lane i's index.
    - the addr-term bit count was computed from bank_mod's width instead of the
      divider's output width.
    NOTE(review): closure variables (no, ni, ii, time_per_element, has_ce,
    has_reset) come from the enclosing generator — confirm there.
    """
    flat_idx_width = getRAMAddrWidth(no * ni)
    # next element each time_per_element clock
    if time_per_element > 1:
        index_in_cur_element = SizedCounterModM(time_per_element,
                                                has_ce=has_ce, has_reset=has_reset)
        next_element = Decode(time_per_element - 1, index_in_cur_element.O.N)(
            index_in_cur_element.O)
    else:
        next_element = DefineCoreirConst(1, 1)()
    # each element of the SSeq is a separate vector lane
    first_lane_flat_idx = DefineCounterModM(ni + ii, flat_idx_width, cout=False,
                                            has_ce=True, has_reset=has_reset)()
    wire(next_element.O[0], first_lane_flat_idx.CE)
    if has_ce:
        wire(STBankGenerator.CE, index_in_cur_element.CE)
    if has_reset:
        wire(STBankGenerator.RESET, index_in_cur_element.RESET)
        wire(STBankGenerator.RESET, first_lane_flat_idx.RESET)
    lane_flat_idxs = [first_lane_flat_idx.O]
    # compute the current flat_idx for each lane
    for i in range(1, no):
        cur_lane_flat_idx_adder = DefineAdd(flat_idx_width)()
        wire(cur_lane_flat_idx_adder.I0, first_lane_flat_idx.O)
        wire(cur_lane_flat_idx_adder.I1,
             DefineCoreirConst(flat_idx_width, i * ni)().O)
        lane_flat_idxs += [cur_lane_flat_idx_adder.O]
    lane_flat_div_lcms = []
    lcm_dim = DefineCoreirConst(flat_idx_width, lcm(no, ni))()
    # compute flat_idx / lcm_dim for each lane
    for i in range(no):
        cur_lane_lcm_div = DefineUDiv(flat_idx_width)()
        wire(cur_lane_lcm_div.I0, lane_flat_idxs[i])
        wire(cur_lane_lcm_div.I1, lcm_dim.O)
        lane_flat_div_lcms += [cur_lane_lcm_div.O]
    # compute ((flat_idx % sseq_dim) + (flat_idx / lcm_dim)) % sseq_dim for each lane
    # only need to mod sseq_dim at end as that is same as also doing it flat_idx before addition
    for i in range(no):
        pre_mod_add = DefineAdd(flat_idx_width)()
        wire(pre_mod_add.I0, lane_flat_idxs[i])
        wire(pre_mod_add.I1, lane_flat_div_lcms[i])
        bank_mod = DefineUMod(flat_idx_width)()
        wire(bank_mod.I0, pre_mod_add.O)
        wire(bank_mod.I1, DefineCoreirConst(flat_idx_width, no)().O)
        wire(STBankGenerator.bank[i], bank_mod.O[0:STBankGenerator.bank_width])
        if len(bank_mod.O) > STBankGenerator.bank_width:
            bits_to_term = len(bank_mod.O) - STBankGenerator.bank_width
            term = TermAnyType(Array[bits_to_term, Bit])
            wire(bank_mod.O[STBankGenerator.bank_width:], term.I)
    # compute flat_idx / sseq_dim for each lane addr
    for i in range(no):
        flat_idx_sseq_dim_div = DefineUDiv(flat_idx_width)()
        # fixed: use this lane's flat index, not lane 0's
        wire(flat_idx_sseq_dim_div.I0, lane_flat_idxs[i])
        wire(flat_idx_sseq_dim_div.I1, DefineCoreirConst(flat_idx_width, no)().O)
        wire(STBankGenerator.addr[i],
             flat_idx_sseq_dim_div.O[0:STBankGenerator.addr_width])
        if len(flat_idx_sseq_dim_div.O) > STBankGenerator.addr_width:
            # fixed: excess bits measured on the divider output, not bank_mod
            bits_to_term = len(flat_idx_sseq_dim_div.O) - STBankGenerator.addr_width
            term = TermAnyType(Array[bits_to_term, Bit])
            wire(flat_idx_sseq_dim_div.O[STBankGenerator.addr_width:], term.I)
'O0', Array[8, Out(Bit)], 'O1', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) partialParallelSimpleAdd = DefineCircuit('partialParallelSimpleAdd_Circuit', *args) magmaInstance0 = DefineNoop(DefineCoreirConst(8, 1))() magmaInstance1 = DefineNoop(DefineCoreirConst(8, 1))() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineAdd(8)() magmaInstance6 = DefineAdd(8)() wire(magmaInstance0.O, magmaInstance5.I0) wire(magmaInstance2.O, magmaInstance5.I1) wire(magmaInstance1.O, magmaInstance6.I0) wire(magmaInstance3.O, magmaInstance6.I1) wire(partialParallelSimpleAdd.I0, magmaInstance0.in_O) wire(partialParallelSimpleAdd.I1, magmaInstance1.in_O) wire(partialParallelSimpleAdd.O0, magmaInstance5.O) wire(partialParallelSimpleAdd.O1, magmaInstance6.O) wire(partialParallelSimpleAdd.ready_data_out, partialParallelSimpleAdd.ready_data_in)
import magma as m
from magma.clock import *
from magma.backend.coreir_ import CoreIRBackend
from magma.bitutils import *
from coreir.context import *
from magma.simulator.coreir_simulator import CoreIRSimulator
import coreir
from magma.scope import Scope
from mantle.coreir import DefineCoreirConst
from mantle import CounterModM, Decode, SIPO
from magma.frontend.coreir_ import GetCoreIRModule
from mantle.coreir.arith import *
from mantle.primitives import DeclareAdd

# Script: build a partially-parallel reduce (8 elements, 2 lanes) over 16-bit
# adds, feed it the additive identity, and dump the module to JSON.
c = coreir.Context()
cirb = CoreIRBackend(c)
scope = Scope()
width = 16
# additive identity (0) for the reduction
addID = DefineCoreirConst(width, 0)()
rpp = ReducePartiallyParallel(cirb, 8, 2, renameCircuitForReduce(DeclareAdd(width)))
# NOTE(review): other snippets access DefineCoreirConst instances via .O —
# confirm .out matches the mantle version this script targets
m.wire(addID.out, rpp.C)
m.EndCircuit()
module = GetCoreIRModule(cirb, rpp)
module.save_to_file("reducehybrid.json")
outType2 = TOUT # Top level module: line buffer input, reduce output args = ['I', inType, 'O', outType2, 'WE', m.BitIn, 'V', m.Out(m.Bit)] + \ m.ClockInterface(False, False) dscale = m.DefineCircuit('Downscale', *args) # Line buffer declaration lb = Linebuffer(cirb, inType, outType, imgType, True) m.wire(lb.I, dscale.I) m.wire(lb.wen, dscale.WE) # Reduce declaration red = ReduceParallel(cirb, samples, renameCircuitForReduce(DeclareAdd(width))) # additive identity coreirConst = DefineCoreirConst(width, 0)() # select 16 samples to keep k = 0 for i in [0, 3, 5, 8]: for j in [0, 3, 7, 10]: m.wire(red.I.data[k], lb.out[i][j]) k += 1 m.wire(red.I.identity, coreirConst.O) m.wire(dscale.O, red.out) m.wire(dscale.V, lb.valid) m.EndCircuit() module = GetCoreIRModule(cirb, dscale)
In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) downsample_256x256_to_32x32_64px_in_per_clk = DefineCircuit( 'downsample_256x256_to_32x32_64px_in_per_clk_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 64, 1, 2, 2, 256, 256, 2, 2, 0, 0)() magmaInstance1 = DefineNoop( DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 64, 1, 2, 2, 256, 256, 2, 2, 0, 0))() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance4 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineCoreirConst(8, 1)() magmaInstance6 = DefineCoreirConst(8, 1)() magmaInstance7 = DefineCoreirConst(8, 1)() magmaInstance8 = DefineCoreirConst(8, 1)() magmaInstance9 = DefineCoreirConst(8, 1)() magmaInstance10 = DefineCoreirConst(8, 1)() magmaInstance11 = DefineCoreirConst(8, 1)() magmaInstance12 = DefineCoreirConst(8, 1)() magmaInstance13 = DefineCoreirConst(8, 1)() magmaInstance14 = DefineCoreirConst(8, 1)() magmaInstance15 = DefineCoreirConst(8, 1)() magmaInstance16 = DefineCoreirConst(8, 1)() magmaInstance17 = DefineCoreirConst(8, 1)()
In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) downsample_256x256_to_32x32_8px_in_per_clk = DefineCircuit( 'downsample_256x256_to_32x32_8px_in_per_clk_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2, 256, 256, 2, 2, 0, 0)() magmaInstance1 = DefineNoop( DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2, 256, 256, 2, 2, 0, 0))() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance4 = DefineCoreirConst(8, 2)() magmaInstance5 = DefineCoreirConst(8, 2)() magmaInstance7 = DefineCoreirConst(8, 3)() magmaInstance8 = DefineCoreirConst(8, 3)() magmaInstance9 = DefineCoreirConst(8, 4)() magmaInstance10 = DefineCoreirConst(8, 4)() wire(magmaInstance0.O[0][0][0], magmaInstance1.in_O[0][0][0]) wire(magmaInstance0.O[0][0][1], magmaInstance1.in_O[0][0][1]) wire(magmaInstance0.O[0][1][0], magmaInstance1.in_O[0][1][0]) wire(magmaInstance0.O[0][1][1], magmaInstance1.in_O[0][1][1]) wire(magmaInstance0.O[1][0][0], magmaInstance1.in_O[1][0][0]) wire(magmaInstance0.O[1][0][1], magmaInstance1.in_O[1][0][1]) wire(magmaInstance0.O[1][1][0], magmaInstance1.in_O[1][1][0]) wire(magmaInstance0.O[1][1][1], magmaInstance1.in_O[1][1][1])
import coreir
from magma.scope import Scope
from mantle.coreir.arith import *
from mantle.coreir.logic import *
from mantle.coreir.compare import *
from mantle.coreir import DefineCoreirConst
from mantle.coreir.LUT import *
from aetherling.modules.upsample import *
from aetherling.modules.downsample import *
from aetherling.modules.reduce import *
from aetherling.modules.native_linebuffer.two_dimensional_native_linebuffer import DefineTwoDimensionalLineBuffer

# Auto-generated-style 2x2 convolution over a 32x32 image, 4 px in per clock
# (the circuit continues past this excerpt).
args = ['I0', Array[8, In(Bit)], 'I1', Array[8, In(Bit)],
        'I2', Array[8, In(Bit)], 'I3', Array[8, In(Bit)],
        'O0', Array[8, Out(Bit)], 'O1', Array[8, Out(Bit)],
        'O2', Array[8, Out(Bit)], 'O3', Array[8, Out(Bit)],
        'valid_data_in', In(Bit), 'ready_data_in', Out(Bit),
        'valid_data_out', Out(Bit), 'ready_data_out', In(Bit),
        ] + ClockInterface(has_ce=True)
convolution_32x32Im_2x2Win_4px_in_per_clk = DefineCircuit('convolution_32x32Im_2x2Win_4px_in_per_clk_Circuit', *args)
# line buffer producing 2x2 windows, 4 parallel pixels per clock
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 4, 1, 2, 2, 32, 32, 1, 1, 0, 0)()
# convolution kernel coefficient constants (one set per parallel window)
magmaInstance1 = DefineCoreirConst(8, 1)()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 2)()
magmaInstance6 = DefineCoreirConst(8, 2)()
magmaInstance7 = DefineCoreirConst(8, 2)()
magmaInstance8 = DefineCoreirConst(8, 2)()
magmaInstance12 = DefineCoreirConst(8, 2)()
magmaInstance13 = DefineCoreirConst(8, 2)()
magmaInstance14 = DefineCoreirConst(8, 2)()
magmaInstance15 = DefineCoreirConst(8, 2)()
magmaInstance16 = DefineCoreirConst(8, 1)()
magmaInstance17 = DefineCoreirConst(8, 1)()
magmaInstance18 = DefineCoreirConst(8, 1)()
magmaInstance19 = DefineCoreirConst(8, 1)()
def definition(cls):
    """Build the reshape hardware: banked RAMs, address/valid/lane LUTs,
    counters that index the LUTs, and two bitonic sorting networks that
    route input lanes to RAM banks and RAM banks to output lanes.

    Free variables (captured from the enclosing Define* scope — TODO confirm
    exact spellings against the enclosing definition): t_in, t_out (space-time
    types), has_ce, has_reset, has_valid (interface flags).
    """
    # first section creates the RAMs and LUTs that set values in them and the sorting network
    # factor t_in/t_out into the shared outer/inner subtypes and the differing middle
    shared_and_diff_subtypes = get_shared_and_diff_subtypes(t_in, t_out)
    t_in_diff = shared_and_diff_subtypes.diff_input
    t_out_diff = shared_and_diff_subtypes.diff_output
    # wrap both sides in a TSeq(2, 0, ...) so the permutation graph covers
    # two iterations (write of one period overlaps read of the previous)
    graph = build_permutation_graph(ST_TSeq(2, 0, t_in_diff),
                                    ST_TSeq(2, 0, t_out_diff))
    # per input lane: which bank/addr it writes each clock
    banks_write_addr_per_input_lane = get_banks_addr_per_lane(graph.input_nodes)
    # per bank: which lane/addr writes it each clock
    input_lane_write_addr_per_bank = get_lane_addr_per_banks(graph.input_nodes)
    # per bank: which lane/addr reads it each clock
    output_lane_read_addr_per_bank = get_lane_addr_per_banks(graph.output_nodes)
    # each ram only needs to be large enough to handle the number of addresses assigned to it
    # all rams receive the same number of writes
    # but some of those writes don't happen as the data is invalid, so don't need storage for them
    max_ram_addrs = [
        max([bank_clock_data.addr for bank_clock_data in bank_data])
        for bank_data in output_lane_read_addr_per_bank
    ]
    # rams also handle parallelism from outer_shared type as this affects all banks the same
    outer_shared_sseqs = remove_tseqs(shared_and_diff_subtypes.shared_outer)
    if outer_shared_sseqs == ST_Tombstone():
        ram_element_type = shared_and_diff_subtypes.shared_inner
    else:
        ram_element_type = replace_tombstone(outer_shared_sseqs,
                                             shared_and_diff_subtypes.shared_inner)
    # can use wider rams rather than duplicate for outer_shared_sseqs because will
    # transpose dimensions of input wires below to wire up as if outer, shared dimensions
    # were on the inside
    rams = [
        DefineRAM_ST(ram_element_type, ram_max_addr + 1)()
        for ram_max_addr in max_ram_addrs
    ]
    # address width per RAM; used to size the address LUT entries below
    rams_addr_widths = [ram.WADDR.N for ram in rams]

    # for bank, the addresses to write to each clock
    write_addr_for_bank_luts = []
    for bank_idx in range(len(rams)):
        ram_addr_width = rams_addr_widths[bank_idx]
        num_addrs = len(input_lane_write_addr_per_bank[bank_idx])
        #assert num_addrs == t_in_diff.time()
        # LUT entries are bit-sequences (little-endian per int2seq) of each write address
        write_addrs = [
            builtins.tuple(int2seq(write_data_per_bank_per_clock.addr, ram_addr_width))
            for write_data_per_bank_per_clock in input_lane_write_addr_per_bank[bank_idx]
        ]
        write_addr_for_bank_luts.append(
            DefineLUTAnyType(Array[ram_addr_width, Bit], num_addrs,
                             builtins.tuple(write_addrs))())

    # for bank, whether to actually write this clock
    write_valid_for_bank_luts = []
    for bank_idx in range(len(rams)):
        num_valids = len(input_lane_write_addr_per_bank[bank_idx])
        #assert num_valids == t_in_diff.time()
        valids = [
            builtins.tuple([write_data_per_bank_per_clock.valid])
            for write_data_per_bank_per_clock in input_lane_write_addr_per_bank[bank_idx]
        ]
        write_valid_for_bank_luts.append(
            DefineLUTAnyType(Bit, num_valids, builtins.tuple(valids))())

    # for each input lane, the bank to write to each clock
    write_bank_for_input_lane_luts = []
    bank_idx_width = getRAMAddrWidth(len(rams))
    for lane_idx in range(len(banks_write_addr_per_input_lane)):
        num_bank_idxs = len(banks_write_addr_per_input_lane[lane_idx])
        #assert num_bank_idxs == t_in_diff.time()
        bank_idxs = [
            builtins.tuple(int2seq(write_data_per_lane_per_clock.bank, bank_idx_width))
            for write_data_per_lane_per_clock in banks_write_addr_per_input_lane[lane_idx]
        ]
        write_bank_for_input_lane_luts.append(
            DefineLUTAnyType(Array[bank_idx_width, Bit], num_bank_idxs,
                             builtins.tuple(bank_idxs))())

    # for each bank, the address to read from each clock
    read_addr_for_bank_luts = []
    for bank_idx in range(len(rams)):
        ram_addr_width = rams_addr_widths[bank_idx]
        num_addrs = len(output_lane_read_addr_per_bank[bank_idx])
        #assert num_addrs == t_in_diff.time()
        read_addrs = [
            builtins.tuple(int2seq(read_data_per_bank_per_clock.addr, ram_addr_width))
            for read_data_per_bank_per_clock in output_lane_read_addr_per_bank[bank_idx]
        ]
        read_addr_for_bank_luts.append(
            DefineLUTAnyType(Array[ram_addr_width, Bit], num_addrs,
                             builtins.tuple(read_addrs))())

    # for each bank, the lane to send each read to
    output_lane_for_bank_luts = []
    # number of lanes equals number of banks
    # some of the lanes are just always invalid, added so input lane width equals output lane width
    lane_idx_width = getRAMAddrWidth(len(rams))
    for bank_idx in range(len(rams)):
        num_lane_idxs = len(output_lane_read_addr_per_bank[bank_idx])
        #assert num_lane_idxs == t_in_diff.time()
        lane_idxs = [
            builtins.tuple(int2seq(read_data_per_bank_per_clock.s, lane_idx_width))
            for read_data_per_bank_per_clock in output_lane_read_addr_per_bank[bank_idx]
        ]
        output_lane_for_bank_luts.append(
            DefineLUTAnyType(Array[lane_idx_width, Bit], num_lane_idxs,
                             builtins.tuple(lane_idxs))())

    # second part creates the counters that index into the LUTs
    # elem_per counts time per element of the reshape
    elem_per_reshape_counter = AESizedCounterModM(ram_element_type.time(),
                                                  has_ce=True)
    # pulses high on the last clock of each element
    end_cur_elem = Decode(ram_element_type.time() - 1,
                          elem_per_reshape_counter.O.N)(elem_per_reshape_counter.O)
    # reshape counts which element in the reshape
    num_clocks = len(output_lane_read_addr_per_bank[0])
    reshape_write_counter = AESizedCounterModM(num_clocks, has_ce=True,
                                               has_reset=has_reset)
    reshape_read_counter = AESizedCounterModM(num_clocks, has_ce=True,
                                              has_reset=has_reset)
    # reads trail writes by the graph's output latency, in clocks
    output_delay = (get_output_latencies(graph)[0]) * ram_element_type.time()
    # this is present so testing knows the delay
    cls.output_delay = output_delay
    reshape_read_delay_counter = DefineInitialDelayCounter(output_delay,
                                                           has_ce=True,
                                                           has_reset=has_reset)()
    # outer counter that repeats the reshape
    #wire(reshape_write_counter.O, cls.reshape_write_counter)
    # enabled gates every counter: constant 1, optionally ANDed with valid_up and CE
    enabled = DefineCoreirConst(1, 1)().O[0]
    if has_valid:
        enabled = cls.valid_up & enabled
        wire(reshape_read_delay_counter.valid, cls.valid_down)
    if has_ce:
        enabled = bit(cls.CE) & enabled
    wire(enabled, elem_per_reshape_counter.CE)
    wire(enabled, reshape_read_delay_counter.CE)
    # write counter advances once per element; read counter additionally
    # waits for the initial-delay counter to report valid
    wire(enabled & end_cur_elem, reshape_write_counter.CE)
    wire(enabled & end_cur_elem & reshape_read_delay_counter.valid,
         reshape_read_counter.CE)
    if has_reset:
        wire(cls.RESET, elem_per_reshape_counter.RESET)
        wire(cls.RESET, reshape_read_delay_counter.RESET)
        wire(cls.RESET, reshape_write_counter.RESET)
        wire(cls.RESET, reshape_read_counter.RESET)

    # wire read and write counters to all LUTs
    for lut in write_bank_for_input_lane_luts:
        wire(reshape_write_counter.O, lut.addr)
    for lut in write_addr_for_bank_luts:
        wire(reshape_write_counter.O, lut.addr)
    for lut in write_valid_for_bank_luts:
        wire(reshape_write_counter.O, lut.addr)
    for lut in read_addr_for_bank_luts:
        wire(reshape_read_counter.O, lut.addr)
    for lut in output_lane_for_bank_luts:
        wire(reshape_read_counter.O, lut.addr)

    # third and final instance creation part creates the sorting networks that map lanes to banks
    # input network sorts (bank, value) pairs by bank so lane data lands on its bank
    input_sorting_network_t = Tuple(
        bank=Array[write_bank_for_input_lane_luts[0].data.N, Bit],
        val=ram_element_type.magma_repr())
    input_sorting_network = DefineBitonicSort(input_sorting_network_t,
                                              len(rams), lambda x: x.bank)()
    # output network sorts (lane, value) pairs by lane so bank data lands on its lane
    output_sorting_network_t = Tuple(
        lane=Array[output_lane_for_bank_luts[0].data.N, Bit],
        val=ram_element_type.magma_repr())
    output_sorting_network = DefineBitonicSort(output_sorting_network_t,
                                               len(rams), lambda x: x.lane)()

    # wire luts, sorting networks, inputs, and rams
    # flatten all the sseq_layers to get flat magma type of inputs and outputs
    # tseqs don't affect magma types
    num_sseq_layers_inputs = num_nested_layers(
        remove_tseqs(shared_and_diff_subtypes.diff_input))
    num_sseq_layers_to_remove_inputs = max(0, num_sseq_layers_inputs - 1)
    num_sseq_layers_outputs = num_nested_layers(
        remove_tseqs(shared_and_diff_subtypes.diff_output))
    num_sseq_layers_to_remove_outputs = max(0, num_sseq_layers_outputs - 1)
    if remove_tseqs(shared_and_diff_subtypes.shared_outer) != ST_Tombstone():
        #num_sseq_layers_inputs += num_nested_layers(remove_tseqs(shared_and_diff_subtypes.shared_outer))
        #num_sseq_layers_outputs += num_nested_layers(remove_tseqs(shared_and_diff_subtypes.shared_outer))
        # shared outer sseq dimensions are transposed inward so the diff
        # dimensions can be flattened uniformly
        input_ports = flatten_ports(
            transpose_outer_dimensions(shared_and_diff_subtypes.shared_outer,
                                       shared_and_diff_subtypes.diff_input,
                                       cls.I),
            num_sseq_layers_to_remove_inputs)
        output_ports = flatten_ports(
            transpose_outer_dimensions(shared_and_diff_subtypes.shared_outer,
                                       shared_and_diff_subtypes.diff_output,
                                       cls.O),
            num_sseq_layers_to_remove_outputs)
    else:
        input_ports = flatten_ports(cls.I, num_sseq_layers_to_remove_inputs)
        output_ports = flatten_ports(cls.O, num_sseq_layers_to_remove_outputs)

    # this is only used if the shared outer layers contains any sseqs
    sseq_layers_to_flatten = max(
        num_nested_layers(remove_tseqs(shared_and_diff_subtypes.shared_outer)) - 1,
        0)

    for idx in range(len(rams)):
        # wire input and bank to input sorting network
        wire(write_bank_for_input_lane_luts[idx].data,
             input_sorting_network.I[idx].bank)
        #if idx == 0:
        #    wire(cls.first_valid, write_valid_for_bank_luts[idx].data)
        if idx < t_in_diff.port_width():
            # since the input_ports are lists, need to wire them individually to the sorting ports
            if remove_tseqs(shared_and_diff_subtypes.shared_outer) != ST_Tombstone():
                cur_input_port = flatten_ports(input_ports[idx],
                                               sseq_layers_to_flatten)
                cur_sort_port = flatten_ports(input_sorting_network.I[idx].val,
                                              sseq_layers_to_flatten)
                for i in range(len(cur_input_port)):
                    wire(cur_input_port[i], cur_sort_port[i])
            else:
                if num_sseq_layers_inputs == 0:
                    # input_ports will be an array of bits for 1 element
                    # if no sseq in t_in
                    wire(input_ports, input_sorting_network.I[idx].val)
                else:
                    wire(input_ports[idx], input_sorting_network.I[idx].val)
            #wire(cls.ram_wr, input_sorting_network.O[idx].val)
            #wire(cls.ram_rd, rams[idx].RDATA)
        else:
            # more banks than input lanes: feed the surplus network inputs zeros
            zero_const = DefineCoreirConst(ram_element_type.magma_repr().size(),
                                           0)().O
            cur_sn_input = input_sorting_network.I[idx].val
            # descend into nested arrays until widths match the flat constant
            while len(cur_sn_input) != len(zero_const):
                cur_sn_input = cur_sn_input[0]
            wire(zero_const, cur_sn_input)

        # wire input sorting network, write addr, and write valid luts to banks
        wire(input_sorting_network.O[idx].val, rams[idx].WDATA)
        wire(write_addr_for_bank_luts[idx].data, rams[idx].WADDR)
        #wire(write_addr_for_bank_luts[idx].data[0], cls.addr_wr[idx])
        if has_ce:
            wire(write_valid_for_bank_luts[idx].data & bit(cls.CE),
                 rams[idx].WE)
        else:
            wire(write_valid_for_bank_luts[idx].data, rams[idx].WE)

        # wire output sorting network, read addr, read bank, and read enable
        wire(rams[idx].RDATA, output_sorting_network.I[idx].val)
        wire(output_lane_for_bank_luts[idx].data,
             output_sorting_network.I[idx].lane)
        wire(read_addr_for_bank_luts[idx].data, rams[idx].RADDR)
        #wire(read_addr_for_bank_luts[idx].data[0], cls.addr_rd[idx])
        # ok to read invalid things, so in read value LUT
        if has_ce:
            wire(bit(cls.CE), rams[idx].RE)
        else:
            wire(DefineCoreirConst(1, 1)().O[0], rams[idx].RE)
        if has_reset:
            wire(cls.RESET, rams[idx].RESET)

        # wire output sorting network value to output or term
        if idx < t_out_diff.port_width():
            # since the output_ports are lists, need to wire them individually to the sorting ports
            if remove_tseqs(shared_and_diff_subtypes.shared_outer) != ST_Tombstone():
                cur_output_port = flatten_ports(output_ports[idx],
                                                sseq_layers_to_flatten)
                cur_sort_port = flatten_ports(output_sorting_network.O[idx].val,
                                              sseq_layers_to_flatten)
                for i in range(len(cur_output_port)):
                    wire(cur_output_port[i], cur_sort_port[i])
            else:
                if num_sseq_layers_outputs == 0:
                    # output_ports will be an array of bits for 1 element
                    # if no sseq in t_out
                    wire(output_sorting_network.O[idx].val, output_ports)
                else:
                    wire(output_sorting_network.O[idx].val, output_ports[idx])
        else:
            # surplus network output: terminate it
            # NOTE(review): wires to the TermAnyType instance rather than its .I
            # port, unlike other Term usage in this file — confirm magma accepts
            # wiring to a single-port instance directly
            wire(output_sorting_network.O[idx].val,
                 TermAnyType(type(output_sorting_network.O[idx].val)))

        # wire sorting networks bank/lane to term as not used on outputs, just used for sorting
        wire(input_sorting_network.O[idx].bank,
             TermAnyType(type(input_sorting_network.O[idx].bank)))
        wire(output_sorting_network.O[idx].lane,
             TermAnyType(type(output_sorting_network.O[idx].lane)))
outType2 = TOUT # Top level module: line buffer input, reduce output args = ['I', inType, 'O', outType2, 'WE', BitIn, 'V', Out(Bit)] + ClockInterface(False, False) top = DefineCircuit('Downscale', *args) # Line buffer declaration lb = Linebuffer(cirb, inType, outType, imgType, True) wire(lb.I, top.I) wire(lb.wen, top.WE) # Reduce declaration red = ReduceParallel(cirb, m * n, renameCircuitForReduce(DeclareAdd(b))) # additive identity coreirConst = DefineCoreirConst(b, 0)() # flatten linebuffer output and wire to reduce parallel input for i in range(n): for j in range(m): k = m * i + j wire(red.I.data[k], lb.out[i][j]) wire(red.I.identity, coreirConst.out) wire(top.O, red.out) wire(top.V, lb.valid) EndCircuit() module = GetCoreIRModule(cirb, top) module.save_to_file("downscale.json")
'O0', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) convolution_32x32Im_2x2Win_1px_in_per_clk = DefineCircuit( 'convolution_32x32Im_2x2Win_1px_in_per_clk_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 1, 1, 2, 2, 32, 32, 1, 1, 0, 0)() magmaInstance1 = DefineCoreirConst(8, 1)() magmaInstance2 = DefineCoreirConst(8, 2)() magmaInstance3 = DefineCoreirConst(8, 2)() magmaInstance4 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineCoreirMul(8)() magmaInstance6 = DefineCoreirMul(8)() magmaInstance7 = DefineCoreirMul(8)() magmaInstance8 = DefineCoreirMul(8)() wire(magmaInstance0.O[0][0][0], magmaInstance5.I0) wire(magmaInstance1.O, magmaInstance5.I1) wire(magmaInstance0.O[0][0][1], magmaInstance6.I0) wire(magmaInstance2.O, magmaInstance6.I1) wire(magmaInstance0.O[0][1][0], magmaInstance7.I0) wire(magmaInstance3.O, magmaInstance7.I1) wire(magmaInstance0.O[0][1][1], magmaInstance8.I0) wire(magmaInstance4.O, magmaInstance8.I1)
'O7', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) convolution_32x32Im_2x2Win_8px_in_per_clk = DefineCircuit( 'convolution_32x32Im_2x2Win_8px_in_per_clk_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2, 32, 32, 1, 1, 0, 0)() magmaInstance1 = DefineCoreirConst(8, 1)() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance4 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineCoreirConst(8, 1)() magmaInstance6 = DefineCoreirConst(8, 1)() magmaInstance7 = DefineCoreirConst(8, 1)() magmaInstance8 = DefineCoreirConst(8, 1)() magmaInstance9 = DefineCoreirConst(8, 2)() magmaInstance10 = DefineCoreirConst(8, 2)() magmaInstance11 = DefineCoreirConst(8, 2)() magmaInstance12 = DefineCoreirConst(8, 2)() magmaInstance13 = DefineCoreirConst(8, 2)() magmaInstance14 = DefineCoreirConst(8, 2)() magmaInstance15 = DefineCoreirConst(8, 2)() magmaInstance16 = DefineCoreirConst(8, 2)()
'O15', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True) partialParallel16Convolution = DefineCircuit( 'partialParallel16Convolution_Circuit', *args) magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 2, 2, 2, 8, 8, 1, 1, 0, 0)() magmaInstance1 = DefineCoreirConst(8, 1)() magmaInstance2 = DefineCoreirConst(8, 1)() magmaInstance3 = DefineCoreirConst(8, 1)() magmaInstance4 = DefineCoreirConst(8, 1)() magmaInstance5 = DefineCoreirConst(8, 1)() magmaInstance6 = DefineCoreirConst(8, 1)() magmaInstance7 = DefineCoreirConst(8, 1)() magmaInstance8 = DefineCoreirConst(8, 1)() magmaInstance9 = DefineCoreirConst(8, 1)() magmaInstance10 = DefineCoreirConst(8, 1)() magmaInstance11 = DefineCoreirConst(8, 1)() magmaInstance12 = DefineCoreirConst(8, 1)() magmaInstance13 = DefineCoreirConst(8, 1)() magmaInstance14 = DefineCoreirConst(8, 1)() magmaInstance15 = DefineCoreirConst(8, 1)() magmaInstance16 = DefineCoreirConst(8, 1)()
def definition(cls):
    """Recursively build counters that track position/validity within the
    space-time type t.

    Three cases:
      * t is a TSeq: an outer mod-(n+i) counter over the TSeq plus recursive
        inner counters for t.t; valid is low during the TSeq's invalid
        (i) clocks, last pulses on the final clock of the final element.
      * t is nested but not a TSeq (e.g. an SSeq wrapper): delegate entirely
        to the counters for t.t.
      * t is a leaf: a single element, so always valid and always last.

    Free variables captured from the enclosing scope: t (the space-time type)
    and the flags has_last, has_cur_valid, has_ce, has_reset,
    valid_when_ce_off.
    """
    if type(t) == ST_TSeq:
        # counts all n valid + i invalid elements of the TSeq
        outer_counter = AESizedCounterModM(t.n + t.i, has_ce=True,
                                           has_reset=has_reset)
        inner_counters = DefineNestedCounters(t.t,
                                              has_last=True,
                                              has_cur_valid=False,
                                              has_ce=has_ce,
                                              has_reset=has_reset,
                                              valid_when_ce_off=valid_when_ce_off)()
        if has_last:
            # high when outer counter is on the final element of the TSeq
            is_last = Decode(t.n + t.i - 1, outer_counter.O.N)(outer_counter.O)
        if has_cur_valid:
            # counts only the valid clocks, exposed on cur_valid
            cur_valid_counter = AESizedCounterModM(t.valid_clocks(),
                                                   has_ce=True,
                                                   has_reset=has_reset)
            wire(cur_valid_counter.O, cls.cur_valid)
        # if t.n is a power of 2 and always valid, then outer_counter.O.N not enough bits
        # for valid_length to contain t.n and for is_valid to get the right input
        # always valid in this case, so just emit 1
        # NOTE(review): float comparison via math.pow; (1 << N) - 1 would be
        # exact for large N — confirm widths stay small enough that this holds
        if math.pow(2, outer_counter.O.N) - 1 < t.n:
            is_valid = DefineCoreirConst(1, 1)().O[0]
            if not has_last:
                # never using the outer_counter if not has_last
                last_term = TermAnyType(type(outer_counter.O))
                wire(outer_counter.O, last_term.I)
        else:
            # valid while the outer index is below n (i.e. not in the i invalid clocks)
            valid_length = DefineCoreirConst(outer_counter.O.N, t.n)()
            is_valid_cmp = DefineCoreirUlt(outer_counter.O.N)()
            wire(is_valid_cmp.I0, outer_counter.O)
            wire(is_valid_cmp.I1, valid_length.O)
            is_valid = is_valid_cmp.O
        # valid only when both this level and the inner levels are valid
        wire(inner_counters.valid & is_valid, cls.valid)
        if has_last:
            wire(is_last & inner_counters.last, cls.last)
        if has_reset:
            wire(cls.RESET, outer_counter.RESET)
            wire(cls.RESET, inner_counters.RESET)
            if has_cur_valid:
                wire(cls.RESET, cur_valid_counter.RESET)
        if has_ce:
            # outer counter only steps when the inner counters wrap
            wire(bit(cls.CE) & inner_counters.last, outer_counter.CE)
            wire(cls.CE, inner_counters.CE)
            if has_cur_valid:
                wire(bit(cls.CE) & inner_counters.valid & is_valid,
                     cur_valid_counter.CE)
        else:
            wire(inner_counters.last, outer_counter.CE)
            if has_cur_valid:
                wire(inner_counters.valid & is_valid, cur_valid_counter.CE)
    elif is_nested(t):
        # non-TSeq wrapper: this level adds no timing, pass everything through
        inner_counters = DefineNestedCounters(t.t, has_last, has_cur_valid,
                                              has_ce, has_reset,
                                              valid_when_ce_off=valid_when_ce_off)()
        wire(inner_counters.valid, cls.valid)
        if has_last:
            wire(inner_counters.last, cls.last)
        if has_reset:
            wire(cls.RESET, inner_counters.RESET)
        if has_ce:
            wire(cls.CE, inner_counters.CE)
        if has_cur_valid:
            wire(inner_counters.cur_valid, cls.cur_valid)
    else:
        # only 1 element, so always last and valid element
        valid_and_last = DefineCoreirConst(1, 1)()
        if has_last:
            wire(valid_and_last.O[0], cls.last)
        if has_cur_valid:
            # single element: current valid index is constant 0
            cur_valid = DefineCoreirConst(1, 0)()
            wire(cur_valid.O, cls.cur_valid)
        if has_ce:
            if valid_when_ce_off:
                # always valid regardless of CE; CE itself is unused, so terminate it
                wire(cls.valid, valid_and_last.O[0])
                ce_term = TermAnyType(Bit)
                wire(cls.CE, ce_term.I)
            else:
                # valid tracks CE directly
                wire(cls.valid, cls.CE)
        else:
            wire(valid_and_last.O[0], cls.valid)
        if has_reset:
            # nothing stateful here; terminate RESET so the port is consumed
            reset_term = TermAnyType(Bit)
            wire(reset_term.I, cls.RESET)