コード例 #1
0
ファイル: test_reduce.py プロジェクト: adamdai/aetherling
def test_reduce_parallel():
    """Simulate a ReduceParallel adder tree and check it sums its inputs.

    Input lane ``k`` is driven with the value ``k``; the circuit output must
    equal ``sum(range(num_inputs))``.
    """
    bit_width = 11
    num_inputs = 13
    backend = CoreIRBackend(coreir.Context())
    sim_scope = Scope()
    ports = [
        'I', In(Array(num_inputs, Array(bit_width, BitIn))),
        'O', Out(Array(bit_width, Bit)),
    ] + ClockInterface(False, False)

    dut = DefineCircuit('Test_Reduce_Parallel', *ports)

    # reducer built from adders, with a constant 0 as the identity input
    adder = renameCircuitForReduce(DefineAdd(bit_width))
    reducer = ReduceParallel(backend, num_inputs, adder)
    zero = DefineCoreirConst(bit_width, 0)()
    wire(reducer.I.data, dut.I)
    wire(reducer.I.identity, zero.out)
    wire(dut.O, reducer.out)

    EndCircuit()

    simulator = CoreIRSimulator(
        dut,
        dut.CLK,
        context=backend.context,
        namespaces=["aetherlinglib", "commonlib", "mantle", "coreir", "global"])

    for lane in range(num_inputs):
        simulator.set_value(dut.I[lane], int2seq(lane, bit_width), sim_scope)
    simulator.evaluate()
    observed = seq2int(simulator.get_value(dut.O, sim_scope))
    assert observed == sum(range(num_inputs))
コード例 #2
0
ファイル: shift.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Wire the lane shift for this circuit.

            Output lane ``i`` is driven from input lane
            ``(i - shift_amount) % ni``, either directly or through a
            ``Shift_T`` instance when the per-lane amount computed below is
            non-zero.
            """
            # start from a constant 1 and AND in the optional valid / clock-enable inputs
            enabled = DefineCoreirConst(1, 1)().O[0]
            if has_valid:
                enabled = cls.valid_up & enabled
                # valid is forwarded unchanged from input to output
                wire(cls.valid_up, cls.valid_down)
            if has_ce:
                enabled = bit(cls.CE) & enabled

            # don't need valid on these shift_t as they'll be getting it from the enable signal
            # one entry per lane: a Shift_T instance, or None when that lane needs no Shift_T
            shift_t_xs = []
            for i in range(ni):
                shift_amount_t = (ni - i + shift_amount - 1) // ni
                if shift_amount_t == 0:
                    shift_t_xs.append(None)
                else:
                    shift_t_xs.append(
                        DefineShift_T(no, io, shift_amount_t, elem_t, True,
                                      has_reset, False)())

            for i in range(ni):
                if shift_t_xs[i] is None:
                    # no Shift_T for this lane: connect the rotated input straight to the output
                    wire(cls.I[(i - shift_amount) % ni], cls.O[i])
                else:
                    # route the rotated input through this lane's Shift_T, gated by `enabled`
                    wire(cls.I[(i - shift_amount) % ni], shift_t_xs[i].I)
                    wire(shift_t_xs[i].O, cls.O[i])
                    wire(enabled, shift_t_xs[i].CE)
                    if has_reset:
                        wire(cls.RESET, shift_t_xs[i].RESET)
コード例 #3
0
def test_const():
    """Check the repr and the compiled CoreIR JSON of a 4-bit constant 8."""
    const_def = DefineCoreirConst(width=4, value=8)
    expected_repr = """\
coreir_const48 = DeclareCircuit("coreir_const48", "O", Out(Bits(4)))"""
    assert repr(const_def) == expected_repr

    # compile the wrapped constant and compare against the golden file
    compile("build/test_const", wrap(const_def), output="coreir")
    matches_gold = check_files_equal(__file__, "build/test_const.json",
                                     "gold/test_const.json")
    assert matches_gold
コード例 #4
0
ファイル: shift.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Wire a shift built around a ``RAM_ST`` value store.

            A single counter output drives both the RAM write and read
            address; ``cls.I`` feeds WDATA and RDATA drives ``cls.O``. Nested
            counters decide when the RAM is accessed and when the address
            advances.
            """
            # start from a constant 1 and AND in the optional valid / clock-enable inputs
            enabled = DefineCoreirConst(1, 1)().O[0]
            if has_valid:
                enabled = cls.valid_up & enabled
                # valid is forwarded unchanged from input to output
                wire(cls.valid_up, cls.valid_down)
            if has_ce:
                enabled = bit(cls.CE) & enabled

            value_store = DefineRAM_ST(elem_t,
                                       shift_amount,
                                       has_reset=has_reset)()

            # write and read from same location
            # will write on first iteration through element, write and read on later iterations
            # output for first iteration is undefined, so ok to read anything
            next_ram_addr = DefineNestedCounters(elem_t,
                                                 has_ce=True,
                                                 has_reset=has_reset)()
            # it's fine that this doesn't account for the invalid clocks of outer TSeq
            # after the invalid clocks, the next iteration will start from
            # an index that is possibly not 0. That doesn't matter
            # as will just loop around
            ram_addr = AESizedCounterModM(shift_amount,
                                          has_ce=True,
                                          has_reset=has_reset)
            # this handles invalid clocks of inner TSeq
            # build the nested TSeq type from the innermost (last) dimension outwards
            inner_valid_t = ST_Int()
            for i in range(len(nis))[::-1]:
                inner_valid_t = ST_TSeq(nis[i], iis[i], inner_valid_t)
            inner_valid = DefineNestedCounters(inner_valid_t,
                                               has_last=False,
                                               has_ce=True,
                                               has_reset=has_reset,
                                               valid_when_ce_off=True)()

            # same counter value is used as write and read address
            wire(ram_addr.O, value_store.WADDR)
            wire(ram_addr.O, value_store.RADDR)

            # RAM is written/read only while enabled and the inner counters report valid
            wire(enabled & inner_valid.valid, value_store.WE)
            # inner_valid steps each time next_ram_addr reports `last` while enabled
            wire(enabled & next_ram_addr.last, inner_valid.CE)
            #wire(inner_valid.valid, cls.inner_valid)
            wire(enabled & inner_valid.valid, value_store.RE)
            # the RAM address advances only when all three conditions hold
            wire(enabled & next_ram_addr.last & inner_valid.valid, ram_addr.CE)
            wire(enabled, next_ram_addr.CE)

            # next_ram_addr.valid is not used by the logic above; it is routed into a
            # TermAnyType instance (presumably so the output is not left dangling — confirm)
            next_ram_addr_term = TermAnyType(Bit)
            wire(next_ram_addr.valid, next_ram_addr_term.I)

            wire(cls.I, value_store.WDATA)
            wire(value_store.RDATA, cls.O)
            if has_reset:
                # fan the circuit RESET out to every stateful sub-instance
                wire(value_store.RESET, cls.RESET)
                wire(ram_addr.RESET, cls.RESET)
                wire(next_ram_addr.RESET, cls.RESET)
                wire(inner_valid.RESET, cls.RESET)
コード例 #5
0
ファイル: arith_atom.py プロジェクト: David-Durst/aetherling
 def definition(cls):
     """Wire the Verilog module from ``pipelined/mul.v`` as this circuit's body.

     ``cls.I[0]`` and ``cls.I[1]`` drive the module's ``a`` and ``b`` inputs,
     and bits 0-7 of its ``p`` output drive ``cls.O``.
     """
     # the Verilog file is looked up relative to this source file
     dir_path = os.path.dirname(os.path.realpath(__file__))
     # take the first circuit defined by the file and instantiate it
     op = m.DefineFromVerilogFile(os.path.join(dir_path, "pipelined",
                                               "mul.v"),
                                  type_map={"CLK": m.In(m.Clock)})[0]()
     zero_const = DefineCoreirConst(1, 0)()
     one_const = DefineCoreirConst(1, 1)()
     # rst is tied to constant 0 and ce to constant 1
     wire(zero_const.O[0], op.rst)
     wire(one_const.O[0], op.ce)
     wire(cls.I[0], op.a)
     wire(cls.I[1], op.b)
     # only bits 0-7 of p are exposed; bits 8-15 are sent to a Term instance
     wire(op.p[0:8], cls.O)
     term = DefineTerm(8)()
     wire(op.p[8:16], term.I)
     if has_valid:
         # valid_up reaches valid_down through two chained 1-bit registers
         # (presumably matching the pipeline latency of mul.v — confirm)
         reg0 = DefineRegister(1)()
         reg1 = DefineRegister(1)()
         wire(cls.valid_up, reg0.I[0])
         wire(reg0.O, reg1.I)
         wire(reg1.O[0], cls.valid_down)
コード例 #6
0
ファイル: upsample.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Wire the upsample body.

            The input is stored in a one-entry ``RAM_ST`` while the element
            index counter is 0. A two-way mux driven by that same "first
            element" signal chooses between ``cls.I`` and the RAM read data
            for ``cls.O``.
            """
            # start from a constant 1 and AND in the optional valid / clock-enable inputs
            enabled = DefineCoreirConst(1, 1)().O[0]
            if has_valid:
                enabled = cls.valid_up & enabled
                # valid is forwarded unchanged from input to output
                wire(cls.valid_up, cls.valid_down)
            if has_ce:
                enabled = bit(cls.CE) & enabled

            value_store = DefineRAM_ST(elem_t, 1, has_reset=has_reset)()

            # write to value_store for first element, read for next
            element_time_counter = DefineNestedCounters(elem_t, has_ce=True, has_reset=has_reset)()
            element_idx_counter = AESizedCounterModM(n + i, has_ce=True, has_reset=has_reset)
            # high when the element index counter reads 0
            is_first_element = Decode(0, element_idx_counter.O.N)(element_idx_counter.O)

            # both RAM addresses are tied to a constant 0
            zero_addr = DefineCoreirConst(1, 0)().O
            wire(zero_addr, value_store.WADDR)
            wire(zero_addr, value_store.RADDR)

            # write only on the first element; read whenever enabled
            wire(enabled & is_first_element, value_store.WE)
            wire(enabled, value_store.RE)
            wire(enabled, element_time_counter.CE)
            # the element index advances when the time counter reports `last` while enabled
            wire(enabled & element_time_counter.last, element_idx_counter.CE)

            # element_time_counter.valid is not used by the logic; it is routed into a
            # TermAnyType instance (presumably so the output is not left dangling — confirm)
            element_time_counter_term = TermAnyType(Bit)
            wire(element_time_counter.valid, element_time_counter_term.I)

            wire(cls.I, value_store.WDATA)

            output_selector = DefineMuxAnyType(elem_t.magma_repr(), 2)()

            # on first element, send the input directly out. otherwise, use the register
            wire(is_first_element, output_selector.sel[0])
            wire(value_store.RDATA, output_selector.data[0])
            wire(cls.I, output_selector.data[1])
            wire(output_selector.out, cls.O)

            if has_reset:
                # fan the circuit RESET out to every stateful sub-instance
                wire(value_store.RESET, cls.RESET)
                wire(element_time_counter.RESET, cls.RESET)
                wire(element_idx_counter.RESET, cls.RESET)
コード例 #7
0
ファイル: test_const.py プロジェクト: akeley98/mantle
def test_const():
    """Check the repr and the compiled CoreIR JSON of a 4-bit constant 8."""
    const_def = DefineCoreirConst(width=4, value=8)
    expected_repr = """\
coreir_const48 = DefineCircuit("coreir_const48", "out", Out(Bits(4)))
wire(0, coreir_const48.out[0])
wire(0, coreir_const48.out[1])
wire(0, coreir_const48.out[2])
wire(1, coreir_const48.out[3])
EndCircuit()"""
    assert repr(const_def) == expected_repr

    # compile the constant and compare against the golden file
    compile("build/test_const", const_def, output="coreir")
    matches_gold = check_files_equal(__file__, "build/test_const.json",
                                     "gold/test_const.json")
    assert matches_gold
コード例 #8
0
def test_term():
    """Build a pass-through circuit containing a terminated constant and test it.

    The circuit wires I straight to O; a constant 0 is fed into a
    TermAnyType instance on the side. The fault tester pokes 2 and expects 2.
    """
    bit_width = 11
    lane_type = Array[bit_width, BitIn]

    ports = ['I', In(lane_type), 'O', Out(lane_type)]
    dut = DefineCircuit('Test_Term', *ports)
    wire(dut.I, dut.O)
    sink = TermAnyType(lane_type)
    zero = DefineCoreirConst(bit_width, 0)()
    wire(zero.O, sink.I)
    EndCircuit()

    harness = fault.Tester(dut)
    harness.circuit.I = 2
    harness.eval()
    harness.circuit.O.expect(2)
    compile_and_run(harness)
コード例 #9
0
ファイル: shift.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Wire a shift built around a ``RAM_ST`` value store.

            A single counter output drives both the RAM write and read
            address; ``cls.I`` feeds WDATA and RDATA drives ``cls.O``.
            """
            # start from a constant 1 and AND in the optional valid / clock-enable inputs
            enabled = DefineCoreirConst(1, 1)().O[0]
            if has_valid:
                enabled = cls.valid_up & enabled
                # valid is forwarded unchanged from input to output
                wire(cls.valid_up, cls.valid_down)
            if has_ce:
                enabled = bit(cls.CE) & enabled

            value_store = DefineRAM_ST(elem_t,
                                       shift_amount,
                                       has_reset=has_reset)()

            # write and read from same location
            # will write on first iteration through element, write and read on later iterations
            # output for first iteration is undefined, so ok to read anything
            next_ram_addr = DefineNestedCounters(elem_t,
                                                 has_ce=True,
                                                 has_reset=has_reset)()
            # it's fine that this doesn't account for the invalid clocks.
            # after the invalid clocks, the next iteration will start from
            # an index that is possibly not 0. That doesn't matter
            # as will just loop around
            ram_addr = AESizedCounterModM(shift_amount,
                                          has_ce=True,
                                          has_reset=has_reset)

            # same counter value is used as write and read address
            wire(ram_addr.O, value_store.WADDR)
            wire(ram_addr.O, value_store.RADDR)

            wire(enabled, value_store.WE)
            wire(enabled, value_store.RE)
            # the RAM address advances when next_ram_addr reports `last` while enabled
            wire(enabled & next_ram_addr.last, ram_addr.CE)
            wire(enabled, next_ram_addr.CE)

            # next_ram_addr.valid is not used by the logic above; it is routed into a
            # TermAnyType instance (presumably so the output is not left dangling — confirm)
            next_ram_addr_term = TermAnyType(Bit)
            wire(next_ram_addr.valid, next_ram_addr_term.I)

            wire(cls.I, value_store.WDATA)
            wire(value_store.RDATA, cls.O)
            if has_reset:
                # fan the circuit RESET out to every stateful sub-instance
                wire(value_store.RESET, cls.RESET)
                wire(ram_addr.RESET, cls.RESET)
                wire(next_ram_addr.RESET, cls.RESET)
コード例 #10
0
        def definition(BitonicSort):
            """Wire an ``n``-input sort on top of a power-of-two bitonic sorter.

            The first ``n`` ports of the power-of-two sorter are connected to
            this circuit's I/O. Any remaining sorter inputs are fed an all-ones
            constant and the matching outputs go to ``TermAnyType`` instances.
            """
            # generate the max value (all 1's) and feed it to all inputs to
            # power 2 bitonic sorting network not used by inputs
            t_size = T.size()
            n_raised_to_nearest_pow2 = pow(2, ceil(log2(n)))
            if n_raised_to_nearest_pow2 > n:
                # flat all-ones constant, converted to type T through Hydrate
                max_const_flat = DefineCoreirConst(t_size,
                                                   pow(2, t_size) - 1)()
                max_const = Hydrate(T)
                wire(max_const_flat.O, max_const.I)

            pow2_sort = DefineBitonicSortPow2(T, n_raised_to_nearest_pow2,
                                              cmp_component)()
            for i in range(n_raised_to_nearest_pow2):
                if i < n:
                    wire(BitonicSort.I[i], pow2_sort.I[i])
                    wire(BitonicSort.O[i], pow2_sort.O[i])
                else:
                    # padding lane: only reached when the padded size exceeds n,
                    # which is exactly when max_const was created above
                    wire(max_const.out, pow2_sort.I[i])
                    term = TermAnyType(T)
                    wire(term.I, pow2_sort.O[i])
コード例 #11
0
ファイル: test_noop.py プロジェクト: David-Durst/aetherling
def test_noop():
    """Drive an 8-bit value through a Noop instance and check it is unchanged."""
    expected = 21
    sim_scope = Scope()
    ports = ['I', Array[8, In(Bit)], 'O', Array[8, Out(Bit)]]
    ports = ports + ClockInterface(False, False)

    dut = DefineCircuit('Test', *ports)

    # Noop built from an 8-bit constant definition, placed between I and O
    noop = DefineNoop(DefineCoreirConst(8, 0))()
    wire(noop.in_O, dut.I)
    wire(dut.O, noop.O)

    EndCircuit()

    simulator = CoreIRSimulator(dut, dut.CLK)

    simulator.set_value(dut.I, int2seq(expected, 8), sim_scope)
    # evaluate, step one clock cycle, then evaluate again before sampling
    simulator.evaluate()
    simulator.advance_cycle()
    simulator.evaluate()
    observed = seq2int(simulator.get_value(dut.O, sim_scope))
    assert observed == expected
コード例 #12
0
ファイル: const.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Wire a constant generator backed by a lookup table.

            A counter modulo ``t.time()`` addresses a LUT built from
            ``ts_values``; the LUT data drives ``cls.O``. The counter is
            enabled immediately when ``delay == 0``, otherwise once an
            ``InitialDelayCounter`` reports valid.
            """
            one_const = DefineCoreirConst(1, 1)().O[0]
            if delay == 0:
                enabled = one_const
            else:
                # the delay counter is always clock-enabled; its valid output gates the LUT counter
                delay_counter = InitialDelayCounter(delay)
                wire(delay_counter.CE, one_const)
                enabled = delay_counter.valid
            if has_ce:
                enabled = bit(cls.CE) & enabled

            luts = DefineLUTAnyType(t.magma_repr(), t.time(), ts_arrays_to_bits(ts_values))()
            lut_position_counter = AESizedCounterModM(t.time(), has_ce=True, has_reset=has_reset)

            wire(lut_position_counter.O, luts.addr)
            wire(cls.O, luts.data)
            wire(enabled, lut_position_counter.CE)

            if has_reset:
                wire(cls.RESET, lut_position_counter.RESET)
            if has_valid:
                # valid_up is not used by the logic; it is routed into a Term instance,
                # and valid_down is driven by the internal enable instead
                valid_up_term = TermAnyType(Bit)
                wire(cls.valid_up, valid_up_term.I)
                wire(enabled, cls.valid_down)
コード例 #13
0
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
downsample_256x256_to_32x32_16px_in_per_clk = DefineCircuit(
    'downsample_256x256_to_32x32_16px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 16, 1, 2, 2,
                                                256, 256, 2, 2, 0, 0)()
magmaInstance1 = DefineNoop(
    DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 16, 1, 2, 2, 256, 256, 2,
                                   2, 0, 0))()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 1)()
magmaInstance6 = DefineCoreirConst(8, 2)()
magmaInstance7 = DefineCoreirConst(8, 2)()
magmaInstance8 = DefineCoreirConst(8, 2)()
magmaInstance9 = DefineCoreirConst(8, 2)()
magmaInstance13 = DefineCoreirConst(8, 3)()
magmaInstance14 = DefineCoreirConst(8, 3)()
magmaInstance15 = DefineCoreirConst(8, 3)()
magmaInstance16 = DefineCoreirConst(8, 3)()
magmaInstance17 = DefineCoreirConst(8, 4)()
magmaInstance18 = DefineCoreirConst(8, 4)()
magmaInstance19 = DefineCoreirConst(8, 4)()
magmaInstance20 = DefineCoreirConst(8, 4)()
コード例 #14
0
        def definition(cls):
            """Wire a delay element.

            For ``delay == 1`` the input passes through a single register
            (plus a register on the valid signal when ``has_valid``).
            Otherwise the input is written into a RAM of ``delay - 1`` entries
            addressed by separate read and write counters, and the RAM read
            data passes through an output register.
            """
            per_clock_type = t.magma_repr()
            if delay == 1:
                reg = DefineRegisterAnyType(t.magma_repr(),
                                            has_ce=False,
                                            has_reset=has_reset)()

                wire(reg.I, cls.I)
                wire(reg.O, cls.O)
                if has_reset:
                    wire(reg.RESET, cls.RESET)

                if has_valid:
                    # valid is delayed by a register of its own
                    valid_reg = DefineRegisterAnyType(Bit,
                                                      has_ce=False,
                                                      has_reset=has_reset)()
                    wire(valid_reg.I, cls.valid_up)
                    wire(valid_reg.O, cls.valid_down)
                    if has_reset:
                        wire(valid_reg.RESET, cls.RESET)

            else:
                # start from a constant 1 and AND in the optional valid / clock-enable inputs
                enabled = DefineCoreirConst(1, 1)().O[0]
                if has_valid:
                    enabled = cls.valid_up & enabled
                if has_ce:
                    enabled = bit(cls.CE) & enabled
                read_counter = AESizedCounterModM(delay - 1,
                                                  has_ce=True,
                                                  has_reset=has_reset)
                write_counter = AESizedCounterModM(delay - 1,
                                                   has_ce=True,
                                                   has_reset=has_reset)
                fifo_buffer = DefineRAMAnyType(per_clock_type, delay - 1)()
                # NOTE(review): this register is created with has_reset=has_reset but,
                # unlike the delay == 1 branch, its RESET port is not wired in the
                # has_reset block below — confirm whether that is intended.
                reg = DefineRegisterAnyType(t.magma_repr(),
                                            has_ce=False,
                                            has_reset=has_reset)()

                # delay read for delay clocks
                internal_delay_counter = DefineInitialDelayCounter(delay - 1)()
                advance_read_counter = internal_delay_counter.valid
                wire(enabled, internal_delay_counter.CE)
                # the read counter only advances once the initial delay counter reports valid
                wire(advance_read_counter & enabled, read_counter.CE)
                wire(enabled, write_counter.CE)

                if has_reset:
                    wire(cls.RESET, read_counter.RESET)
                    wire(cls.RESET, write_counter.RESET)
                    wire(cls.RESET, internal_delay_counter.RESET)

                wire(fifo_buffer.WADDR, write_counter.O)
                wire(fifo_buffer.RADDR, read_counter.O)
                wire(fifo_buffer.WDATA, cls.I)
                # RAM read data passes through the output register before reaching cls.O
                wire(fifo_buffer.RDATA, reg.I)
                wire(reg.O, cls.O)
                wire(fifo_buffer.WE, enabled)

                if has_valid:
                    # valid_down is the initial-delay valid signal, registered once
                    valid_reg = DefineRegister(1)()
                    wire(advance_read_counter, valid_reg.I[0])
                    wire(valid_reg.O[0], cls.valid_down)
コード例 #15
0
        def definition(TSBankGenerator):
            """Wire the per-lane bank and address generators.

            For each of the ``ni`` lanes the bank is
            ``(flat_idx + flat_idx / lcm(no, ni)) % ni``, truncated to
            ``bank_width`` bits. Every lane's address is the low
            ``addr_width`` bits of a shared time counter.
            """
            flat_idx_width = getRAMAddrWidth(no * ni)
            # next element each time_per_element clock
            if time_per_element > 1:
                index_in_cur_element = SizedCounterModM(time_per_element,
                                                        has_ce=has_ce,
                                                        has_reset=has_reset)
                next_element = Decode(time_per_element - 1,
                                      index_in_cur_element.O.N)(
                                          index_in_cur_element.O)
            else:
                next_element = DefineCoreirConst(1, 1)()
            # each element of the SSeq is a separate vector lane
            # NOTE(review): this counter is followed by a trailing "()" while
            # time_counter below is not — confirm which form SizedCounterModM expects.
            first_lane_flat_idx = SizedCounterModM((no + io) * ni,
                                                   incr=ni,
                                                   has_ce=True,
                                                   has_reset=has_reset)()
            time_counter = SizedCounterModM(no + io,
                                            has_ce=True,
                                            has_reset=has_reset)
            # NOTE(review): `next_element` is either the value returned by calling a
            # Decode instance or a constant instance; confirm `.O` is valid for both.
            wire(next_element.O, first_lane_flat_idx.CE)
            wire(next_element.O, time_counter.CE)
            # NOTE(review): index_in_cur_element only exists when time_per_element > 1;
            # the two blocks below assume that whenever has_ce / has_reset is set.
            if has_ce:
                wire(TSBankGenerator.CE, index_in_cur_element.CE)
            if has_reset:
                wire(TSBankGenerator.RESET, index_in_cur_element.RESET)
                wire(TSBankGenerator.RESET, first_lane_flat_idx.RESET)
                wire(TSBankGenerator.RESET, time_counter.RESET)

            lane_flat_idxs = [first_lane_flat_idx.O]

            # compute the current flat_idx for each lane
            for i in range(1, ni):
                cur_lane_flat_idx_adder = DefineAdd(flat_idx_width)()
                wire(cur_lane_flat_idx_adder.I0, first_lane_flat_idx.O)
                wire(cur_lane_flat_idx_adder.I1,
                     DefineCoreirConst(flat_idx_width, i * no)().O)

                lane_flat_idxs += [cur_lane_flat_idx_adder.O]

            lane_flat_div_lcms = []
            # constant divisor shared by every lane; arguments are (width, value)
            lcm_dim = DefineCoreirConst(flat_idx_width, lcm(no, ni))()
            # compute flat_idx / lcm_dim for each lane
            for i in range(ni):
                cur_lane_lcm_div = DefineUDiv(flat_idx_width)()
                # lane_flat_idxs already holds output values, so index by lane
                # and wire the entry directly
                wire(cur_lane_lcm_div.I0, lane_flat_idxs[i])
                wire(cur_lane_lcm_div.I1, lcm_dim.O)

                # collect this lane's quotient (the divider output)
                lane_flat_div_lcms += [cur_lane_lcm_div.O]

            # compute ((flat_idx % sseq_dim) + (flat_idx / lcm_dim)) % sseq_dim for each lane
            # note that s_ts == flat_idx % sseq_dim
            # only need to mod sseq_dim at end as that is same as also doing it flat_idx before addition
            for i in range(ni):
                pre_mod_add = DefineAdd(flat_idx_width)()
                wire(pre_mod_add.I0, lane_flat_idxs[i])
                wire(pre_mod_add.I1, lane_flat_div_lcms[i])

                bank_mod = DefineUMod(flat_idx_width)()
                wire(bank_mod.I0, pre_mod_add.O)
                # the modulus goes on the second operand; I0 is already driven by the sum
                wire(bank_mod.I1, DefineCoreirConst(flat_idx_width, ni)().O)

                wire(TSBankGenerator.bank[i],
                     bank_mod.O[0:TSBankGenerator.bank_width])

            # compute t for each lane addr
            for i in range(0, ni):
                wire(TSBankGenerator.addr[i],
                     time_counter.O[0:TSBankGenerator.addr_width])
コード例 #16
0
ファイル: test_linebuffer.py プロジェクト: essox514/magmacam
# Output type of the reduced result: a 16-bit array
outType2 = m.Out(m.Array(16, Bit))

# Test circuit has line buffer's input and reduce's output
# (plus write-enable WE, valid V, and the four line-buffer taps L00..L11)
args = ['I', inType, 'O', outType2, 'WE', BitIn, 'V', m.Out(m.Bit),
        'L00', TOUT, 'L01', TOUT, 'L10', TOUT, 'L11', TOUT] + \
        m.ClockInterface(False, False)
testcircuit = m.DefineCircuit('STEN', *args)

# Line buffer declaration
lb = Linebuffer(cirb, inType, outType, imgType, True)
m.wire(lb.I, testcircuit.I)
m.wire(lb.wen, testcircuit.WE)

# Reduce declaration: four 16-bit inputs combined with an adder,
# with a constant 0 as the identity input
reducePar = ReduceParallel(cirb, 4, renameCircuitForReduce(DeclareAdd(16)))
coreirConst = DefineCoreirConst(16, 0)()
# feed the 2x2 line-buffer window into the reducer
m.wire(reducePar.I.data[0], lb.out[0][0])
m.wire(reducePar.I.data[1], lb.out[0][1])
m.wire(reducePar.I.data[2], lb.out[1][0])
m.wire(reducePar.I.data[3], lb.out[1][1])
m.wire(reducePar.I.identity, coreirConst.O)
m.wire(testcircuit.O, reducePar.out)
m.wire(testcircuit.V, lb.valid)

# also expose each window element on its own output port
m.wire(lb.out[0][0], testcircuit.L00)
m.wire(lb.out[0][1], testcircuit.L01)
m.wire(lb.out[1][0], testcircuit.L10)
m.wire(lb.out[1][1], testcircuit.L11)


m.EndCircuit()
コード例 #17
0
from mantle.coreir.compare import *
from mantle.coreir import DefineCoreirConst
from mantle.coreir.LUT import *
from aetherling.modules.upsample import *
from aetherling.modules.downsample import *
from aetherling.modules.reduce import *
from aetherling.modules.native_linebuffer.two_dimensional_native_linebuffer import DefineTwoDimensionalLineBuffer

# CoreIR context and magma backend used for the circuit below
c = coreir.Context()
cirb = CoreIRBackend(c)

# Interface: two 8-bit inputs, one 8-bit output, ready/valid handshake bits, and a clock with CE
args = ['I0', Array[8, In(Bit)], 'I1', Array[8, In(Bit)], 'O0', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True)
downsampleStencilChain1Per32 = DefineCircuit('downsampleStencilChain1Per32_Circuit', *args)
# Line buffer instance, followed by a Noop built from the same line buffer definition
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 2, 1, 2, 2, 16, 16, 2, 2, 0, 0)()
magmaInstance1 = DefineNoop(DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 2, 1, 2, 2, 16, 16, 2, 2, 0, 0))()
# 8-bit constants 1..4, used as the second multiplier operands below
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 2)()
magmaInstance4 = DefineCoreirConst(8, 3)()
magmaInstance5 = DefineCoreirConst(8, 4)()
# connect each element of the line buffer's 2x2 output window to the Noop
wire(magmaInstance0.O[0][0][0], magmaInstance1.in_O[0][0][0])
wire(magmaInstance0.O[0][0][1], magmaInstance1.in_O[0][0][1])
wire(magmaInstance0.O[0][1][0], magmaInstance1.in_O[0][1][0])
wire(magmaInstance0.O[0][1][1], magmaInstance1.in_O[0][1][1])
# four 8-bit multipliers
magmaInstance6 = DefineCoreirMul(8)()
magmaInstance7 = DefineCoreirMul(8)()
magmaInstance8 = DefineCoreirMul(8)()
magmaInstance9 = DefineCoreirMul(8)()
# each multiplier takes one window element and one constant
wire(magmaInstance1.O[0][0][0], magmaInstance6.I0)
wire(magmaInstance2.O, magmaInstance6.I1)
wire(magmaInstance1.O[0][0][1], magmaInstance7.I0)
wire(magmaInstance3.O, magmaInstance7.I1)
コード例 #18
0
        def definition(STBankGenerator):
            """Wire the per-lane bank and address generators.

            For each of the ``no`` lanes the bank is
            ``(flat_idx + flat_idx / lcm(no, ni)) % no`` and the address is a
            flat index divided by ``no``. Both are truncated to the circuit's
            ``bank_width`` / ``addr_width``, with any remaining high bits sent
            to a ``TermAnyType`` instance.
            """
            flat_idx_width = getRAMAddrWidth(no * ni)
            # next element each time_per_element clock
            if time_per_element > 1:
                index_in_cur_element = SizedCounterModM(time_per_element,
                                                        has_ce=has_ce,
                                                        has_reset=has_reset)
                next_element = Decode(time_per_element - 1,
                                      index_in_cur_element.O.N)(
                                          index_in_cur_element.O)
            else:
                next_element = DefineCoreirConst(1, 1)()
            # each element of the SSeq is a separate vector lane
            first_lane_flat_idx = DefineCounterModM(ni + ii,
                                                    flat_idx_width,
                                                    cout=False,
                                                    has_ce=True,
                                                    has_reset=has_reset)()
            # NOTE(review): `next_element` is either the value returned by calling a
            # Decode instance or a constant instance; confirm `.O[0]` is valid for both.
            wire(next_element.O[0], first_lane_flat_idx.CE)
            # NOTE(review): index_in_cur_element only exists when time_per_element > 1;
            # the two blocks below assume that whenever has_ce / has_reset is set.
            if has_ce:
                wire(STBankGenerator.CE, index_in_cur_element.CE)
            if has_reset:
                wire(STBankGenerator.RESET, index_in_cur_element.RESET)
                wire(STBankGenerator.RESET, first_lane_flat_idx.RESET)

            lane_flat_idxs = [first_lane_flat_idx.O]

            # compute the current flat_idx for each lane
            for i in range(1, no):
                cur_lane_flat_idx_adder = DefineAdd(flat_idx_width)()
                wire(cur_lane_flat_idx_adder.I0, first_lane_flat_idx.O)
                wire(cur_lane_flat_idx_adder.I1,
                     DefineCoreirConst(flat_idx_width, i * ni)().O)

                lane_flat_idxs += [cur_lane_flat_idx_adder.O]

            lane_flat_div_lcms = []
            # constant divisor shared by every lane
            lcm_dim = DefineCoreirConst(flat_idx_width, lcm(no, ni))()
            # compute flat_idx / lcm_dim for each lane
            for i in range(no):
                cur_lane_lcm_div = DefineUDiv(flat_idx_width)()
                wire(cur_lane_lcm_div.I0, lane_flat_idxs[i])
                wire(cur_lane_lcm_div.I1, lcm_dim.O)

                lane_flat_div_lcms += [cur_lane_lcm_div.O]

            # compute ((flat_idx % sseq_dim) + (flat_idx / lcm_dim)) % sseq_dim for each lane
            # only need to mod sseq_dim at end as that is same as also doing it flat_idx before addition
            for i in range(no):
                pre_mod_add = DefineAdd(flat_idx_width)()
                wire(pre_mod_add.I0, lane_flat_idxs[i])
                wire(pre_mod_add.I1, lane_flat_div_lcms[i])

                bank_mod = DefineUMod(flat_idx_width)()
                wire(bank_mod.I0, pre_mod_add.O)
                wire(bank_mod.I1, DefineCoreirConst(flat_idx_width, no)().O)

                wire(STBankGenerator.bank[i],
                     bank_mod.O[0:STBankGenerator.bank_width])
                # send the unused high bits of the modulo result to a Term instance
                if len(bank_mod.O) > STBankGenerator.bank_width:
                    bits_to_term = len(bank_mod.O) - STBankGenerator.bank_width
                    term = TermAnyType(Array[bits_to_term, Bit])
                    wire(bank_mod.O[STBankGenerator.bank_width:], term.I)

            # compute flat_idx / sseq_dim for each lane addr
            for i in range(no):
                flat_idx_sseq_dim_div = DefineUDiv(flat_idx_width)()
                # NOTE(review): every lane divides lane_flat_idxs[0], not lane_flat_idxs[i];
                # confirm that all lanes are meant to share lane 0's index here.
                wire(flat_idx_sseq_dim_div.I0, lane_flat_idxs[0])
                wire(flat_idx_sseq_dim_div.I1,
                     DefineCoreirConst(flat_idx_width, no)().O)

                wire(STBankGenerator.addr[i],
                     flat_idx_sseq_dim_div.O[0:STBankGenerator.addr_width])
                # send the unused high bits of the quotient to a Term instance
                if len(flat_idx_sseq_dim_div.O) > STBankGenerator.addr_width:
                    # NOTE(review): the width is taken from bank_mod.O (left over from the
                    # previous loop) rather than flat_idx_sseq_dim_div.O; both instances are
                    # built with flat_idx_width — confirm their outputs have the same length.
                    bits_to_term = len(bank_mod.O) - STBankGenerator.addr_width
                    term = TermAnyType(Array[bits_to_term, Bit])
                    wire(flat_idx_sseq_dim_div.O[STBankGenerator.addr_width:],
                         term.I)
コード例 #19
0
    'O0',
    Array[8, Out(Bit)],
    'O1',
    Array[8, Out(Bit)],
    'valid_data_in',
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
partialParallelSimpleAdd = DefineCircuit('partialParallelSimpleAdd_Circuit',
                                         *args)
magmaInstance0 = DefineNoop(DefineCoreirConst(8, 1))()
magmaInstance1 = DefineNoop(DefineCoreirConst(8, 1))()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineAdd(8)()
magmaInstance6 = DefineAdd(8)()
wire(magmaInstance0.O, magmaInstance5.I0)
wire(magmaInstance2.O, magmaInstance5.I1)
wire(magmaInstance1.O, magmaInstance6.I0)
wire(magmaInstance3.O, magmaInstance6.I1)
wire(partialParallelSimpleAdd.I0, magmaInstance0.in_O)
wire(partialParallelSimpleAdd.I1, magmaInstance1.in_O)
wire(partialParallelSimpleAdd.O0, magmaInstance5.O)
wire(partialParallelSimpleAdd.O1, magmaInstance6.O)
wire(partialParallelSimpleAdd.ready_data_out,
     partialParallelSimpleAdd.ready_data_in)
コード例 #20
0
import magma as m
from magma.clock import *
from magma.backend.coreir_ import CoreIRBackend
from magma.bitutils import *
from coreir.context import *
from magma.simulator.coreir_simulator import CoreIRSimulator
import coreir
from magma.scope import Scope
from mantle.coreir import DefineCoreirConst
from mantle import CounterModM, Decode, SIPO
from magma.frontend.coreir_ import GetCoreIRModule
from mantle.coreir.arith import *
from mantle.primitives import DeclareAdd

# CoreIR context, magma backend and simulation scope
c = coreir.Context()
cirb = CoreIRBackend(c)
scope = Scope()

# bit width shared by the constant and the adder below
width = 16

# constant 0 instance, wired to the reducer's C port below
addID = DefineCoreirConst(width, 0)()
rpp = ReducePartiallyParallel(cirb, 8, 2,
                              renameCircuitForReduce(DeclareAdd(width)))

m.wire(addID.out, rpp.C)

# NOTE(review): no matching DefineCircuit call is visible in this excerpt — confirm
# which circuit definition this closes.
m.EndCircuit()

# convert the reducer to a CoreIR module and save it as JSON
module = GetCoreIRModule(cirb, rpp)
module.save_to_file("reducehybrid.json")
コード例 #21
0
ファイル: downscale_32.py プロジェクト: essox514/magmacam
# The top-level output uses the TOUT type
outType2 = TOUT

# Top level module: line buffer input, reduce output
# (plus write-enable WE and valid V)
args = ['I', inType, 'O', outType2, 'WE', m.BitIn, 'V', m.Out(m.Bit)] + \
        m.ClockInterface(False, False)
dscale = m.DefineCircuit('Downscale', *args)

# Line buffer declaration
lb = Linebuffer(cirb, inType, outType, imgType, True)
m.wire(lb.I, dscale.I)
m.wire(lb.wen, dscale.WE)

# Reduce declaration
red = ReduceParallel(cirb, samples, renameCircuitForReduce(DeclareAdd(width)))
# additive identity
coreirConst = DefineCoreirConst(width, 0)()

# select 16 samples to keep
# k numbers the reducer inputs in row-major order over the chosen (i, j) taps
k = 0
for i in [0, 3, 5, 8]:
    for j in [0, 3, 7, 10]:
        m.wire(red.I.data[k], lb.out[i][j])
        k += 1

m.wire(red.I.identity, coreirConst.O)
m.wire(dscale.O, red.out)
m.wire(dscale.V, lb.valid)

m.EndCircuit()

# convert the finished circuit to a CoreIR module
module = GetCoreIRModule(cirb, dscale)
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
downsample_256x256_to_32x32_64px_in_per_clk = DefineCircuit(
    'downsample_256x256_to_32x32_64px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 64, 1, 2, 2,
                                                256, 256, 2, 2, 0, 0)()
magmaInstance1 = DefineNoop(
    DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 64, 1, 2, 2, 256, 256, 2,
                                   2, 0, 0))()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 1)()
magmaInstance6 = DefineCoreirConst(8, 1)()
magmaInstance7 = DefineCoreirConst(8, 1)()
magmaInstance8 = DefineCoreirConst(8, 1)()
magmaInstance9 = DefineCoreirConst(8, 1)()
magmaInstance10 = DefineCoreirConst(8, 1)()
magmaInstance11 = DefineCoreirConst(8, 1)()
magmaInstance12 = DefineCoreirConst(8, 1)()
magmaInstance13 = DefineCoreirConst(8, 1)()
magmaInstance14 = DefineCoreirConst(8, 1)()
magmaInstance15 = DefineCoreirConst(8, 1)()
magmaInstance16 = DefineCoreirConst(8, 1)()
magmaInstance17 = DefineCoreirConst(8, 1)()
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
downsample_256x256_to_32x32_8px_in_per_clk = DefineCircuit(
    'downsample_256x256_to_32x32_8px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2,
                                                256, 256, 2, 2, 0, 0)()
magmaInstance1 = DefineNoop(
    DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2, 256, 256, 2,
                                   2, 0, 0))()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 2)()
magmaInstance5 = DefineCoreirConst(8, 2)()
magmaInstance7 = DefineCoreirConst(8, 3)()
magmaInstance8 = DefineCoreirConst(8, 3)()
magmaInstance9 = DefineCoreirConst(8, 4)()
magmaInstance10 = DefineCoreirConst(8, 4)()
wire(magmaInstance0.O[0][0][0], magmaInstance1.in_O[0][0][0])
wire(magmaInstance0.O[0][0][1], magmaInstance1.in_O[0][0][1])
wire(magmaInstance0.O[0][1][0], magmaInstance1.in_O[0][1][0])
wire(magmaInstance0.O[0][1][1], magmaInstance1.in_O[0][1][1])
wire(magmaInstance0.O[1][0][0], magmaInstance1.in_O[1][0][0])
wire(magmaInstance0.O[1][0][1], magmaInstance1.in_O[1][0][1])
wire(magmaInstance0.O[1][1][0], magmaInstance1.in_O[1][1][0])
wire(magmaInstance0.O[1][1][1], magmaInstance1.in_O[1][1][1])
import coreir
from magma.scope import Scope
from mantle.coreir.arith import *
from mantle.coreir.logic import *
from mantle.coreir.compare import *
from mantle.coreir import DefineCoreirConst
from mantle.coreir.LUT import *
from aetherling.modules.upsample import *
from aetherling.modules.downsample import *
from aetherling.modules.reduce import *
from aetherling.modules.native_linebuffer.two_dimensional_native_linebuffer import DefineTwoDimensionalLineBuffer

args = ['I0', Array[8, In(Bit)], 'I1', Array[8, In(Bit)], 'I2', Array[8, In(Bit)], 'I3', Array[8, In(Bit)], 'O0', Array[8, Out(Bit)], 'O1', Array[8, Out(Bit)], 'O2', Array[8, Out(Bit)], 'O3', Array[8, Out(Bit)], 'valid_data_in', In(Bit), 'ready_data_in', Out(Bit), 'valid_data_out', Out(Bit), 'ready_data_out', In(Bit), ] + ClockInterface(has_ce=True)
convolution_32x32Im_2x2Win_4px_in_per_clk = DefineCircuit('convolution_32x32Im_2x2Win_4px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 4, 1, 2, 2, 32, 32, 1, 1, 0, 0)()
magmaInstance1 = DefineCoreirConst(8, 1)()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 2)()
magmaInstance6 = DefineCoreirConst(8, 2)()
magmaInstance7 = DefineCoreirConst(8, 2)()
magmaInstance8 = DefineCoreirConst(8, 2)()
magmaInstance12 = DefineCoreirConst(8, 2)()
magmaInstance13 = DefineCoreirConst(8, 2)()
magmaInstance14 = DefineCoreirConst(8, 2)()
magmaInstance15 = DefineCoreirConst(8, 2)()
magmaInstance16 = DefineCoreirConst(8, 1)()
magmaInstance17 = DefineCoreirConst(8, 1)()
magmaInstance18 = DefineCoreirConst(8, 1)()
magmaInstance19 = DefineCoreirConst(8, 1)()
コード例 #25
0
ファイル: reshape_st.py プロジェクト: David-Durst/aetherling
        def definition(cls):
            """Build the body of the reshape circuit.

            The body is assembled in three stages:

            1. Create one RAM per bank plus the lookup tables (LUTs) that
               hold, for every clock of the schedule, the write address,
               write valid, target bank, read address and target output lane.
            2. Create the counters that index into those LUTs and gate them
               with the valid / clock-enable signals.
            3. Create an input and an output bitonic sorting network and wire
               ports, LUTs, sorting networks and RAMs together.

            ``t_in``, ``t_out``, ``has_valid``, ``has_ce`` and ``has_reset``
            are not parameters of this method; they are read from an
            enclosing scope.

            Side effect: stores the computed output latency on
            ``cls.output_delay``.
            """
            # first section creates the RAMs and LUTs that set values in them and the sorting network
            shared_and_diff_subtypes = get_shared_and_diff_subtypes(
                t_in, t_out)
            t_in_diff = shared_and_diff_subtypes.diff_input
            t_out_diff = shared_and_diff_subtypes.diff_output
            # NOTE(review): the differing types are wrapped in ST_TSeq(2, 0, ...)
            # before building the permutation graph; the reason is not visible
            # here - confirm against build_permutation_graph.
            graph = build_permutation_graph(ST_TSeq(2, 0, t_in_diff),
                                            ST_TSeq(2, 0, t_out_diff))
            # three views of the schedule: per input lane, per bank (writes),
            # and per bank (reads)
            banks_write_addr_per_input_lane = get_banks_addr_per_lane(
                graph.input_nodes)
            input_lane_write_addr_per_bank = get_lane_addr_per_banks(
                graph.input_nodes)
            output_lane_read_addr_per_bank = get_lane_addr_per_banks(
                graph.output_nodes)

            # each ram only needs to be large enough to handle the number of addresses assigned to it
            # all rams receive the same number of writes
            # but some of those writes don't happen as the data is invalid, so don't need storage for them
            max_ram_addrs = [
                max([bank_clock_data.addr for bank_clock_data in bank_data])
                for bank_data in output_lane_read_addr_per_bank
            ]
            # rams also handle parallelism from outer_shared type as this affects all banks the same
            outer_shared_sseqs = remove_tseqs(
                shared_and_diff_subtypes.shared_outer)
            if outer_shared_sseqs == ST_Tombstone():
                ram_element_type = shared_and_diff_subtypes.shared_inner
            else:
                ram_element_type = replace_tombstone(
                    outer_shared_sseqs, shared_and_diff_subtypes.shared_inner)
            # can use wider rams rather than duplicate for outer_shared_sseqs because will
            # transpose dimensions of input wires below to wire up as if outer, shared dimensions
            # were on the inside
            # one RAM per bank, sized to hold addresses 0..max address of that bank
            rams = [
                DefineRAM_ST(ram_element_type, ram_max_addr + 1)()
                for ram_max_addr in max_ram_addrs
            ]
            # address widths are read back from the instantiated RAMs' write-address ports
            rams_addr_widths = [ram.WADDR.N for ram in rams]

            # for each bank, the address to write to each clock
            write_addr_for_bank_luts = []
            for bank_idx in range(len(rams)):
                ram_addr_width = rams_addr_widths[bank_idx]
                num_addrs = len(input_lane_write_addr_per_bank[bank_idx])
                #assert num_addrs == t_in_diff.time()
                # LUT contents: one bit-tuple (via int2seq) per clock
                write_addrs = [
                    builtins.tuple(
                        int2seq(write_data_per_bank_per_clock.addr,
                                ram_addr_width))
                    for write_data_per_bank_per_clock in
                    input_lane_write_addr_per_bank[bank_idx]
                ]
                write_addr_for_bank_luts.append(
                    DefineLUTAnyType(Array[ram_addr_width, Bit], num_addrs,
                                     builtins.tuple(write_addrs))())

            # for each bank, whether to actually write this clock
            write_valid_for_bank_luts = []
            for bank_idx in range(len(rams)):
                num_valids = len(input_lane_write_addr_per_bank[bank_idx])
                #assert num_valids == t_in_diff.time()
                # each LUT entry is a 1-tuple holding that clock's valid flag
                valids = [
                    builtins.tuple([write_data_per_bank_per_clock.valid])
                    for write_data_per_bank_per_clock in
                    input_lane_write_addr_per_bank[bank_idx]
                ]
                write_valid_for_bank_luts.append(
                    DefineLUTAnyType(Bit, num_valids,
                                     builtins.tuple(valids))())

            # for each input lane, the bank to write to each clock
            write_bank_for_input_lane_luts = []
            bank_idx_width = getRAMAddrWidth(len(rams))
            for lane_idx in range(len(banks_write_addr_per_input_lane)):
                num_bank_idxs = len(banks_write_addr_per_input_lane[lane_idx])
                #assert num_bank_idxs == t_in_diff.time()
                bank_idxs = [
                    builtins.tuple(
                        int2seq(write_data_per_lane_per_clock.bank,
                                bank_idx_width))
                    for write_data_per_lane_per_clock in
                    banks_write_addr_per_input_lane[lane_idx]
                ]
                write_bank_for_input_lane_luts.append(
                    DefineLUTAnyType(Array[bank_idx_width, Bit], num_bank_idxs,
                                     builtins.tuple(bank_idxs))())

            # for each bank, the address to read from each clock
            read_addr_for_bank_luts = []
            for bank_idx in range(len(rams)):
                ram_addr_width = rams_addr_widths[bank_idx]
                num_addrs = len(output_lane_read_addr_per_bank[bank_idx])
                #assert num_addrs == t_in_diff.time()
                read_addrs = [
                    builtins.tuple(
                        int2seq(read_data_per_bank_per_clock.addr,
                                ram_addr_width))
                    for read_data_per_bank_per_clock in
                    output_lane_read_addr_per_bank[bank_idx]
                ]
                read_addr_for_bank_luts.append(
                    DefineLUTAnyType(Array[ram_addr_width, Bit], num_addrs,
                                     builtins.tuple(read_addrs))())

            # for each bank, the lane to send each read to
            output_lane_for_bank_luts = []
            # number of lanes equals number of banks
            # some of the lanes are just always invalid, added so input lane width equals output lane width
            lane_idx_width = getRAMAddrWidth(len(rams))
            for bank_idx in range(len(rams)):
                num_lane_idxs = len(output_lane_read_addr_per_bank[bank_idx])
                #assert num_lane_idxs == t_in_diff.time()
                # the `.s` field of each read entry is used as the target lane index
                lane_idxs = [
                    builtins.tuple(
                        int2seq(read_data_per_bank_per_clock.s,
                                lane_idx_width))
                    for read_data_per_bank_per_clock in
                    output_lane_read_addr_per_bank[bank_idx]
                ]
                output_lane_for_bank_luts.append(
                    DefineLUTAnyType(Array[lane_idx_width, Bit], num_lane_idxs,
                                     builtins.tuple(lane_idxs))())

            # second part creates the counters that index into the LUTs
            # elem_per counts time per element of the reshape
            elem_per_reshape_counter = AESizedCounterModM(
                ram_element_type.time(), has_ce=True)
            # high while the element counter holds its last value (time() - 1)
            end_cur_elem = Decode(ram_element_type.time() - 1,
                                  elem_per_reshape_counter.O.N)(
                                      elem_per_reshape_counter.O)
            # reshape counts which element in the reshape
            # NOTE(review): the schedule length is taken from bank 0 only -
            # assumes every bank has the same number of entries; TODO confirm.
            num_clocks = len(output_lane_read_addr_per_bank[0])
            reshape_write_counter = AESizedCounterModM(num_clocks,
                                                       has_ce=True,
                                                       has_reset=has_reset)
            reshape_read_counter = AESizedCounterModM(num_clocks,
                                                      has_ce=True,
                                                      has_reset=has_reset)

            # latency = first output latency of the graph, scaled by the
            # number of clocks each element takes
            output_delay = (
                get_output_latencies(graph)[0]) * ram_element_type.time()
            # this is present so testing knows the delay
            cls.output_delay = output_delay
            reshape_read_delay_counter = DefineInitialDelayCounter(
                output_delay, has_ce=True, has_reset=has_reset)()
            # outer counter that repeats the reshape
            #wire(reshape_write_counter.O, cls.reshape_write_counter)

            # start from constant 1 and AND in valid_up / CE when those ports exist
            enabled = DefineCoreirConst(1, 1)().O[0]
            if has_valid:
                enabled = cls.valid_up & enabled
                # downstream valid comes from the initial-delay counter
                wire(reshape_read_delay_counter.valid, cls.valid_down)
            if has_ce:
                enabled = bit(cls.CE) & enabled
            wire(enabled, elem_per_reshape_counter.CE)
            wire(enabled, reshape_read_delay_counter.CE)
            # the write counter advances once per element (on the element's last clock)
            wire(enabled & end_cur_elem, reshape_write_counter.CE)
            # the read counter additionally waits for the delay counter's valid
            wire(enabled & end_cur_elem & reshape_read_delay_counter.valid,
                 reshape_read_counter.CE)

            if has_reset:
                wire(cls.RESET, elem_per_reshape_counter.RESET)
                wire(cls.RESET, reshape_read_delay_counter.RESET)
                wire(cls.RESET, reshape_write_counter.RESET)
                wire(cls.RESET, reshape_read_counter.RESET)

            # wire read and write counters to all LUTs
            for lut in write_bank_for_input_lane_luts:
                wire(reshape_write_counter.O, lut.addr)

            for lut in write_addr_for_bank_luts:
                wire(reshape_write_counter.O, lut.addr)

            for lut in write_valid_for_bank_luts:
                wire(reshape_write_counter.O, lut.addr)

            for lut in read_addr_for_bank_luts:
                wire(reshape_read_counter.O, lut.addr)

            for lut in output_lane_for_bank_luts:
                wire(reshape_read_counter.O, lut.addr)

            # third and final instance creation part creates the sorting networks that map lanes to banks
            # input network sorts (bank, val) pairs by bank index
            input_sorting_network_t = Tuple(
                bank=Array[write_bank_for_input_lane_luts[0].data.N, Bit],
                val=ram_element_type.magma_repr())
            input_sorting_network = DefineBitonicSort(input_sorting_network_t,
                                                      len(rams),
                                                      lambda x: x.bank)()

            # output network sorts (lane, val) pairs by lane index
            output_sorting_network_t = Tuple(
                lane=Array[output_lane_for_bank_luts[0].data.N, Bit],
                val=ram_element_type.magma_repr())
            output_sorting_network = DefineBitonicSort(
                output_sorting_network_t, len(rams), lambda x: x.lane)()

            # wire luts, sorting networks, inputs, and rams
            # flatten all the sseq_layers to get flat magma type of inputs and outputs
            # tseqs don't affect magma types
            num_sseq_layers_inputs = num_nested_layers(
                remove_tseqs(shared_and_diff_subtypes.diff_input))
            num_sseq_layers_to_remove_inputs = max(0,
                                                   num_sseq_layers_inputs - 1)
            num_sseq_layers_outputs = num_nested_layers(
                remove_tseqs(shared_and_diff_subtypes.diff_output))
            num_sseq_layers_to_remove_outputs = max(
                0, num_sseq_layers_outputs - 1)
            if remove_tseqs(
                    shared_and_diff_subtypes.shared_outer) != ST_Tombstone():
                #num_sseq_layers_inputs += num_nested_layers(remove_tseqs(shared_and_diff_subtypes.shared_outer))
                #num_sseq_layers_outputs += num_nested_layers(remove_tseqs(shared_and_diff_subtypes.shared_outer))
                # shared outer dimensions exist: transpose the ports first, then flatten
                input_ports = flatten_ports(
                    transpose_outer_dimensions(
                        shared_and_diff_subtypes.shared_outer,
                        shared_and_diff_subtypes.diff_input, cls.I),
                    num_sseq_layers_to_remove_inputs)
                output_ports = flatten_ports(
                    transpose_outer_dimensions(
                        shared_and_diff_subtypes.shared_outer,
                        shared_and_diff_subtypes.diff_output, cls.O),
                    num_sseq_layers_to_remove_outputs)
            else:
                input_ports = flatten_ports(cls.I,
                                            num_sseq_layers_to_remove_inputs)
                output_ports = flatten_ports(
                    cls.O, num_sseq_layers_to_remove_outputs)
            # this is only used if the shared outer layers contains any sseqs
            sseq_layers_to_flatten = max(
                num_nested_layers(
                    remove_tseqs(shared_and_diff_subtypes.shared_outer)) - 1,
                0)
            # one iteration per bank; bank idx doubles as the input/output lane index
            for idx in range(len(rams)):
                # wire input and bank to input sorting network
                wire(write_bank_for_input_lane_luts[idx].data,
                     input_sorting_network.I[idx].bank)
                #if idx == 0:
                #    wire(cls.first_valid, write_valid_for_bank_luts[idx].data)
                if idx < t_in_diff.port_width():
                    # since the input_ports are lists, need to wire them individually to the sorting ports
                    if remove_tseqs(shared_and_diff_subtypes.shared_outer
                                    ) != ST_Tombstone():
                        cur_input_port = flatten_ports(input_ports[idx],
                                                       sseq_layers_to_flatten)
                        cur_sort_port = flatten_ports(
                            input_sorting_network.I[idx].val,
                            sseq_layers_to_flatten)
                        for i in range(len(cur_input_port)):
                            wire(cur_input_port[i], cur_sort_port[i])
                    else:
                        if num_sseq_layers_inputs == 0:
                            # input_ports will be an array of bits for 1 element
                            # if no sseq in t_in
                            wire(input_ports, input_sorting_network.I[idx].val)
                        else:
                            wire(input_ports[idx],
                                 input_sorting_network.I[idx].val)
                    #wire(cls.ram_wr, input_sorting_network.O[idx].val)
                    #wire(cls.ram_rd, rams[idx].RDATA)
                else:
                    # lanes beyond the real input width are driven with an all-zero constant
                    zero_const = DefineCoreirConst(
                        ram_element_type.magma_repr().size(), 0)().O
                    cur_sn_input = input_sorting_network.I[idx].val
                    # descend through element [0] of each nesting level until
                    # the port length matches the flat constant
                    # NOTE(review): only index 0 is followed, so this presumably
                    # relies on every outer dimension having length 1 - confirm.
                    while len(cur_sn_input) != len(zero_const):
                        cur_sn_input = cur_sn_input[0]
                    wire(zero_const, cur_sn_input)

                # wire input sorting network, write addr, and write valid luts to banks
                wire(input_sorting_network.O[idx].val, rams[idx].WDATA)
                wire(write_addr_for_bank_luts[idx].data, rams[idx].WADDR)
                #wire(write_addr_for_bank_luts[idx].data[0], cls.addr_wr[idx])
                # write enable is the LUT's valid bit, also gated by CE when present
                if has_ce:
                    wire(write_valid_for_bank_luts[idx].data & bit(cls.CE),
                         rams[idx].WE)
                else:
                    wire(write_valid_for_bank_luts[idx].data, rams[idx].WE)

                # wire output sorting network, read addr, read bank, and read enable
                wire(rams[idx].RDATA, output_sorting_network.I[idx].val)
                wire(output_lane_for_bank_luts[idx].data,
                     output_sorting_network.I[idx].lane)
                wire(read_addr_for_bank_luts[idx].data, rams[idx].RADDR)
                #wire(read_addr_for_bank_luts[idx].data[0], cls.addr_rd[idx])
                # ok to read invalid entries, so the read enable is just CE
                # (or constant 1 when there is no CE)
                if has_ce:
                    wire(bit(cls.CE), rams[idx].RE)
                else:
                    wire(DefineCoreirConst(1, 1)().O[0], rams[idx].RE)
                if has_reset:
                    wire(cls.RESET, rams[idx].RESET)

                # wire output sorting network value to output or term
                if idx < t_out_diff.port_width():
                    # since the output_ports are lists, need to wire them individually to the sorting ports
                    if remove_tseqs(shared_and_diff_subtypes.shared_outer
                                    ) != ST_Tombstone():
                        cur_output_port = flatten_ports(
                            output_ports[idx], sseq_layers_to_flatten)
                        cur_sort_port = flatten_ports(
                            output_sorting_network.O[idx].val,
                            sseq_layers_to_flatten)
                        for i in range(len(cur_output_port)):
                            wire(cur_output_port[i], cur_sort_port[i])
                    else:
                        if num_sseq_layers_outputs == 0:
                            # output_ports will be an array of bits for 1 element
                            # if no sseq in t_out
                            wire(output_sorting_network.O[idx].val,
                                 output_ports)
                        else:
                            wire(output_sorting_network.O[idx].val,
                                 output_ports[idx])
                else:
                    # lanes beyond the real output width are terminated
                    wire(output_sorting_network.O[idx].val,
                         TermAnyType(type(output_sorting_network.O[idx].val)))

                # wire sorting networks bank/lane to term as not used on outputs, just used for sorting
                wire(input_sorting_network.O[idx].bank,
                     TermAnyType(type(input_sorting_network.O[idx].bank)))
                wire(output_sorting_network.O[idx].lane,
                     TermAnyType(type(output_sorting_network.O[idx].lane)))
コード例 #26
0
ファイル: downscale.py プロジェクト: leonardt/magmacam
# Builds the 'Downscale' circuit: every element of the line buffer window is
# summed by a parallel add-reduce, and the result is saved as CoreIR JSON.
# NOTE(review): TOUT, inType, outType, imgType, cirb, m, n and b are defined
# outside this excerpt.
outType2 = TOUT

# Top level module: line buffer input, reduce output
# Ports: I (data in), O (reduced data out), WE (write enable in), V (valid out)
args = ['I', inType, 'O', outType2, 'WE', BitIn, 'V',
        Out(Bit)] + ClockInterface(False, False)
top = DefineCircuit('Downscale', *args)

# Line buffer declaration
lb = Linebuffer(cirb, inType, outType, imgType, True)
wire(lb.I, top.I)
wire(lb.wen, top.WE)

# Reduce declaration
# m * n inputs, combined with a b-bit add
red = ReduceParallel(cirb, m * n, renameCircuitForReduce(DeclareAdd(b)))
# additive identity
coreirConst = DefineCoreirConst(b, 0)()

# flatten linebuffer output and wire to reduce parallel input
# row-major flattening: element (i, j) goes to reducer input m * i + j
for i in range(n):
    for j in range(m):
        k = m * i + j
        wire(red.I.data[k], lb.out[i][j])

# Identity input, reduced output and the line buffer's valid flag.
wire(red.I.identity, coreirConst.out)
wire(top.O, red.out)
wire(top.V, lb.valid)

EndCircuit()

# Convert the finished circuit to a CoreIR module and write it out as JSON.
module = GetCoreIRModule(cirb, top)
module.save_to_file("downscale.json")
    'O0',
    Array[8, Out(Bit)],
    'valid_data_in',
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
convolution_32x32Im_2x2Win_1px_in_per_clk = DefineCircuit(
    'convolution_32x32Im_2x2Win_1px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 1, 1, 2, 2,
                                                32, 32, 1, 1, 0, 0)()
magmaInstance1 = DefineCoreirConst(8, 1)()
magmaInstance2 = DefineCoreirConst(8, 2)()
magmaInstance3 = DefineCoreirConst(8, 2)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirMul(8)()
magmaInstance6 = DefineCoreirMul(8)()
magmaInstance7 = DefineCoreirMul(8)()
magmaInstance8 = DefineCoreirMul(8)()
wire(magmaInstance0.O[0][0][0], magmaInstance5.I0)
wire(magmaInstance1.O, magmaInstance5.I1)
wire(magmaInstance0.O[0][0][1], magmaInstance6.I0)
wire(magmaInstance2.O, magmaInstance6.I1)
wire(magmaInstance0.O[0][1][0], magmaInstance7.I0)
wire(magmaInstance3.O, magmaInstance7.I1)
wire(magmaInstance0.O[0][1][1], magmaInstance8.I0)
wire(magmaInstance4.O, magmaInstance8.I1)
    'O7',
    Array[8, Out(Bit)],
    'valid_data_in',
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
convolution_32x32Im_2x2Win_8px_in_per_clk = DefineCircuit(
    'convolution_32x32Im_2x2Win_8px_in_per_clk_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 1, 2, 2,
                                                32, 32, 1, 1, 0, 0)()
magmaInstance1 = DefineCoreirConst(8, 1)()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 1)()
magmaInstance6 = DefineCoreirConst(8, 1)()
magmaInstance7 = DefineCoreirConst(8, 1)()
magmaInstance8 = DefineCoreirConst(8, 1)()
magmaInstance9 = DefineCoreirConst(8, 2)()
magmaInstance10 = DefineCoreirConst(8, 2)()
magmaInstance11 = DefineCoreirConst(8, 2)()
magmaInstance12 = DefineCoreirConst(8, 2)()
magmaInstance13 = DefineCoreirConst(8, 2)()
magmaInstance14 = DefineCoreirConst(8, 2)()
magmaInstance15 = DefineCoreirConst(8, 2)()
magmaInstance16 = DefineCoreirConst(8, 2)()
コード例 #29
0
    'O15',
    Array[8, Out(Bit)],
    'valid_data_in',
    In(Bit),
    'ready_data_in',
    Out(Bit),
    'valid_data_out',
    Out(Bit),
    'ready_data_out',
    In(Bit),
] + ClockInterface(has_ce=True)
partialParallel16Convolution = DefineCircuit(
    'partialParallel16Convolution_Circuit', *args)
magmaInstance0 = DefineTwoDimensionalLineBuffer(Array[8, In(Bit)], 8, 2, 2, 2,
                                                8, 8, 1, 1, 0, 0)()
magmaInstance1 = DefineCoreirConst(8, 1)()
magmaInstance2 = DefineCoreirConst(8, 1)()
magmaInstance3 = DefineCoreirConst(8, 1)()
magmaInstance4 = DefineCoreirConst(8, 1)()
magmaInstance5 = DefineCoreirConst(8, 1)()
magmaInstance6 = DefineCoreirConst(8, 1)()
magmaInstance7 = DefineCoreirConst(8, 1)()
magmaInstance8 = DefineCoreirConst(8, 1)()
magmaInstance9 = DefineCoreirConst(8, 1)()
magmaInstance10 = DefineCoreirConst(8, 1)()
magmaInstance11 = DefineCoreirConst(8, 1)()
magmaInstance12 = DefineCoreirConst(8, 1)()
magmaInstance13 = DefineCoreirConst(8, 1)()
magmaInstance14 = DefineCoreirConst(8, 1)()
magmaInstance15 = DefineCoreirConst(8, 1)()
magmaInstance16 = DefineCoreirConst(8, 1)()
コード例 #30
0
        def definition(cls):
            """Wire up the counters that produce ``valid`` (and optionally
            ``last`` / ``cur_valid``) for the type ``t``.

            Three cases are handled:

            * ``t`` is an ``ST_TSeq``: an outer counter over ``t.n + t.i``
              wraps recursively built inner counters for ``t.t``.
            * ``t`` is some other nested type: the inner counters for ``t.t``
              are passed straight through.
            * otherwise: constants are emitted, since there is only one
              element.

            ``t``, ``has_last``, ``has_cur_valid``, ``has_ce``, ``has_reset``
            and ``valid_when_ce_off`` are not parameters of this method; they
            are read from an enclosing scope.
            """
            if type(t) == ST_TSeq:
                # outer counter covers all t.n + t.i periods of the TSeq
                outer_counter = AESizedCounterModM(t.n + t.i,
                                                   has_ce=True,
                                                   has_reset=has_reset)
                # inner counters always expose `last` here because it drives
                # the outer counter's CE below; cur_valid is handled at this level
                inner_counters = DefineNestedCounters(
                    t.t,
                    has_last=True,
                    has_cur_valid=False,
                    has_ce=has_ce,
                    has_reset=has_reset,
                    valid_when_ce_off=valid_when_ce_off)()
                if has_last:
                    # high while the outer counter holds its final value
                    is_last = Decode(t.n + t.i - 1,
                                     outer_counter.O.N)(outer_counter.O)
                if has_cur_valid:
                    # separate counter over t.valid_clocks(); its value is the cur_valid output
                    cur_valid_counter = AESizedCounterModM(t.valid_clocks(),
                                                           has_ce=True,
                                                           has_reset=has_reset)
                    wire(cur_valid_counter.O, cls.cur_valid)

                # if t.n is a power of 2 and always valid, then outer_counter.O.N not enough bits
                # for valid_length to contain t.n and for is_valid to get the right input
                # always valid in this case, so just emit 1
                if math.pow(2, outer_counter.O.N) - 1 < t.n:
                    is_valid = DefineCoreirConst(1, 1)().O[0]
                    if not has_last:
                        # the outer_counter output is never used if not has_last,
                        # so terminate it
                        last_term = TermAnyType(type(outer_counter.O))
                        wire(outer_counter.O, last_term.I)
                else:
                    # valid while the outer counter is below t.n (unsigned compare)
                    valid_length = DefineCoreirConst(outer_counter.O.N, t.n)()
                    is_valid_cmp = DefineCoreirUlt(outer_counter.O.N)()
                    wire(is_valid_cmp.I0, outer_counter.O)
                    wire(is_valid_cmp.I1, valid_length.O)
                    is_valid = is_valid_cmp.O

                # valid only when both the inner counters and this level are valid
                wire(inner_counters.valid & is_valid, cls.valid)
                if has_last:
                    wire(is_last & inner_counters.last, cls.last)
                if has_reset:
                    wire(cls.RESET, outer_counter.RESET)
                    wire(cls.RESET, inner_counters.RESET)
                    if has_cur_valid:
                        wire(cls.RESET, cur_valid_counter.RESET)
                if has_ce:
                    # outer counter advances when the inner counters finish a pass and CE is high
                    wire(bit(cls.CE) & inner_counters.last, outer_counter.CE)
                    wire(cls.CE, inner_counters.CE)
                    if has_cur_valid:
                        wire(
                            bit(cls.CE) & inner_counters.valid & is_valid,
                            cur_valid_counter.CE)
                else:
                    # same enables as above, without the CE gating
                    wire(inner_counters.last, outer_counter.CE)
                    if has_cur_valid:
                        wire(inner_counters.valid & is_valid,
                             cur_valid_counter.CE)
            elif is_nested(t):
                # nested but not a TSeq: forward every port of the inner counters unchanged
                inner_counters = DefineNestedCounters(
                    t.t,
                    has_last,
                    has_cur_valid,
                    has_ce,
                    has_reset,
                    valid_when_ce_off=valid_when_ce_off)()

                wire(inner_counters.valid, cls.valid)
                if has_last:
                    wire(inner_counters.last, cls.last)
                if has_reset:
                    wire(cls.RESET, inner_counters.RESET)
                if has_ce:
                    wire(cls.CE, inner_counters.CE)
                if has_cur_valid:
                    wire(inner_counters.cur_valid, cls.cur_valid)
            else:
                # only 1 element, so always last and valid element
                valid_and_last = DefineCoreirConst(1, 1)()
                if has_last:
                    wire(valid_and_last.O[0], cls.last)
                if has_cur_valid:
                    # cur_valid is a constant 0 for a single element
                    cur_valid = DefineCoreirConst(1, 0)()
                    wire(cur_valid.O, cls.cur_valid)
                if has_ce:
                    if valid_when_ce_off:
                        # valid is constant 1 regardless of CE; CE is terminated as unused
                        wire(cls.valid, valid_and_last.O[0])
                        ce_term = TermAnyType(Bit)
                        wire(cls.CE, ce_term.I)
                    else:
                        # valid follows CE directly
                        wire(cls.valid, cls.CE)
                else:
                    wire(valid_and_last.O[0], cls.valid)
                if has_reset:
                    # nothing to reset in this case; terminate the RESET port
                    reset_term = TermAnyType(Bit)
                    wire(reset_term.I, cls.RESET)