def definition(io):
    """Wire up the cycle counter and the index counter.

    An ``n``-bit resettable register is incremented by one through an adder.
    When it equals ``num_cycles - 1`` its RESET input is asserted and the
    ``b``-bit index register is clock-enabled.  The index increment is the
    NAND of "last cycle" and "last index", so once the index equals
    ``num_classes - 1`` the value added on an enabled update is zero.

    Args:
        io: circuit interface.  ``io.CLK`` is read; ``io.IDX`` and
            ``io.CYCLE`` are driven.
    """
    # Instances are created in the same order as before so that any
    # order-dependent instance naming is unaffected.
    cycle_inc = mantle.Add(n, cin=False, cout=False)
    cycle_reg = mantle.Register(n, has_reset=True)
    idx_inc = mantle.Add(b, cin=False, cout=False)
    idx_reg = mantle.Register(b, has_ce=True)

    for reg in (cycle_reg, idx_reg):
        wire(io.CLK, reg.CLK)

    # cycle register feeds back through a +1 adder
    wire(cycle_reg.O, cycle_inc.I0)
    wire(bits(1, n), cycle_inc.I1)
    wire(cycle_inc.O, cycle_reg.I)

    last_cycle = mantle.EQ(n)
    wire(cycle_reg.O, last_cycle.I0)
    wire(bits(num_cycles - 1, n), last_cycle.I1)

    # On the last cycle: clear the cycle register and let the index
    # register take its next value.
    wire(last_cycle.O, cycle_reg.RESET)
    wire(last_cycle.O, idx_reg.CE)

    last_idx = mantle.EQ(b)
    wire(idx_reg.O, last_idx.I0)
    wire(bits(num_classes - 1, b), last_idx.I1)

    # The index increment is a b-bit value whose upper bits are tied to 0
    # and whose bit 0 comes from the NAND gate below.
    wire(idx_reg.O, idx_inc.I0)
    wire(bits(0, b - 1), idx_inc.I1[1:])

    idx_step = mantle.NAnd()
    wire(last_cycle.O, idx_step.I0)
    wire(last_idx.O, idx_step.I1)

    # Once the last index has been reached the increment becomes 0, so the
    # index stops accumulating.
    wire(idx_step.O, idx_inc.I1[0])
    wire(idx_inc.O, idx_reg.I)

    wire(idx_reg.O, io.IDX)
    # NOTE: CYCLE is taken from the adder output (register value + 1),
    # not from the register itself.
    wire(cycle_inc.O, io.CYCLE)
def definition(io):
    """Downscale the incoming pixel words and emit thresholded pixel bits.

    Args:
        io: circuit interface.  ``io.LOAD``, ``io.SCK`` and ``io.DATA`` are
            read; ``io.O`` (thresholded pixel bit) and ``io.VALID`` are
            driven.
    """
    load = io.LOAD
    sck_rise = rising(io.SCK)
    sck_fall = falling(io.SCK)
    baud = sck_rise | sck_fall

    # Counter (modulo buf_size, 12 bits wide) enabled by load & baud.
    valid_counter = mantle.CounterModM(buf_size, 12, has_ce=True)
    count_enable = load & baud
    m.wire(count_enable, valid_counter.CE)

    # Counter values at which the output is flagged valid; the original
    # author notes the list has 32 entries (presumably wo == 32).
    sample_counts = [wi * (b - 1) + k * a - 1 for k in range(1, wo + 1)]

    # OR together one decoder per listed counter value.
    valid = m.GND
    for count in sample_counts:
        hit = mantle.Decode(count, 12)(valid_counter.O)
        valid = valid | hit

    # register on input, captured when load is high
    st_in = mantle.Register(width, has_ce=True)
    st_in(io.DATA)
    m.wire(load, st_in.CE)

    # --------------------------DOWNSCALING----------------------------- #
    # downscale the image from 352x288 to 32x32
    downscale_ports = [
        "I_0_0", m.In(m.Array(1, m.Array(1, m.Array(width, m.Bit)))),
        "WE", m.In(m.Bit),
        "CLK", m.In(m.Clock),
        "O", m.Out(m.Array(width, m.Bit)),
        "V", m.Out(m.Bit),
    ]
    Downscale = m.DeclareCircuit("Downscale", *downscale_ports)

    dscale = Downscale()
    m.wire(st_in.O, dscale.I_0_0[0][0])
    m.wire(1, dscale.WE)
    # The downscaler is clocked by the load signal, not by a free clock.
    m.wire(load, dscale.CLK)

    # Instantiated only for its side effect: needed for Add16 definition.
    _add16 = mantle.Add(width)

    # threshold the downscale output
    # NOTE(review): the comparison is hard-coded to 16 bits while the
    # datapath uses `width` -- confirm width == 16.
    below_thresh = mantle.ULE(16)(dscale.O, m.uint(THRESH, 16))
    px_bit = below_thresh & valid

    # ---------------------------UART OUTPUT----------------------------- #
    m.wire(px_bit, io.O)
    m.wire(valid, io.VALID)
def definition(io):
    """Build the IF / RR / EX / CF / WB datapath and connect it to ``io``.

    Reads ``io.CLK``.  Drives ``io.O`` with the contents of ``reg_5`` and
    ``io.D`` with a flag that, once raised by the final index/cycle pair,
    feeds back into itself through an OR gate.

    Bit layout of ``reg_2`` (width ``N + b + n``): bits ``[:N]`` hold the
    image block, ``[N:N + b]`` the index and ``[N + b:]`` the cycle.
    ``reg_3_2`` and ``reg_4`` carry the upper ``b + n`` bits (index, then
    cycle) along with the data.
    """
    # IF - get cycle_id, label_index_id
    controller = Controller()
    reg_1_cycle = mantle.Register(n)
    # Control bit that marks the data as ready; it is initialised to 1 and
    # its input is tied to 1.
    reg_1_control = mantle.DFF(init=1)
    wire(io.CLK, controller.CLK)
    wire(io.CLK, reg_1_cycle.CLK)
    wire(io.CLK, reg_1_control.CLK)
    # The index is taken straight from the controller (not registered here),
    # while the cycle goes through reg_1_cycle.
    reg_1_idx = controller.IDX
    wire(controller.CYCLE, reg_1_cycle.I)
    wire(1, reg_1_control.I)

    # RR - get weight block, image block of N bits
    readROM = ReadROM()
    wire(reg_1_idx, readROM.IDX)
    wire(reg_1_cycle.O, readROM.CYCLE)
    reg_2 = mantle.Register(N + b + n)
    reg_2_control = mantle.DFF()
    # The weight output of the ROM is used directly in the EX stage; only
    # the image, index and cycle are stored in reg_2.
    reg_2_weight = readROM.WEIGHT
    wire(io.CLK, reg_2.CLK)
    wire(io.CLK, readROM.CLK)
    wire(io.CLK, reg_2_control.CLK)
    wire(readROM.IMAGE, reg_2.I[:N])
    wire(reg_1_idx, reg_2.I[N:N + b])
    wire(reg_1_cycle.O, reg_2.I[N + b:])
    wire(reg_1_control.O, reg_2_control.I)

    # EX - NXOr for multiplication, pop count and accumulate the result for activation
    multiplier = mantle.NXOr(height=2, width=N)
    bit_counter = DefineBitCounter(N)()
    adder = mantle.Add(n_bc_adder, cin=False, cout=False)
    mux_for_adder_0 = mantle.Mux(height=2, width=n_bc_adder)
    mux_for_adder_1 = mantle.Mux(height=2, width=n_bc_adder)
    reg_3_1 = mantle.Register(n_bc_adder)
    reg_3_2 = mantle.Register(b + n)
    wire(io.CLK, reg_3_1.CLK)
    wire(io.CLK, reg_3_2.CLK)
    wire(reg_2_weight, multiplier.I0)
    wire(reg_2.O[:N], multiplier.I1)
    wire(multiplier.O, bit_counter.I)
    # First adder operand: either 0 or the pop count, zero-extended from
    # n_bc to n_bc_adder bits when the adder is wider than the counter.
    wire(bits(0, n_bc_adder), mux_for_adder_0.I0)
    wire(bit_counter.O, mux_for_adder_0.I1[:n_bc])
    if n_bc_adder > n_bc:
        wire(bits(0, n_bc_adder - n_bc), mux_for_adder_0.I1[n_bc:])
    # only when data read is ready (i.e. control signal is high), accumulate the pop count result
    wire(reg_2_control.O, mux_for_adder_0.S)
    # Second adder operand: the running sum in reg_3_1, or 0 when the
    # comparison below selects I1.
    wire(reg_3_1.O, mux_for_adder_1.I0)
    wire(bits(0, n_bc_adder), mux_for_adder_1.I1)
    # comparison_3 examines the cycle field of reg_2.
    if n == 4:
        # LUT_INIT evaluates to 1, i.e. only the lowest LUT entry is set.
        # NOTE(review): presumably this makes the LUT output 1 only when all
        # four cycle bits are 0, matching the EQ-against-zero branch below.
        comparison_3 = SB_LUT4(LUT_INIT=int('0' * 15 + '1', 2))
        wire(
            reg_2.O[N + b:],
            bits([
                comparison_3.I0, comparison_3.I1, comparison_3.I2,
                comparison_3.I3
            ]))
    else:
        comparison_3 = mantle.EQ(n)
        wire(reg_2.O[N + b:], comparison_3.I0)
        wire(bits(0, n), comparison_3.I1)
    wire(comparison_3.O, mux_for_adder_1.S)
    wire(mux_for_adder_0.O, adder.I0)
    wire(mux_for_adder_1.O, adder.I1)
    wire(adder.O, reg_3_1.I)
    # Forward index and cycle (everything above the image bits).
    wire(reg_2.O[N:], reg_3_2.I)

    # CF - classify the image
    classifier = Classifier()
    reg_4 = mantle.Register(n + b)
    # The classifier output is used directly as the WB-stage input.
    reg_4_idx = classifier.O
    wire(io.CLK, classifier.CLK)
    wire(io.CLK, reg_4.CLK)
    wire(reg_3_1.O, classifier.I)
    wire(reg_3_2.O[:b], classifier.IDX)
    wire(reg_3_2.O, reg_4.I)

    # WB - wait to show the result until the end
    reg_5 = mantle.Register(b, has_ce=True)
    comparison_5_1 = mantle.EQ(b)
    comparison_5_2 = mantle.EQ(n)
    and_gate = mantle.And()
    wire(io.CLK, reg_5.CLK)
    wire(reg_4_idx, reg_5.I)
    # reg_5 is enabled only when the index field equals num_classes - 1 and
    # the cycle field equals num_cycles - 1.
    wire(reg_4.O[:b], comparison_5_1.I0)
    wire(bits(num_classes - 1, b), comparison_5_1.I1)
    wire(reg_4.O[b:], comparison_5_2.I0)
    wire(bits(num_cycles - 1, n), comparison_5_2.I1)
    wire(comparison_5_1.O, and_gate.I0)
    wire(comparison_5_2.O, and_gate.I1)
    wire(and_gate.O, reg_5.CE)
    wire(reg_5.O, io.O)

    # latch the light indicating the end
    reg_6 = mantle.DFF()
    wire(io.CLK, reg_6.CLK)
    or_gate = mantle.Or()
    # reg_6 <- and_gate.O | reg_6: the flag feeds back into its own input.
    wire(and_gate.O, or_gate.I0)
    wire(reg_6.O, or_gate.I1)
    wire(or_gate.O, reg_6.I)
    wire(reg_6.O, io.D)
# # "test" data # init = [m.uint(i, 16) for i in range(16)] # printf = mantle.Counter(4, has_ce=True) # rom = ROM16(4, init, printf.O) # m.wire(load & baud, printf.CE) #---------------------------STENCILING-----------------------------# ReduceHybrid = m.DeclareCircuit('ReduceHybrid', 'I_0', m.In(m.Array(a, TIN)), 'I_1', m.In(m.Array(a, TIN)), 'O', TOUT, 'WE', m.BitIn, 'V', m.Out(m.Bit), 'CLK', m.In(m.Clock)) redHybrid = ReduceHybrid() m.wire(m.bits(0, 16), redHybrid.I_0[0]) m.wire(m.bits(1, 16), redHybrid.I_1[0]) m.wire(1, redHybrid.WE) m.wire(load, redHybrid.CLK) add16 = mantle.Add(16) # needed for Add16 definition # ---------------------------UART OUTPUT----------------------------- # uart_red = UART(16) uart_red(CLK=main.CLKIN, BAUD=baud, DATA=redHybrid.O, LOAD=load) m.wire(redHybrid.V, main.J3[0]) m.wire(load, main.J3[1]) m.wire(uart_red.O, main.J3[2])
def definition(io):
    """Downscale the pixel stream and assemble 32-bit image rows.

    Thresholded pixel bits are shifted into a 32-bit serial-in/parallel-out
    register; a row address counter and a "full" flag track progress, and
    the row and its address are also handed to two UART instances.

    Args:
        io: circuit interface.  Reads ``io.LOAD``, ``io.SCK``, ``io.DATA``
            and ``io.CLK``; drives ``io.WADDR``, ``io.DONE``, ``io.UART``,
            ``io.O``, ``io.VALID``, ``io.T0`` and ``io.T1``.
    """
    load = io.LOAD
    sck_rise = rising(io.SCK)
    sck_fall = falling(io.SCK)
    baud = sck_rise | sck_fall

    # Counter (modulo buf_size, 12 bits wide) enabled by load & baud.
    valid_counter = mantle.CounterModM(buf_size, 12, has_ce=True)
    count_enable = load & baud
    m.wire(count_enable, valid_counter.CE)

    # Counter values at which a downscaled pixel is flagged valid; the
    # original author notes the list has 32 entries (presumably wo == 32).
    sample_counts = [wi * (b - 1) + k * a - 1 for k in range(1, wo + 1)]

    # OR together one decoder per listed counter value.
    valid = m.GND
    for count in sample_counts:
        hit = mantle.Decode(count, 12)(valid_counter.O)
        valid = valid | hit

    # register on input, captured when load is high
    st_in = mantle.Register(width, has_ce=True)
    st_in(io.DATA)
    m.wire(load, st_in.CE)

    # --------------------------DOWNSCALING----------------------------- #
    # downscale the image from 352x288 to 32x32
    downscale_ports = [
        "I_0_0", m.In(m.Array(1, m.Array(1, m.Array(width, m.Bit)))),
        "WE", m.In(m.Bit),
        "CLK", m.In(m.Clock),
        "O", m.Out(m.Array(width, m.Bit)),
        "V", m.Out(m.Bit),
    ]
    Downscale = m.DeclareCircuit("Downscale", *downscale_ports)

    dscale = Downscale()
    m.wire(st_in.O, dscale.I_0_0[0][0])
    m.wire(1, dscale.WE)
    # The downscaler is clocked by the load signal.
    m.wire(load, dscale.CLK)

    # Instantiated only for its side effect: needed for Add16 definition.
    _add16 = mantle.Add(width)

    # --------------------------FILL IMG RAM--------------------------- #
    # each valid output of dscale represents an entry of 32x32 binary image
    # accumulate each group of 32 entries into a 32-bit value representing
    # a row
    col = mantle.CounterModM(32, 6, has_ce=True)
    col_ce = rising(valid)
    m.wire(col_ce, col.CE)

    # shift each bit in one at a time until we get an entire row
    below_thresh = mantle.ULE(16)(dscale.O, m.uint(THRESH, 16))
    px_bit = below_thresh & valid
    row_reg = mantle.SIPO(32, has_ce=True)
    row_reg(px_bit)
    m.wire(col_ce, row_reg.CE)

    # reverse the row bits since the image is flipped
    row = reverse(row_reg.O)

    # Row address counter plus a flag that is set once the counter reads 32.
    rowaddr = mantle.Counter(6, has_ce=True)
    img_full = mantle.SRFF(has_ce=True)
    all_rows_seen = mantle.EQ(6)(rowaddr.O, m.bits(32, 6))
    img_full(all_rows_seen, 0)
    col_wrap_fall = falling(col.COUT)
    m.wire(col_wrap_fall, img_full.CE)

    # Advance the row address on a rising column carry-out while the image
    # is not yet full.
    col_wrap_rise = rising(col.COUT)
    row_ce = col_wrap_rise & ~img_full.O
    m.wire(row_ce, rowaddr.CE)
    # Only the low 5 bits are used as the write address.
    waddr = rowaddr.O[:5]

    # Write-enable pulse: `we` is high while pulse_count >= 1; the counter
    # advances whenever `we` or `rdy` is high.
    rdy = col.COUT & ~img_full.O
    pulse_count = mantle.Counter(2, has_ce=True)
    we = mantle.UGE(2)(pulse_count.O, m.uint(1, 2))
    pulse_enable = we | rdy
    pulse_count(CE=pulse_enable)

    # ---------------------------UART OUTPUT----------------------------- #
    row_load = row_ce
    # baud passed through a flip-flop before it reaches the UARTs
    row_baud = mantle.FF()(baud)
    uart_row = UART(32)
    uart_row(CLK=io.CLK, BAUD=row_baud, DATA=row, LOAD=row_load)
    uart_addr = UART(5)
    uart_addr(CLK=io.CLK, BAUD=row_baud, DATA=waddr, LOAD=row_load)

    m.wire(waddr, io.WADDR)
    m.wire(img_full, io.DONE)
    m.wire(uart_row, io.UART)
    m.wire(row, io.O)
    m.wire(we, io.VALID)
    # T0/T1 carry the valid flag and the address UART output.
    m.wire(valid, io.T0)
    m.wire(uart_addr, io.T1)
def definition(io):
    """Downscale the pixel stream and count the rows/columns produced.

    Column and row counters advance on the downscaler's valid output, each
    guarded by a "full" flag, and the raw downscaler output is sent over a
    UART.

    Args:
        io: circuit interface.  Reads ``io.LOAD``, ``io.BAUD``, ``io.DATA``
            and ``io.CLK``; drives ``io.ROW``, ``io.DONE`` and ``io.UART``.
    """
    load = io.LOAD
    baud = io.BAUD

    # Counter (modulo buf_size, 13 bits wide) enabled by load & baud.
    valid_counter = mantle.CounterModM(buf_size, 13, has_ce=True)
    count_enable = load & baud
    m.wire(count_enable, valid_counter.CE)

    sample_counts = [wi * (b - 1) + k * a - 1 for k in range(1, wo + 1)]

    # OR together one decoder per listed counter value.
    # NOTE(review): `valid` is not wired to anything in this block; the
    # decoders are still instantiated, so the loop is kept as is.
    valid = m.GND
    for count in sample_counts:
        hit = mantle.Decode(count, 13)(valid_counter.O)
        valid = valid | hit

    # register on input, captured when load is high
    st_in = mantle.Register(16, has_ce=True)
    st_in(io.DATA)
    m.wire(load, st_in.CE)

    # --------------------------DOWNSCALING----------------------------- #
    # downscale the image from 320x240 to 16x16
    downscale_ports = [
        "I_0_0", m.In(m.Array(1, m.Array(1, m.Array(16, m.Bit)))),
        "WE", m.In(m.Bit),
        "CLK", m.In(m.Clock),
        "O", m.Out(m.Array(16, m.Bit)),
        "V", m.Out(m.Bit),
    ]
    Downscale = m.DeclareCircuit("Downscale", *downscale_ports)

    dscale = Downscale()
    m.wire(st_in.O, dscale.I_0_0[0][0])
    m.wire(1, dscale.WE)
    # The downscaler is clocked by the load signal.
    m.wire(load, dscale.CLK)

    # Instantiated only for its side effect: needed for Add16 definition.
    _add16 = mantle.Add(16)

    # --------------------------FILL IMG RAM--------------------------- #
    # each valid output of dscale represents an entry of 16x16 binary image
    # accumulate each group of 16 entries into a 16-bit value representing
    # a row of the image

    # Column counter with a flag that is set once the counter reads 15.
    col = mantle.Counter(4, has_ce=True)
    row_full = mantle.SRFF(has_ce=True)
    col_is_last = mantle.EQ(4)(col.O, m.bits(15, 4))
    row_full(col_is_last, 0)
    valid_fall = falling(dscale.V)
    m.wire(valid_fall, row_full.CE)

    valid_rise = rising(dscale.V)
    col_ce = valid_rise & ~row_full.O
    m.wire(col_ce, col.CE)

    # Row counter, built the same way on top of the column carry-out.
    row = mantle.Counter(4, has_ce=True)
    img_full = mantle.SRFF(has_ce=True)
    row_is_last = mantle.EQ(4)(row.O, m.bits(15, 4))
    img_full(row_is_last, 0)
    col_wrap_fall = falling(col.COUT)
    m.wire(col_wrap_fall, img_full.CE)

    col_wrap_rise = rising(col.COUT)
    row_ce = col_wrap_rise & ~img_full.O
    m.wire(row_ce, row.CE)

    # ---------------------------UART OUTPUT----------------------------- #
    uart_st = UART(16)
    uart_st(CLK=io.CLK, BAUD=baud, DATA=dscale.O, LOAD=load)

    m.wire(row.O, io.ROW)
    m.wire(img_full.O, io.DONE)
    m.wire(uart_st.O, io.UART)
def definition(io):
    """Downscale the pixel stream and assemble 16-bit image rows.

    Thresholded pixel bits are shifted into a 16-bit serial-in/parallel-out
    register; a row address counter and a "full" flag track progress, and
    the row and its address are also handed to two UART instances.

    Args:
        io: circuit interface.  Reads ``io.LOAD``, ``io.SCK``, ``io.DATA``
            and ``io.CLK``; drives ``io.WADDR``, ``io.DONE``, ``io.UART``,
            ``io.O`` and ``io.VALID``.
    """
    load = io.LOAD
    sck_rise = rising(io.SCK)
    sck_fall = falling(io.SCK)
    baud = sck_rise | sck_fall

    # Counter (modulo buf_size, 13 bits wide) enabled by load & baud.
    valid_counter = mantle.CounterModM(buf_size, 13, has_ce=True)
    count_enable = load & baud
    m.wire(count_enable, valid_counter.CE)

    sample_counts = [wi * (b - 1) + k * a - 1 for k in range(1, wo + 1)]

    # OR together one decoder per listed counter value.
    valid = m.GND
    for count in sample_counts:
        hit = mantle.Decode(count, 13)(valid_counter.O)
        valid = valid | hit

    # register on input, captured when load is high
    st_in = mantle.Register(16, has_ce=True)
    st_in(io.DATA)
    m.wire(load, st_in.CE)

    # --------------------------DOWNSCALING----------------------------- #
    # downscale the image from 320x240 to 16x16
    downscale_ports = [
        "I_0_0", m.In(m.Array(1, m.Array(1, m.Array(16, m.Bit)))),
        "WE", m.In(m.Bit),
        "CLK", m.In(m.Clock),
        "O", m.Out(m.Array(16, m.Bit)),
        "V", m.Out(m.Bit),
    ]
    Downscale = m.DeclareCircuit("Downscale", *downscale_ports)

    dscale = Downscale()
    m.wire(st_in.O, dscale.I_0_0[0][0])
    m.wire(1, dscale.WE)
    # The downscaler is clocked by the load signal.
    m.wire(load, dscale.CLK)

    # Instantiated only for its side effect: needed for Add16 definition.
    _add16 = mantle.Add(16)

    # --------------------------FILL IMG RAM--------------------------- #
    # each valid output of dscale represents a pixel in 16x16 binary image
    # accumulate each group of 16 pixels into a 16-bit value representing
    # a row in the image
    col = mantle.CounterModM(16, 5, has_ce=True)
    col_ce = rising(valid)
    m.wire(col_ce, col.CE)

    # shift each bit in one at a time until we get an entire row
    below_thresh = mantle.ULE(16)(dscale.O, m.uint(THRESH, 16))
    px_bit = below_thresh & valid
    row_reg = mantle.SIPO(16, has_ce=True)
    row_reg(px_bit)
    m.wire(col_ce, row_reg.CE)

    # reverse the row bits since the image is flipped
    row = reverse(row_reg.O)

    # Row address counter plus a flag that is set once the counter reads 16.
    rowaddr = mantle.Counter(5, has_ce=True)
    img_full = mantle.SRFF(has_ce=True)
    all_rows_seen = mantle.EQ(5)(rowaddr.O, m.bits(16, 5))
    img_full(all_rows_seen, 0)
    col_wrap_fall = falling(col.COUT)
    m.wire(col_wrap_fall, img_full.CE)

    # Advance the row address on a rising column carry-out while the image
    # is not yet full.
    col_wrap_rise = rising(col.COUT)
    row_ce = col_wrap_rise & ~img_full.O
    m.wire(row_ce, rowaddr.CE)
    # Only the low 4 bits are used as the write address.
    waddr = rowaddr.O[:4]

    # Write-enable pulse: `we` is high while pulse_count >= 1; the counter
    # advances whenever `we` or `rdy` is high.
    rdy = col.COUT & ~img_full.O
    pulse_count = mantle.Counter(5, has_ce=True)
    we = mantle.UGE(5)(pulse_count.O, m.uint(1, 5))
    pulse_enable = we | rdy
    pulse_count(CE=pulse_enable)

    # ---------------------------UART OUTPUT----------------------------- #
    row_load = row_ce
    # baud passed through a flip-flop before it reaches the UARTs
    row_baud = mantle.FF()(baud)
    uart_row = UART(16)
    uart_row(CLK=io.CLK, BAUD=row_baud, DATA=row, LOAD=row_load)
    uart_addr = UART(4)
    uart_addr(CLK=io.CLK, BAUD=row_baud, DATA=waddr, LOAD=row_load)

    # split 16-bit row data into 8-bit packets so it can be parsed
    # NOTE(review): neither byte nor uart_counter is connected to an output
    # in this block; they are kept because they instantiate hardware.
    low_byte = row & LOW_MASK
    high_byte = row & HIGH_MASK
    uart_counter = mantle.CounterModM(8, 4, has_ce=True)
    packet_tick = rising(valid)
    m.wire(packet_tick, uart_counter.CE)

    m.wire(waddr, io.WADDR)
    m.wire(img_full, io.DONE)
    m.wire(uart_row, io.UART)
    m.wire(row, io.O)
    m.wire(we, io.VALID)