Beispiel #1
0
 def __init__(self, width_i, shape):
     """Declare the two matrix sinks and the scalar source.

     Parameters
     ----------
     width_i :
         Passed as ``width`` to both input ``MatrixStream`` objects.
     shape :
         Passed as ``shape`` to both input ``MatrixStream`` objects.
     """
     # Both operands share every argument except the stream name.
     sink_args = dict(width=width_i, shape=shape, direction='sink')
     self.input_a = MatrixStream(name='input_a', **sink_args)
     self.input_b = MatrixStream(name='input_b', **sink_args)

     # Geometry is read back from the stream rather than from the arguments.
     port_a = self.input_a.dataport
     self.input_w = port_a.width
     self.n_inputs = port_a.n_elements

     # The result width is derived from the element width and element count.
     self.output_w = calculate_output_width(self.input_w, self.n_inputs)
     self.output = DataStream(self.output_w, direction='source', name='output')
     self.shape = port_a.shape
Beispiel #2
0
 def __init__(self, width, shape):
     """Declare a matrix sink and a matrix source with identical geometry.

     Parameters
     ----------
     width :
         Stored as ``self.width`` and passed as ``width`` to both streams.
     shape :
         Stored as ``self.shape`` and passed as ``shape`` to both streams.
     """
     self.width, self.shape = width, shape
     # The two endpoints differ only in direction and name.
     geometry = dict(width=width, shape=shape)
     self.input = MatrixStream(direction='sink', name='input', **geometry)
     self.output = MatrixStream(direction='source', name='output', **geometry)
Beispiel #3
0
 def __init__(self, input_w, row_length, N, invert=False):
     """Store the configuration and declare a scalar sink and an (N,) source.

     Parameters
     ----------
     input_w :
         Passed as ``width`` to both the input and the output stream.
     row_length :
         Stored unchanged as ``self.row_length``.
     N :
         Length of the one-dimensional output shape ``(N, )``.
     invert :
         Stored unchanged as ``self.invert``.
     """
     self.invert = invert
     self.row_length = row_length

     self.input = DataStream(width=input_w, direction='sink', name='input')
     self.output = MatrixStream(
         width=input_w, shape=(N, ), direction='source', name='output')

     # Derived attributes are read back from the streams just created.
     out_port = self.output.dataport
     self.input_w = len(self.input.data)
     self.output_w = out_port.width
     self.shape = out_port.shape
     self.N = out_port.shape[0]
Beispiel #4
0
 def __init__(self, data_w, N, invert=False):
     """Declare an (N,) matrix sink and an (N, N) matrix source.

     Parameters
     ----------
     data_w :
         Passed as ``width`` to both streams.
     N :
         Length of the input vector and side of the square output matrix.
     invert :
         Stored unchanged as ``self.invert``.
     """
     self.invert = invert

     self.input = MatrixStream(
         width=data_w, shape=(N, ), direction='sink', name='input')
     self.output = MatrixStream(
         width=data_w, shape=(N, N), direction='source', name='output')

     # Derived attributes are read back from the streams just created.
     in_port = self.input.dataport
     out_port = self.output.dataport
     self.data_w = in_port.width
     self.shape_i = in_port.shape
     self.shape_o = out_port.shape
     self.N = out_port.shape[0]
Beispiel #5
0
class SubmatrixRegisters(Elaboratable):
    """Register bank that collects (N,) vectors into an (N, N) matrix.

    Each accepted input vector is stored as one column of the output matrix
    while the columns stored earlier move over by one position.

    Parameters
    ----------
    data_w :
        Passed as ``width`` to both streams.
    N :
        Length of the input vector and side of the square output matrix.
    invert :
        When true, the new vector is written to column 0 and older columns
        move towards higher indices.  Otherwise the new vector is written to
        column ``N - 1`` and older columns move towards lower indices.
    """

    def __init__(self, data_w, N, invert=False):
        self.invert = invert

        self.input = MatrixStream(
            width=data_w, shape=(N, ), direction='sink', name='input')
        self.output = MatrixStream(
            width=data_w, shape=(N, N), direction='source', name='output')

        # Derived attributes are read back from the streams just created.
        in_port = self.input.dataport
        out_port = self.output.dataport
        self.data_w = in_port.width
        self.shape_i = in_port.shape
        self.shape_o = out_port.shape
        self.N = out_port.shape[0]

    def get_ports(self):
        """Return every field of the input stream followed by the output's."""
        return [
            stream[field]
            for stream in (self.input, self.output)
            for field in stream.fields
        ]

    def elaborate(self, platform):
        m = Module()
        sync = m.d.sync
        comb = m.d.comb

        def physical_col(position):
            # Map a shift position (0 = newest) to a column of the output.
            if self.invert:
                return position
            return self.N - 1 - position

        with m.If(self.input.accepted()):
            for r in range(self.N):
                # The newest vector lands in shift position 0 ...
                sync += self.output.dataport.matrix[r, physical_col(0)].eq(
                    self.input.dataport.matrix[r])
                # ... and every older column moves one position further.
                for position in range(1, self.N):
                    sync += self.output.dataport.matrix[
                        r, physical_col(position)].eq(
                            self.output.dataport.matrix[
                                r, physical_col(position - 1)])

        # ``valid`` is set by an accepted input and cleared by an accepted
        # output; the input takes priority when both happen together.
        with m.If(self.input.accepted()):
            sync += self.output.valid.eq(1)
        with m.Elif(self.output.accepted()):
            sync += self.output.valid.eq(0)

        # A new vector can be taken when the output is being consumed this
        # cycle or holds nothing valid.
        comb += self.input.ready.eq(
            self.output.accepted() | ~self.output.valid)

        return m
Beispiel #6
0
 def __init__(self, data_w, input_shape, N, invert=False):
     """Declare the streams and the wrapped ``MatrixFeeder``.

     Parameters
     ----------
     data_w :
         Passed as ``width`` to both streams and as the first argument of
         the inner ``MatrixFeeder``.
     input_shape :
         Two-element sequence ``(height, width)``; both entries must be
         multiples of ``N``.
     N :
         Side of the ``(N, N)`` output matrix.
     invert :
         Forwarded to the inner ``MatrixFeeder``.

     Raises
     ------
     AssertionError
         If ``input_shape[0]`` or ``input_shape[1]`` is not a multiple of
         ``N``.
     """
     # Plain string literals: the messages contain no placeholders, so the
     # f-prefix was superfluous.
     assert input_shape[0] % N == 0, (
         'image height must be a multiple of N. Psss, you can use Padder() to append zeros!'
     )
     assert input_shape[1] % N == 0, (
         'image width must be a multiple of N. Psss, you can use Padder() to append zeros!'
     )
     self.input = DataStream(width=data_w, direction='sink', name='input')
     self.output = MatrixStream(width=data_w,
                                shape=(N, N),
                                direction='source',
                                name='output')
     self.matrix_feeder = MatrixFeeder(data_w,
                                       input_shape,
                                       N,
                                       invert=invert)
     # Integer floor division keeps the arithmetic exact; the previous
     # int(a / N) went through a float.
     self.output_shape = (input_shape[0] // N, input_shape[1] // N)
     self.N = N
Beispiel #7
0
 def __init__(self, width, shape, n_cores):
     """Create ``n_cores`` ``DotProduct`` cores and the shared streams.

     Parameters
     ----------
     width :
         Passed to every ``DotProduct`` and as ``width`` to both inputs.
     shape :
         Passed to every ``DotProduct`` and as ``shape`` to both inputs.
     n_cores :
         Number of ``DotProduct`` instances to create.
     """
     self.cores = []
     for _ in range(n_cores):
         self.cores.append(DotProduct(width, shape))

     # Both operands share every argument except the stream name.
     sink_args = dict(width=width, shape=shape, direction='sink')
     self.input_a = MatrixStream(name='input_a', **sink_args)
     self.input_b = MatrixStream(name='input_b', **sink_args)

     # The result width is taken from the first core.
     self.output_w = self.cores[0].output_w
     self.output = DataStream(
         self.output_w, direction='source', name='output')

     port_a = self.input_a.dataport
     self.input_w = port_a.width
     self.n_inputs = port_a.n_elements
     self.shape = port_a.shape
     self.n_cores = len(self.cores)
Beispiel #8
0
 def __init__(self, data_w, input_shape, N, invert=False):
     """Store the geometry and declare a scalar sink and an (N, N) source.

     Parameters
     ----------
     data_w :
         Passed as ``width`` to both streams.
     input_shape :
         Sequence whose first two entries are stored and used to compute
         ``output_shape``.
     N :
         Side of the square output matrix.
     invert :
         Stored unchanged as ``self.invert``.
     """
     self.input_shape = input_shape
     # Each output dimension is the input dimension plus one, minus N.
     dim_0, dim_1 = input_shape[0], input_shape[1]
     self.output_shape = (dim_0 + 1 - N, dim_1 + 1 - N)
     self.invert = invert

     self.input = DataStream(width=data_w, direction='sink', name='input')
     self.output = MatrixStream(
         width=data_w, shape=(N, N), direction='source', name='output')

     # Derived attributes are read back from the streams just created.
     out_port = self.output.dataport
     self.data_w = len(self.input.data)
     self.shape = out_port.shape
     self.N = out_port.shape[0]
Beispiel #9
0
 def __init__(self, width, input_shape, N, n_cores):
     """Instantiate the feeder and the farm and declare the streams.

     Parameters
     ----------
     width :
         Data width used for the feeder, the farm, ``coeff`` and ``input``.
     input_shape :
         Stored as ``self.input_shape`` and forwarded to ``MatrixFeeder``.
     N :
         Side of the ``(N, N)`` shape used for the farm and ``coeff``.
     n_cores :
         Stored as ``self.n_cores`` and forwarded to ``Farm``.
     """
     self.input_shape = input_shape
     self.n_cores = n_cores

     square = (N, N)
     self.matrix_feeder = MatrixFeeder(
         data_w=width, input_shape=input_shape, N=N, invert=False)
     self.farm = Farm(width=width, shape=square, n_cores=n_cores)

     self.coeff = MatrixStream(
         width=width, shape=square, direction='sink', name='coeff')
     self.input = DataStream(width=width, direction='sink', name='input')
     # The output is exactly as wide as the farm's own output data.
     farm_data_w = len(self.farm.output.data)
     self.output = DataStream(
         width=farm_data_w, direction='source', name='output')

     self.input_w = len(self.input.data)
     self.output_w = len(self.output.data)
     coeff_port = self.coeff.dataport
     self.shape = coeff_port.shape
     self.N = coeff_port.shape[0]
Beispiel #10
0
def TreeHighestUnsignedWrapped(width_i, n_stages, reg_in, reg_out):
    """Build a ``TreeHighestUnsigned`` core and wrap it in a ``StreamWrapper``.

    The wrapper gets an ``(n_inputs, )`` matrix sink, a scalar source as wide
    as the core's output, and the core's own latency.  Stream field
    ``data_<i>`` is mapped to the name of ``core.inputs[i]``, and stream field
    ``data`` is mapped to ``output``.

    All four parameters are forwarded to ``TreeHighestUnsigned`` by keyword;
    ``width_i`` is also used as the width of the input stream.
    """
    core = TreeHighestUnsigned(
        width_i=width_i, n_stages=n_stages, reg_in=reg_in, reg_out=reg_out)
    core_latency = core.latency
    port_count = len(core.inputs)

    sink = MatrixStream(
        width_i, shape=(port_count, ), direction='sink', name='input')
    source = DataStream(
        core.output.width, direction='source', name='output')

    # One stream field per core input, keyed by position.
    field_to_port = {
        f'data_{idx}': core.inputs[idx].name for idx in range(port_count)
    }

    return StreamWrapper(wrapped_core=core,
                         input_stream=sink,
                         output_stream=source,
                         input_map=field_to_port,
                         output_map={'data': 'output'},
                         latency=core_latency)
Beispiel #11
0
class MatrixFeederSkip(MatrixFeeder):
    """Wrap a ``MatrixFeeder`` and forward only a subset of its outputs.

    The inner feeder receives the input stream unchanged.  Two counters,
    ``pooling_counter_row`` and ``pooling_counter_col``, advance as the
    feeder produces matrices.  A feeder output is passed to ``output`` only
    while both counters are zero; otherwise it is consumed and dropped.

    Parameters
    ----------
    data_w :
        Passed as ``width`` to both streams and as the first argument of the
        inner ``MatrixFeeder``.
    input_shape :
        Two-element sequence ``(height, width)``; both entries must be
        multiples of ``N``.
    N :
        Side of the ``(N, N)`` output matrix and second argument of ``_incr``
        for both counters.
    invert :
        Forwarded to the inner ``MatrixFeeder``.

    Raises
    ------
    AssertionError
        If ``input_shape[0]`` or ``input_shape[1]`` is not a multiple of
        ``N``.
    """

    def __init__(self, data_w, input_shape, N, invert=False):
        # NOTE(review): the parent constructor is not invoked here; the class
        # composes its own MatrixFeeder instance instead.
        # Plain string literals: the messages contain no placeholders, so the
        # f-prefix was superfluous.
        assert input_shape[0] % N == 0, (
            'image height must be a multiple of N. Psss, you can use Padder() to append zeros!'
        )
        assert input_shape[1] % N == 0, (
            'image width must be a multiple of N. Psss, you can use Padder() to append zeros!'
        )
        self.input = DataStream(width=data_w, direction='sink', name='input')
        self.output = MatrixStream(width=data_w,
                                   shape=(N, N),
                                   direction='source',
                                   name='output')
        self.matrix_feeder = MatrixFeeder(data_w,
                                          input_shape,
                                          N,
                                          invert=invert)
        # Integer floor division keeps the arithmetic exact; the previous
        # int(a / N) went through a float.
        self.output_shape = (input_shape[0] // N, input_shape[1] // N)
        self.N = N

    def get_ports(self):
        """Return every field of the input stream followed by the output's."""
        ports = [self.input[f] for f in self.input.fields]
        ports += [self.output[f] for f in self.output.fields]
        return ports

    def elaborate(self, platform):
        m = Module()
        sync = m.d.sync
        comb = m.d.comb

        pooling_counter_row = Signal(range(self.N))
        pooling_counter_col = Signal(range(self.N))

        m.submodules.matrix_feeder = matrix_feeder = self.matrix_feeder

        # Position trackers for this module's output and for the feeder's
        # output (img_position_counter is defined elsewhere; presumably it
        # returns a (row, col) pair of counters driven by the given stream).
        row, col = img_position_counter(m, sync, self.output,
                                        self.output_shape)
        feeder_row, feeder_col = img_position_counter(
            m, sync, matrix_feeder.output, matrix_feeder.output_shape)

        # input --> matrix_feeder
        comb += [
            matrix_feeder.input.valid.eq(self.input.valid),
            matrix_feeder.input.last.eq(self.input.last),
            matrix_feeder.input.data.eq(self.input.data),
            self.input.ready.eq(matrix_feeder.input.ready),
        ]

        # The payload always mirrors the feeder; only the handshake is gated.
        comb += self.output.dataport.eq(matrix_feeder.output.dataport)
        comb += self.output.last.eq(is_last(row, col, self.output_shape))

        # Statement order matters below: a later assignment to the same
        # counter overrides an earlier one within the same cycle.
        with m.If(matrix_feeder.output.accepted()):
            sync += pooling_counter_row.eq(_incr(pooling_counter_row, self.N))
            # NOTE(review): feeder_row is compared against output_shape[1]
            # and feeder_col is unused -- confirm against
            # img_position_counter that this is the intended axis.
            with m.If(feeder_row == matrix_feeder.output_shape[1] - 1):
                sync += pooling_counter_row.eq(0)
                sync += pooling_counter_col.eq(
                    _incr(pooling_counter_col, self.N))
            # The last feeder output restarts both counters.
            with m.If(matrix_feeder.output.last):
                sync += [
                    pooling_counter_row.eq(0),
                    pooling_counter_col.eq(0),
                ]

        with m.FSM():
            with m.State("normal"):
                with m.If((pooling_counter_row == 0)
                          & (pooling_counter_col == 0)):
                    # Selected position: connect the handshake straight through.
                    comb += [
                        self.output.valid.eq(matrix_feeder.output.valid),
                        matrix_feeder.output.ready.eq(self.output.ready),
                    ]
                with m.Else():
                    # Skipped position: drain the feeder, emit nothing.
                    comb += [
                        self.output.valid.eq(0),
                        matrix_feeder.output.ready.eq(1),
                    ]
                with m.If(self.output.accepted() & self.output.last):
                    m.next = "last"

            with m.State("last"):
                # After the last output, keep draining the feeder until the
                # last input word has been accepted.
                comb += [
                    self.output.valid.eq(0),
                    matrix_feeder.output.ready.eq(1),
                ]
                with m.If(self.input.accepted() & self.input.last):
                    m.next = "normal"

        return m
Beispiel #12
0
class Farm(Elaboratable):
    """
    "Farm" of DotProduct cores, for parallel computation.
    The performed operation is the dot product of two NxM
    matrices.

    Keep in mind that since throughput will never be higher
    than one output per clock, it doesn't make sense to use
    more DotProduct cores than the latency of each one of them.

    Inputs are handed to the cores in turn: ``current_core_sink``
    selects the core that receives the next ``input_a`` transfer and
    ``current_core_source`` selects the core whose result is presented
    on ``output``. Each selector advances when its stream is accepted.

    The ``input_b`` data is broadcast to every core. Its handshake is
    forwarded to the currently selected core, whose ``input_b.ready``
    is reflected on ``input_b.ready`` of the farm.
    NOTE(review): the original note states that input_b is a DUMMY
    interface that should always carry valid values, to avoid a
    combinational path between the valid of input_b and the ready of
    input_a; that behaviour now depends on the cores.

    Interfaces
    ----------
    input_a : Matrix Stream, input
        Input a matrix data.

    input_b : Matrix Stream, input
        Input b matrix data.
        TODO: should not be a stream, but plain "matrix shaped" values.

    output : Data Stream, output
        Computed dot product value.

    Parameters
    ----------
    width : int
        Bit width of both inputs.

    shape : tuple
        Input shape (N, M).

    n_cores : int
        Number of parallel dot product computations.
    """

    # Backward-compatible alias: the documentation used to be stored in this
    # attribute instead of being the class docstring.
    _doc_ = __doc__

    def __init__(self, width, shape, n_cores):
        self.cores = [DotProduct(width, shape) for _ in range(n_cores)]
        self.input_a = MatrixStream(width=width,
                                    shape=shape,
                                    direction='sink',
                                    name='input_a')
        self.input_b = MatrixStream(width=width,
                                    shape=shape,
                                    direction='sink',
                                    name='input_b')
        # The result width is taken from the first core.
        self.output_w = self.cores[0].output_w
        self.output = DataStream(self.output_w,
                                 direction='source',
                                 name='output')
        self.input_w = self.input_a.dataport.width
        self.n_inputs = self.input_a.dataport.n_elements
        self.shape = self.input_a.dataport.shape
        self.n_cores = len(self.cores)

    def get_ports(self):
        """Return the fields of input_a, input_b and output, in that order."""
        ports = []
        ports += [self.input_a[f] for f in self.input_a.fields]
        ports += [self.input_b[f] for f in self.input_b.fields]
        ports += [self.output[f] for f in self.output.fields]
        return ports

    def elaborate(self, platform):
        m = Module()
        sync = m.d.sync
        comb = m.d.comb

        # Index of the core that takes the next input / provides the next
        # output.
        current_core_sink = Signal(range(self.n_cores))
        current_core_source = Signal(range(self.n_cores))

        for i, core in enumerate(self.cores):
            m.submodules['core_' + str(i)] = core
            comb += core.input_b.dataport.eq(
                self.input_b.dataport)  # same coefficients for everybody
            with m.If(current_core_sink == i):
                # Selected core: connect both input handshakes and the data.
                comb += [
                    self.input_a.ready.eq(core.input_a.ready),
                    self.input_b.ready.eq(core.input_b.ready),
                ]
                comb += [
                    core.input_a.valid.eq(self.input_a.valid),
                    core.input_b.valid.eq(self.input_b.valid),
                    core.input_a.dataport.eq(self.input_a.dataport),
                ]
            with m.Else():
                # Idle cores see no valid input and a zeroed operand.
                comb += [
                    core.input_a.valid.eq(0),
                    core.input_b.valid.eq(0),
                    core.input_a.dataport.eq_const(0),
                ]
            with m.If(current_core_source == i):
                # Selected core: its result drives the farm output.
                comb += [
                    self.output.valid.eq(core.output.valid),
                    self.output.data.eq(core.output.data),
                ]
                comb += [
                    core.output.ready.eq(self.output.ready),
                ]
            with m.Else():
                # Other cores must hold their result until selected.
                comb += [
                    core.output.ready.eq(0),
                ]

        # Advance each selector when its stream completes a transfer
        # (_incr is defined elsewhere; presumably it wraps at n_cores).
        with m.If(self.input_a.accepted()):
            sync += current_core_sink.eq(_incr(current_core_sink,
                                               self.n_cores))

        with m.If(self.output.accepted()):
            sync += current_core_source.eq(
                _incr(current_core_source, self.n_cores))

        return m
Beispiel #13
0
class DotProduct(Elaboratable):
    """Sequential dot product of two equally shaped matrix operands.

    When ``input_a`` is accepted, both operands are latched into shift
    registers.  A single ``MAC`` submodule then consumes the lowest
    ``input_w`` bits of each register while the registers shift right by
    ``input_w`` bits every clock.  After ``n_inputs`` assertions of
    ``mac.valid_o`` the MAC output is presented on ``output``.

    WARNING
    -------
    The dataflow is controlled ONLY by the ``input_a`` stream interface.
    The ``input_b`` interface is a DUMMY: its ``ready`` is tied to
    ``input_a.accepted()`` and its ``valid`` is never looked at, so it must
    always carry valid values.  This avoids a combinational path between the
    valid of ``input_b`` and the ready of ``input_a``.
    """

    def __init__(self, width_i, shape):
        # Both operands share every argument except the stream name.
        sink_args = dict(width=width_i, shape=shape, direction='sink')
        self.input_a = MatrixStream(name='input_a', **sink_args)
        self.input_b = MatrixStream(name='input_b', **sink_args)

        port_a = self.input_a.dataport
        self.input_w = port_a.width
        self.n_inputs = port_a.n_elements

        self.output_w = calculate_output_width(self.input_w, self.n_inputs)
        self.output = DataStream(
            self.output_w, direction='source', name='output')
        self.shape = port_a.shape

    def get_ports(self):
        """Return the fields of input_a, input_b and output, in that order."""
        return [
            stream[field]
            for stream in (self.input_a, self.input_b, self.output)
            for field in stream.fields
        ]

    def elaborate(self, platform):
        m = Module()
        sync = m.d.sync
        comb = m.d.comb

        # Shift registers holding every element of each operand.
        packed_width = self.input_w * self.n_inputs
        tmp_input_a = Signal(packed_width)
        tmp_input_b = Signal(packed_width)
        counter = Signal(range(self.n_inputs))

        mac = MAC(input_w=self.input_w, output_w=self.output_w)
        m.submodules['mac'] = mac
        # The MAC always sees the lowest element of each shift register.
        comb += mac.input_a.eq(tmp_input_a[0:self.input_w])
        comb += mac.input_b.eq(tmp_input_b[0:self.input_w])

        # DUMMY input_b interface
        comb += self.input_b.ready.eq(self.input_a.accepted())

        with m.FSM():

            with m.State("IDLE"):
                # New operands are welcome when no result is pending or the
                # pending result is being taken this cycle.
                comb += self.input_a.ready.eq(
                    self.output.accepted() | ~self.output.valid)
                comb += mac.clr.eq(1)
                comb += mac.clken.eq(0)

                with m.If(self.input_a.accepted()):
                    m.next = "BUSY"
                    sync += tmp_input_a.eq(Cat(*self.input_a.flat))
                    sync += tmp_input_b.eq(Cat(*self.input_b.flat))
                    sync += counter.eq(0)

                with m.If(self.output.accepted()):
                    sync += self.output.valid.eq(0)

            with m.State("BUSY"):
                comb += self.input_a.ready.eq(0)
                comb += mac.clr.eq(0)
                comb += mac.clken.eq(1)

                # Expose the next element pair to the MAC.
                sync += tmp_input_b.eq(tmp_input_b >> self.input_w)
                sync += tmp_input_a.eq(tmp_input_a >> self.input_w)

                with m.If(mac.valid_o):
                    sync += counter.eq(counter + 1)
                    # The final product has been accumulated: publish it.
                    with m.If(counter == self.n_inputs - 1):
                        m.next = "IDLE"
                        sync += self.output.data.eq(mac.output)
                        sync += self.output.valid.eq(1)

        return m
Beispiel #14
0
class RowFifos(Elaboratable):
    """N chained FIFOs that work synchronized to provide an Nx1 (N=row)
    vector of data.

    Input words are written to the first FIFO, and every word read from FIFO
    ``n`` is written to FIFO ``n + 1``.  A FIFO is held back only while the
    next FIFO contains exactly ``row_length`` words and it has data itself;
    the output is valid when that holds for every stage and the last FIFO
    has data.

    Parameters
    ----------
    input_w :
        Passed as ``width`` to the input and output streams.
    row_length :
        Fill level at which a FIFO is considered to hold a complete row; each
        FIFO is created ``row_length + 4`` words deep.
    N :
        Number of FIFOs and length of the output vector.
    invert :
        When true, output element ``n`` comes from FIFO ``n``; otherwise it
        comes from FIFO ``N - 1 - n``.
    """
    def __init__(self, input_w, row_length, N, invert=False):
        self.row_length = row_length
        self.invert = invert
        self.input = DataStream(width=input_w, direction='sink', name='input')
        self.output = MatrixStream(width=input_w,
                                   shape=(N, ),
                                   direction='source',
                                   name='output')
        # Derived attributes are read back from the streams just created.
        self.input_w = len(self.input.data)
        self.output_w = self.output.dataport.width
        self.shape = self.output.dataport.shape
        self.N = self.output.dataport.shape[0]

    def get_ports(self):
        """Return every field of the input stream followed by the output's."""
        ports = [self.input[f] for f in self.input.fields]
        ports += [self.output[f] for f in self.output.fields]
        return ports

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        fifo = [
            SyncFIFOBuffered(width=self.input_w, depth=self.row_length + 4)
            for _ in range(self.N)
        ]

        fifo_r_rdy = [Signal() for _ in range(self.N)]
        fifo_r_valid = [Signal() for _ in range(self.N)]

        for n in range(self.N):
            m.submodules['fifo_' + str(n)] = fifo[n]
            # NOTE(review): fifo_r_rdy is driven here but not read anywhere
            # in this class; kept so the generated design is unchanged.
            comb += [
                fifo_r_rdy[n].eq((fifo[n].level < self.row_length)
                                 | self.output.accepted()),
            ]

        # first fifo
        comb += [
            self.input.ready.eq(fifo[0].w_rdy),
            fifo[0].w_en.eq(self.input.accepted()),
            fifo[0].w_data.eq(self.input.data),
        ]

        # Intermediate stages: stage n counts as valid once the next FIFO
        # holds exactly row_length words and stage n has data.  Until then
        # (or whenever the output is accepted) it keeps passing words on.
        for n in range(self.N - 1):
            comb += [
                fifo_r_valid[n].eq((fifo[n + 1].level == self.row_length)
                                   & (fifo[n].r_rdy)),
                fifo[n].r_en.eq((self.output.accepted() | ~fifo_r_valid[n])),
                fifo[n + 1].w_en.eq(fifo[n].r_rdy & fifo[n].r_en),
                fifo[n + 1].w_data.eq(fifo[n].r_data),
            ]

        # last fifo
        n = self.N - 1
        comb += [
            fifo_r_valid[n].eq(fifo[n].r_rdy),
            fifo[n].r_en.eq(self.output.accepted()),
        ]

        # output (_and is defined elsewhere; presumably it AND-reduces the
        # list of per-stage valid flags)
        comb += [
            self.output.valid.eq(_and(fifo_r_valid)),
        ]

        for n in range(self.N):
            if self.invert:
                comb += self.output.dataport.matrix[n].eq(fifo[n].r_data)
            else:
                comb += self.output.dataport.matrix[n].eq(fifo[self.N - 1 -
                                                               n].r_data)

        return m