def matrix_multiplication(cfg, mat1, mat2, *, cols_per_row):
    """Multiply ``mat1`` by ``mat2`` with several columns processed in parallel.

    Each row of ``mat1`` is multiplied with ``cols_per_row`` columns of
    ``mat2`` at the same time. A ``column_multiplication`` module multiplies
    one row with one column at a time and can also store several columns.
    The columns of ``mat2`` are therefore dealt out to the
    ``column_multiplication`` modules first (to be stored there), and then
    every row is sent to each of those modules.

    Args:
        cfg: Configuration mapping; ``cfg['cols_per_multiplier']`` is read
            here and ``cfg`` is forwarded to ``column_multiplication``.
        mat1: Row-side operand, passed to ``row_dispatch``.
        mat2: Column-side operand, passed to ``qdeal``.
        cols_per_row: Number of columns multiplied with one row in parallel
            (keyword-only).

    Returns:
        The concatenation (``ccat``) of the flattened per-multiplier results,
        piped into ``Array``.
    """
    # Deal the columns of mat2 out over the parallel multipliers.
    col_chunks = qdeal(mat2, num=cols_per_row, lvl=1)

    # Register and decouple the row stream before fanning it out.
    row_chunks = (
        row_dispatch(mat1, cols_per_row=cols_per_row)
        | dreg
        | decouple(latency=2)
        | dispatch
    )

    tmp = []

    # A single output comes back bare rather than as a tuple; wrap it so the
    # pairing loop below handles both cases the same way.
    col_chunks = col_chunks if isinstance(col_chunks, tuple) else (col_chunks, )
    row_chunks = row_chunks if isinstance(row_chunks, tuple) else (row_chunks, )

    for col, row in zip(col_chunks, row_chunks):
        # After qdeal every col is a level-1 Queue with eot == True, so it is
        # flattened first and then regrouped by cols_per_multiplier: eot
        # (last) is set after the last column that goes to a specific column
        # multiplier.
        col = col | flatten | group(size=cfg['cols_per_multiplier'])
        tmp.append(column_multiplication(cfg, row, col) | flatten)

    res = ccat(*tmp) | Array
    return res
from pygears.lib import group, check, drv
from pygears.typing import Uint

# Stream of group sizes: the first group takes 3 items, the second takes 4.
size = drv(t=Uint[3], seq=[3, 4])

# Drive seven data items through ``group`` and check that they come out
# partitioned according to the sizes above.
(
    drv(t=Uint[4], seq=[1, 2, 3, 4, 5, 6, 7])
    | group(size=size)
    | check(ref=[[1, 2, 3], [4, 5, 6, 7]])
)
def test_synth_vivado():
    """Instantiate ``group`` on a ``Queue[Uint[16]]`` input with a ``Uint[16]`` size.

    NOTE(review): presumably picked up by a Vivado synthesis check harness
    that is not visible in this chunk -- confirm.
    """
    din = Intf(Queue[Uint[16]])
    size = Intf(Uint[16])
    group(din, size)
def test_synth_yosys():
    """Instantiate ``group`` on a ``Queue[Uint[16]]`` input with a ``Uint[16]`` size.

    NOTE(review): presumably picked up by a Yosys synthesis check harness
    that is not visible in this chunk -- confirm.
    """
    din = Intf(Queue[Uint[16]])
    size = Intf(Uint[16])
    group(din, size)
def test_formal():
    """Instantiate ``group`` on a ``Queue[Uint[8]]`` input with a ``Uint[3]`` size.

    NOTE(review): presumably picked up by a formal verification harness that
    is not visible in this chunk -- confirm.
    """
    din = Intf(Queue[Uint[8]])
    size = Intf(Uint[3])
    group(din, size)