コード例 #1
0
ファイル: matmul.py プロジェクト: vishalbelsare/DeepSpeed
 def make_sdd_lut(layout, block, dtype, device):
     """Build one look-up table per superblock of a block-sparse layout.

     The layout is cast to int32 and handed to ``libtriton.superblock``
     (raw data pointer plus its first three dimensions) together with a
     starting width of ``(128 if block > 16 else 32) // block``. Every
     ``(size, nnz)`` pair yielded by that call becomes one look-up table.

     Args:
         layout: Tensor with at least three dimensions; presumably the
             block-sparsity mask -- confirm against callers.
         block: Non-zero integer block size used to derive the starting
             width.
         dtype: Not used by this function; kept so the signature stays
             unchanged.
         device: Device the look-up tables are moved to.

     Returns:
         A tuple ``(luts, None, widths, packs)`` where ``luts`` is a list
         of int32 tensors of shape ``(-1, 4)`` on ``device``, ``widths``
         holds ``rows // (size * size)`` for each table, and ``packs``
         holds the matching ``size`` values. The second slot is always
         ``None`` (presumably a placeholder for locks -- confirm against
         callers).
     """
     # Larger blocks start from a wider superblock budget.
     start_width = (128 if block > 16 else 32) // block
     layout = layout.type(torch.int32)
     segmented = libtriton.superblock(layout.data_ptr(), layout.shape[0],
                                      layout.shape[1], layout.shape[2],
                                      start_width)
     luts, widths, packs = [], [], []
     for size, nnz in segmented:
         # torch.from_numpy is applied below, so nnz is expected to be a
         # NumPy array; it is viewed as rows of four entries each.
         nnz = nnz.reshape(-1, 4)
         width = nnz.shape[0] // (size * size)
         luts.append(torch.from_numpy(nnz).type(torch.int32).to(device))
         widths.append(width)
         packs.append(size)
     return luts, None, widths, packs
コード例 #2
0
 def make_sdd_lut(layout, block, dtype, device):
     """Create the look-up tables for every superblock of ``layout``.

     ``libtriton.superblock`` receives the int32 layout's data pointer,
     its first three dimensions and a starting width of ``128 // block``.
     Each ``(size, nnz)`` pair it yields is converted into one table.

     Args:
         layout: Tensor with at least three dimensions; presumably the
             block-sparsity mask -- confirm against callers.
         block: Non-zero integer block size.
         dtype: Not used by this function.
         device: Device the resulting tables are moved to.

     Returns:
         ``(luts, None, widths, packs)``: a list of int32 ``(-1, 4)``
         tensors on ``device``, a fixed ``None`` in the second slot, the
         list of ``rows // (size * size)`` widths, and the list of
         superblock sizes.
     """
     initial_width = 128 // block
     layout = layout.type(torch.int32)
     depth, rows, cols = layout.shape[0], layout.shape[1], layout.shape[2]
     groups = libtriton.superblock(layout.data_ptr(), depth, rows, cols,
                                   initial_width)
     luts = []
     widths = []
     packs = []
     for pack, entries in groups:
         # View the raw entries as rows of four values each.
         table = entries.reshape(-1, 4)
         group_width = table.shape[0] // (pack * pack)
         as_tensor = torch.from_numpy(table)
         luts.append(as_tensor.type(torch.int32).to(device))
         widths.append(group_width)
         packs.append(pack)
     return luts, None, widths, packs
コード例 #3
0
ファイル: matmul.py プロジェクト: jareddk/triton
 def make_sdd_lut(layout, block, dtype, device):
   """Create flattened look-up tables for every superblock of ``layout``.

   ``libtriton.superblock`` is called with the int32 layout and a
   starting width of ``128 // block``. For each ``(size, nnz)`` pair it
   yields, the first four columns of ``nnz`` are stacked side by side
   and flattened into a one-dimensional int32 tensor on ``device``.

   Args:
     layout: Tensor describing the layout; presumably the block-sparsity
       mask -- confirm against callers.
     block: Non-zero integer block size.
     dtype: Not used by this function.
     device: Device the resulting tables are moved to.

   Returns:
     ``(luts, None, widths, packs)``: the list of flattened tables, a
     fixed ``None`` in the second slot, the list of
     ``nnz.shape[0] // (size * size)`` widths, and the list of
     superblock sizes.
   """
   initial_width = 128 // block
   groups = libtriton.superblock(layout.type(torch.int32), initial_width)
   luts = []
   widths = []
   packs = []
   for pack, entries in groups:
     group_width = entries.shape[0] // (pack * pack)
     # Take columns 0..3 and lay them out row by row.
     columns = [entries[:, col] for col in range(4)]
     stacked = torch.stack(columns, dim=1)
     flat = stacked.view(-1).contiguous()
     luts.append(flat.type(torch.int32).to(device))
     widths.append(group_width)
     packs.append(pack)
   return luts, None, widths, packs