Example #1
0
def add_row_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
    '''
    Sum up the elements in each row of ``mat`` and store the result in the
    corresponding position of ``vec``.

    Unlike the helpers that only handle small inputs, this one supports up
    to 2^16 columns (possibly up to 2^20).

    NOTE(review): ``alpha`` and ``beta`` are accepted for interface
    compatibility but are not used by the current cudaconv2-based path.
    '''
    timer.start()
    mh, mw = mat.shape
    vh, vw = vec.shape
    # vec must be a column (mh x 1) or row (1 x mh) vector matching mat's rows.
    assert (vw == 1 and vh == mh) or (vh == 1 and vw == mh)
    if mw != 1:
        # Reduce along axis 1 (rows) on the GPU.
        cudaconv2.sum(mat, 1, vec)
    else:
        # Single-column matrix: the row sums are just the column itself.
        gpu_partial_copy_to(mat, vec, 0, mh, 0, 1)
    timer.end('add_row_sum_to_vec')
Example #2
0
def add_row_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
  '''
  Sum up the elements in each row of ``mat`` and store the result in the
  corresponding position of ``vec``.

  Unlike the helpers that only handle small inputs, this one supports up
  to 2^16 columns (possibly up to 2^20).

  NOTE(review): ``alpha`` and ``beta`` are accepted for interface
  compatibility but are not used by the current cudaconv2-based path.
  '''
  timer.start()
  mh, mw = mat.shape
  vh, vw = vec.shape
  # vec must be a column (mh x 1) or row (1 x mh) vector matching mat's rows.
  assert (vw == 1 and vh == mh) or (vh == 1 and vw == mh)
  # Reduce along axis 1 (rows) on the GPU.
  cudaconv2.sum(mat, 1, vec)
  timer.end('add_row_sum_to_vec')
Example #3
0
def convWeightActs(input, ingrad, weight_grad, bias_grad, padding, stride, color, *args):
  # Backward pass of a convolutional layer: accumulate the weight gradient
  # (into weight_grad) and the bias gradient (into bias_grad) from the
  # incoming gradient ``ingrad``.
  image_y = input.shape[ConvDataLayout.HEIGHT]
  output_y =  ingrad.shape[ConvDataLayout.HEIGHT]
  output_x =  ingrad.shape[ConvDataLayout.WIDTH]
  filter_size =  weight_grad.shape[FilterLayout.HEIGHT]
  # NOTE(review): the ``color`` parameter is immediately overwritten here and
  # therefore never used; the channel count is always taken from ``input``.
  color = input.shape[ConvDataLayout.CHANNEL]
  cudaconv2.convWeightActs(input, ingrad, weight_grad, image_y, output_y, output_x, filter_size, padding, stride, color, 1, 0)

  batch_size = ingrad.shape[ConvDataLayout.BATCH]
  channel = ingrad.shape[ConvDataLayout.CHANNEL]

  # Bias gradient: sum ingrad over all spatial positions and batch items,
  # one total per channel, via a row-wise (axis 1) reduction.
  cudaconv2.sum(ingrad.reshape((channel, output_y * output_x * batch_size)), 1, bias_grad)
Example #4
0
def add_col_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
  '''
  Sum the elements of each column of ``mat`` into the matching slot of
  ``vec``.
  ONLY works on small matrices: rows up to 1024 and columns around 2048
  (the upper bound has not been tested).
  '''
  n_rows, n_cols = mat.shape
  out_h, out_w = vec.shape
  # vec must be shaped as a column or row vector of length n_cols.
  is_col_vec = out_w == 1 and out_h == n_cols
  is_row_vec = out_h == 1 and out_w == n_cols
  assert is_col_vec or is_row_vec

  cudaconv2.sum(mat, 0, vec)
def add_col_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
    '''
    Reduce ``mat`` along its rows, writing each column's total into the
    corresponding entry of ``vec``.
    ONLY works on small matrices (up to ~1024 rows and ~2048 columns; the
    upper bound has not been tested).
    '''
    n_rows, n_cols = mat.shape
    vec_h, vec_w = vec.shape
    # The output vector length must equal the number of columns.
    assert (vec_w == 1 and vec_h == n_cols) or (vec_h == 1 and vec_w == n_cols)

    cudaconv2.sum(mat, 0, vec)
Example #6
0
def add_row_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
  '''
  Accumulate each row of ``mat`` into the matching entry of ``vec``.
  Unlike the small-matrix helpers, this supports up to 2^16 columns
  (possibly up to 2^20).
  '''
  n_rows, n_cols = mat.shape
  v_h, v_w = vec.shape
  # The output vector length must equal the number of rows.
  assert (v_w == 1 and v_h == n_rows) or (v_h == 1 and v_w == n_rows)
  if n_cols == 1:
    # A single-column matrix is its own vector of row sums: copy it through.
    gpu_partial_copy_to(mat, vec, 0, n_rows, 0, 1)
  else:
    cudaconv2.sum(mat, 1, vec)
def add_row_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
    '''
    Write the per-row sums of ``mat`` into ``vec``.
    Handles up to 2^16 columns (possibly up to 2^20), unlike the
    small-matrix helpers.
    '''
    n_rows, n_cols = mat.shape
    vec_h, vec_w = vec.shape
    # The output vector length must equal the number of rows.
    vec_matches = (vec_w == 1 and vec_h == n_rows) or (vec_h == 1 and vec_w == n_rows)
    assert vec_matches
    if n_cols != 1:
        cudaconv2.sum(mat, 1, vec)
    else:
        # One column: the row sums are the column itself, so copy it through.
        gpu_partial_copy_to(mat, vec, 0, n_rows, 0, 1)
Example #8
0
def add_col_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
  '''
  Sum up the elements in each column of ``mat`` and store the result in
  the corresponding position of ``vec``.

  ONLY works on small matrices: up to 1024 rows and around 2048 columns
  (the upper bound has not been tested).

  NOTE(review): ``alpha`` and ``beta`` are accepted for interface
  compatibility but are not used by the current cudaconv2-based path.
  '''
  timer.start()
  mh, mw = mat.shape
  vh, vw = vec.shape
  # vec must be a column (mw x 1) or row (1 x mw) vector matching mat's columns.
  assert (vw == 1 and vh == mw) or (vh == 1 and vw == mw)

  # Reduce along axis 0 (columns) on the GPU.
  cudaconv2.sum(mat, 0, vec)
  timer.end('add_col_sum_to_vec')
Example #9
0
def add_col_sum_to_vec(vec, mat, alpha=1.0, beta=1.0):
    '''
    Sum up the elements in each column of ``mat`` and store the result in
    the corresponding position of ``vec``.

    ONLY works on small matrices: up to 1024 rows and around 2048 columns
    (the upper bound has not been tested).

    NOTE(review): ``alpha`` and ``beta`` are accepted for interface
    compatibility but are not used by the current cudaconv2-based path.
    '''
    timer.start()
    mh, mw = mat.shape
    vh, vw = vec.shape
    # vec must be a column (mw x 1) or row (1 x mw) vector matching mat's columns.
    assert (vw == 1 and vh == mw) or (vh == 1 and vw == mw)

    # Reduce along axis 0 (columns) on the GPU.
    cudaconv2.sum(mat, 0, vec)
    timer.end('add_col_sum_to_vec')