Python im2col_indicesの例、cs231n.im2col.im2col_indices Pythonの例

コード例 #1

0

ファイルを表示

ファイル: layers.py プロジェクト: barryridge/Stanford-CS231n-Winter-2016

def conv_backward_naive(dout, cache):
  """
  Backward pass for a convolutional layer, built on the im2col helpers.

  Inputs:
  - dout: Upstream derivatives.
  - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

  Returns a tuple of:
  - dx: Gradient with respect to x
  - dw: Gradient with respect to w
  - db: Gradient with respect to b
  """
  x, w = cache[0], cache[1]
  b = cache[2]  # read to mirror the cache layout; the gradients do not need it
  conv_param = cache[3]
  pad = conv_param['pad']
  stride = conv_param['stride']
  (N, C, H, W) = x.shape
  (F, C, HH, WW) = w.shape

  # Keyword sets shared by the forward (im2col) and inverse (col2im) calls.
  over_input = dict(field_height=HH, field_width=WW, padding=pad, stride=stride)
  over_filter = dict(field_height=HH, field_width=WW, padding=0, stride=1)

  # Column form of the input patches, of the filters, and of the upstream
  # gradient (the latter taken with a 1x1 field).
  x_cols = im2col.im2col_indices(x, **over_input)
  w_cols = im2col.im2col_indices(w, **over_filter)
  grad_cols = im2col.im2col_indices(dout, field_height=1, field_width=1, padding=0, stride=1)

  # Input gradient: filters times upstream gradient, folded back to x.shape.
  dx = im2col.col2im_indices(w_cols.dot(grad_cols), x.shape, **over_input)

  # Weight gradient: input patches times upstream gradient, folded to w.shape.
  dw = im2col.col2im_indices(x_cols.dot(grad_cols.T), w.shape, **over_filter)

  # Bias gradient: sum of each row of the column-form upstream gradient.
  db = np.sum(grad_cols, axis=1)

  return dx, dw, db

コード例 #2

0

ファイルを表示

ファイル: layers.py プロジェクト: barryridge/Stanford-CS231n-Winter-2016

def max_pool_backward_naive(dout, cache):
  """
  Backward pass for a max pooling layer, built on the im2col helpers.

  Inputs:
  - dout: Upstream derivatives
  - cache: A tuple indexed here as (x, switches, pool_param); switches is
    used as the row index of the selected element within each pool.

  Returns:
  - dx: Gradient with respect to x
  """
  x, switches, pool_param = cache[0], cache[1], cache[2]
  (N, C, H, W) = x.shape
  HH = pool_param['pool_height']
  WW = pool_param['pool_width']
  stride = pool_param['stride']

  # Column form of the input; only its shape is used below.
  x_col = im2col.im2col_indices(x, field_height=HH, field_width=WW, padding=0, stride=stride)

  # Shape of the per-pool layout: HH*WW rows, one column per pool.
  pools_shape = x_col.reshape(-1, HH*WW).T.shape

  # Column form of the upstream gradient (1x1 field).
  dout_col = im2col.im2col_indices(dout, field_height=1, field_width=1, padding=0, stride=1)

  # The gradient of max flows only to the element that was selected on the
  # forward pass, so scatter dout into those positions and leave zeros elsewhere.
  grad_pools = np.zeros(pools_shape)
  pool_ids = np.arange(grad_pools.shape[-1])
  grad_pools[switches, pool_ids] = dout_col.T.flatten()

  # Back to the layout of x_col, then fold into the shape of x.
  dx_col = grad_pools.T.reshape(x_col.T.shape).T
  dx = im2col.col2im_indices(dx_col, x.shape, field_height=HH, field_width=WW, padding=0, stride=stride)

  return dx

コード例 #3

0

ファイルを表示

def max_pool_backward_naive(dout, cache):
    """
    Backward pass for a max-pooling layer, one (sample, channel) plane at a time.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple unpacked here as (x, pool_param, mask); mask is indexed
      like x and is multiplied with the upstream gradient in column form.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param, mask = cache
    N, C, H, W = x.shape
    plane_shape = (1, 1, H, W)
    dx = np.zeros(x.shape)

    for n, c in np.ndindex(N, C):
        # Column form of this plane's mask, using the pooling geometry.
        mask_cols = im2col_indices(np.reshape(mask[n][c], plane_shape),
                                   pool_param['pool_height'],
                                   pool_param['pool_width'], 0,
                                   pool_param['stride'])
        # Flattened upstream gradient for this plane, broadcast over the rows.
        grad_flat = np.reshape(dout[n][c], (-1))
        routed = mask_cols * grad_flat
        # Fold the routed gradient back into image form.
        dx[n, c, :, :] = col2im_indices(routed, plane_shape,
                                        pool_param['pool_height'],
                                        pool_param['pool_width'], 0,
                                        pool_param['stride'])

    return dx

コード例 #4

0

ファイルを表示

def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer, one sample and one filter at a time.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    The padding amount and stride are handed to im2col_indices; x itself is
    not modified here.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    pad = conv_param['pad']
    stride = conv_param['stride']

    out_h = int(1 + (H + 2 * pad - HH) / stride)
    out_w = int(1 + (W + 2 * pad - WW) / stride)
    out = np.zeros((N, F, out_h, out_w))

    for n in range(N):
        # Column form of the n-th sample (kept 4-D via the length-1 slice).
        sample_cols = im2col_indices(x[n:n + 1], HH, WW, pad, stride)
        for f in range(F):
            flat_filter = np.reshape(w[f], (-1))
            response = np.dot(flat_filter, sample_cols)
            out[n, f] = np.reshape(response, (out_h, out_w))
            # Bias is added in place, after the response has been stored.
            out[n, f] += b[f]

    cache = (x, w, b, conv_param)
    return out, cache

コード例 #5

0

ファイルを表示

def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer as a single matrix product.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    from cs231n.im2col import im2col_indices

    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']

    out_h = 1 + (H + 2 * pad - HH) // stride
    out_w = 1 + (W + 2 * pad - WW) // stride

    # Input patches as columns; one flattened filter per row of the weights.
    patches = im2col_indices(x, HH, WW, padding=pad, stride=stride)
    flat_filters = w.reshape(F, -1)

    # One matrix product for all filters, with the bias broadcast per row.
    scores = np.dot(flat_filters, patches) + b.reshape(F, 1)

    # Unflatten the columns, then move the sample axis to the front.
    out = scores.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache

コード例 #6

0

ファイルを表示

ファイル: layers.py プロジェクト: barryridge/Stanford-CS231n-Winter-2016

def max_pool_forward_naive(x, pool_param):
  """
  A naive implementation of the forward pass for a max pooling layer.

  Inputs:
  - x: Input data, of shape (N, C, H, W)
  - pool_param: dictionary with the following keys:
    - 'pool_height': The height of each pooling region
    - 'pool_width': The width of each pooling region
    - 'stride': The distance between adjacent pooling regions

  Returns a tuple of:
  - out: Output data
  - cache: (x, switches, pool_param), where switches holds, for each pool,
    the row index of its maximum within the pool.
  """
  # Unpack params
  (N, C, H, W) = x.shape
  HH = pool_param['pool_height']
  WW = pool_param['pool_width']
  stride = pool_param['stride']

  # Output height/width. Floor division keeps these integers under Python 3,
  # where "/" would yield floats that cannot be used as array dimensions.
  H_ = 1 + (H - HH) // stride
  W_ = 1 + (W - WW) // stride

  # Calculate x_col using the im2col helper function
  x_col = im2col.im2col_indices(x, HH, WW, padding=0, stride=stride)

  # Reshape so that every column is one pooling window (HH*WW rows)
  x_col_pools = x_col.T.reshape(-1, HH*WW).T

  # Perform the max-pooling, remembering which row won in each pool
  switches = np.argmax(x_col_pools, axis=0)
  out_maxpool = x_col_pools[switches, np.arange(x_col_pools.shape[-1])]

  # Regroup the pooled values into C rows
  out_maxpool_col = out_maxpool.reshape(-1, C).T

  # Fold the pooled values into (N, C, H_, W_) using the col2im helper
  out = im2col.col2im_indices(out_maxpool_col, (N,C,H_,W_), field_height=1, field_width=1, padding=0, stride=1)

  cache = (x, switches, pool_param)
  return out, cache

コード例 #7

0

ファイルを表示

ファイル: layers.py プロジェクト: rudolfsteiner/cs231n

def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    The input is re-expanded into pooling windows, the maximum of each
    window is located again, and the upstream gradient is routed to it.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param = cache

    pool_height = pool_param['pool_height']
    pool_width = pool_param['pool_width']
    stride = pool_param['stride']
    N, C, H, W = x.shape
    window = pool_height * pool_width

    pool_col = im2col_indices(x, pool_height, pool_width, 0, stride)

    # Split the channel axis out of the rows and move it into the columns,
    # so that every column is a single-channel pooling window.
    pool_col = pool_col.reshape(C, window, -1).transpose(
        1, 0, 2).reshape(window, -1)

    pool_max_index = np.argmax(pool_col, axis=0)
    dx_col = np.zeros(pool_col.shape)

    # Flatten the upstream gradient with its first axis moved last, so its
    # order lines up with the columns of pool_col.
    dout_flat = dout.transpose(1, 2, 3, 0).reshape(dout.size)

    # Only the winning element of each window receives gradient.
    dx_col[pool_max_index, np.arange(pool_max_index.size)] = dout_flat

    # Undo the channel regrouping, then fold back into the shape of x.
    dx_col = dx_col.reshape(window, C, -1).transpose(
        1, 0, 2).reshape(C * window, -1)
    dx = col2im_indices(dx_col, x.shape, pool_height, pool_width, 0, stride)
    return dx

コード例 #8

0

ファイルを表示

def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer as a single matrix product.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    pad, stride = conv_param['pad'], conv_param['stride']

    out_h = (H + 2 * pad - HH) // stride + 1
    out_w = (W + 2 * pad - WW) // stride + 1

    # Input patches in column form; one flattened filter per weight row.
    patches = im2col_indices(x, HH, WW, padding=pad, stride=stride)
    flat_filters = w.reshape(F, -1)

    # All filter responses at once, with the bias broadcast along each row.
    scores = flat_filters.dot(patches) + b.reshape(-1, 1)

    # Unflatten the columns and bring the sample axis to the front.
    out = np.reshape(scores, (F, out_h, out_w, N)).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache

コード例 #9

0

ファイルを表示

def max_pool_forward_naive(x, pool_param):
    """
    Forward pass for a max-pooling layer, one (sample, channel) plane at a time.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    No padding is applied (the helpers are called with padding 0).

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') where H' and W' are given by
      H' = 1 + (H - pool_height) / stride
      W' = 1 + (W - pool_width) / stride
    - cache: (x, pool_param, mask), where mask has the shape of x and is
      built by folding a one-hot marker of each window's maximum back into
      image form.
    """
    N, C, H, W = x.shape
    pool_h = pool_param['pool_height']
    stride = pool_param['stride']
    pool_w = pool_param['pool_width']

    out_h = int(1 + (H - pool_h) / stride)
    out_w = int(1 + (W - pool_w) / stride)
    plane_shape = (1, 1, H, W)

    out = np.zeros((N, C, out_h, out_w))
    mask = np.zeros(x.shape)

    for n, c in np.ndindex(N, C):
        # One column per pooling window of this plane.
        windows = im2col_indices(np.reshape(x[n][c], plane_shape),
                                 pool_h, pool_w, 0, stride)
        out[n, c, :, :] = np.reshape(np.amax(windows, axis=0),
                                     (out_h, out_w))

        # One-hot marker of the winning row in each window.
        winners = np.argmax(windows, axis=0)
        one_hot = np.zeros(windows.shape)
        one_hot[winners, np.arange(out_h * out_w)] = 1
        mask[n, c, :, :] = col2im_indices(one_hot, plane_shape,
                                          pool_h, pool_w, 0, stride)

    cache = (x, pool_param, mask)
    return out, cache

コード例 #10

0

ファイルを表示

ファイル: layers.py プロジェクト: rudolfsteiner/cs231n

def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer as a single matrix product.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)
    """
    stride = conv_param['stride']
    pad = conv_param['pad']

    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    out_h = 1 + (H + 2 * pad - HH) // stride
    out_w = 1 + (W + 2 * pad - WW) // stride

    # Input patches in column form; one flattened filter per weight row.
    patches = im2col_indices(x, HH, WW, pad, stride)
    flat_filters = w.reshape(F, -1)

    # Add the bias along the filter axis by working on the transpose.
    responses = np.dot(flat_filters, patches)
    biased = (responses.T + b).T

    # Unflatten the columns and bring the sample axis to the front.
    out = biased.reshape(F, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache

コード例 #11

0

ファイルを表示

def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions
      The dictionary passed in is not modified.

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param_with_mask), where the second item is a copy of
      pool_param with an extra 'out_mask' entry: a one-hot marker, in
      column form, of the maximum of every pooling window.
    """
    from cs231n.im2col import im2col_indices

    N, C, H, W = x.shape
    pool_height, pool_width, stride = pool_param['pool_height'], \
        pool_param['pool_width'], pool_param['stride']
    H_new = 1 + (H - pool_height) // stride
    W_new = 1 + (W - pool_width) // stride

    # Treat every (sample, channel) plane as its own one-channel image so
    # that each column of x_col is a single pooling window.
    x_ = x.reshape(N * C, 1, H, W)
    x_col = im2col_indices(x_,
                           pool_height,
                           pool_width,
                           padding=0,
                           stride=stride)

    # Maximum of every window, plus a one-hot marker of where it was found.
    out_ = np.max(x_col, axis=0)
    out_mask = np.zeros_like(x_col)
    out_index = np.argmax(x_col, axis=0)
    out_mask[out_index, np.arange(H_new * W_new * N * C)] = 1
    out = out_.reshape(H_new, W_new, N, C).transpose(2, 3, 0, 1)

    # Store the mask in a copy: writing it into the caller's dictionary would
    # leak per-call state into an argument that may be shared or reused.
    cached_param = dict(pool_param)
    cached_param['out_mask'] = out_mask

    cache = (x, cached_param)
    return out, cache

コード例 #12

0

ファイルを表示

ファイル: layers.py プロジェクト: jguoaj/deep-learning-cv

def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives, a 4-D array whose axis 1 indexes the F
      filters (it is summed over axes 0, 2, 3 to obtain db).
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    import cs231n.im2col as im2col

    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride = conv_param['stride']
    pad = conv_param['pad']

    # Bias gradient: sum over every axis except the filter axis.
    db = np.sum(dout, axis=(0, 2, 3))

    # Upstream gradient as a matrix with one row per filter.
    dout_col = dout.transpose(1, 2, 3, 0)
    dout_col = dout_col.reshape(dout_col.shape[0], -1)

    X_col = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)
    W_col = w.reshape(F, -1)

    # Weight gradient: upstream gradient times input patches, in w's shape.
    dw = np.dot(dout_col, X_col.T).reshape(F, C, HH, WW)

    # Input gradient in column form, folded back into the shape of x.
    dx_col = np.dot(W_col.T, dout_col)
    dx = im2col.col2im_indices(dx_col, x.shape, field_height=HH, field_width=WW, padding=pad, stride=stride)

    return dx, dw, db

コード例 #13

0

ファイルを表示

def max_pool_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a max pooling layer.

    The input is re-expanded into pooling windows, the maximum of each
    window is located again, and the upstream gradient is routed to it.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x
    """
    x, pool_param = cache
    num_data, num_channels, im_height, im_width = x.shape
    pool_height, pool_width, stride = (pool_param['pool_height'],
                                       pool_param['pool_width'],
                                       pool_param['stride'])

    # Treat every (sample, channel) plane as its own one-channel image so
    # that each column of x_col is a single pooling window.
    planes_shape = (num_data * num_channels, 1, im_height, im_width)
    x_col = im2col_indices(x.reshape(planes_shape),
                           pool_height,
                           pool_width,
                           padding=0,
                           stride=stride)

    # Only the winning element of each window receives gradient. dout is
    # flattened with its two leading axes moved last to line up with the
    # columns of x_col.
    dx_col = np.zeros(x_col.shape)
    max_idx = np.argmax(x_col, axis=0)
    dx_col[max_idx, np.arange(dout.size)] = dout.transpose(2, 3, 0, 1).ravel()

    # Fold back into per-plane images, then restore the shape of x.
    dx = col2im_indices(dx_col,
                        planes_shape,
                        pool_height,
                        pool_width,
                        padding=0,
                        stride=stride)
    return dx.reshape(x.shape)

コード例 #14

0

ファイルを表示

ファイル: layers.py プロジェクト: rudolfsteiner/cs231n

def max_pool_forward_naive(x, pool_param):
    """
    Forward pass for a max pooling layer using the im2col helper.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data
    - cache: (x, pool_param)
    """
    pool_height, pool_width, stride = (pool_param['pool_height'],
                                       pool_param['pool_width'],
                                       pool_param['stride'])
    N, C, H, W = x.shape

    out_h = (H - pool_height) // stride + 1
    out_w = (W - pool_width) // stride + 1
    window = pool_height * pool_width

    cols = im2col_indices(x, pool_height, pool_width, 0, stride)

    # Split the channel axis out of the rows and move it into the columns,
    # so that every column is a single-channel pooling window.
    per_window = cols.reshape(C, window, -1).transpose(1, 0, 2)
    per_window = per_window.reshape(window, -1)

    # Maximum of each window, unflattened with the sample axis moved first.
    window_max = np.amax(per_window, axis=0)
    out = window_max.reshape(C, out_h, out_w, N).transpose(3, 0, 1, 2)

    cache = (x, pool_param)
    return out, cache

コード例 #15

0

ファイルを表示

def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer using the im2col helpers.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    num_filters, _, filter_h, filter_w = w.shape

    # Bias gradient: sum over every axis except axis 1.
    db = np.sum(dout, axis=(0, 2, 3))

    # Input patches in column form.
    patches = im2col.im2col_indices(x, filter_h, filter_w,
                                    padding=conv_param['pad'],
                                    stride=conv_param['stride'])

    # Upstream gradient as a matrix with one row per filter.
    grad_rows = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)

    # Weight gradient, restored to the shape of w.
    dw = np.dot(grad_rows, patches.T).reshape(w.shape)

    # Input gradient in column form, folded back into the shape of x.
    flat_filters = w.reshape(num_filters, -1)
    dx_cols = np.dot(flat_filters.T, grad_rows)
    dx = im2col.col2im_indices(dx_cols, x.shape, filter_h, filter_w,
                               padding=conv_param['pad'],
                               stride=conv_param['stride'])

    return dx, dw, db

コード例 #16

0

ファイルを表示

def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, b, conv_param = cache
    pad, stride = conv_param['pad'], conv_param['stride']
    num_filters, num_channels, filter_height, filter_width = w.shape

    # Upstream gradient as a matrix with one row per filter.
    dout_matrix = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
    # Input patches in column form, and one flattened filter per row.
    cols = im2col_indices(x,
                          filter_height,
                          filter_width,
                          padding=pad,
                          stride=stride)
    filter_matrix = w.reshape(num_filters, -1)

    # Bias gradient: sum over every axis except axis 1.
    db = np.sum(dout, axis=(0, 2, 3))
    # Weight gradient, restored to the shape of w.
    dw = np.reshape(dout_matrix.dot(cols.T), w.shape)
    # Input gradient in column form, folded back into the shape of x.
    dx_col = filter_matrix.T.dot(dout_matrix)
    dx = col2im_indices(dx_col,
                        x.shape,
                        filter_height,
                        filter_width,
                        padding=pad,
                        stride=stride)

    return dx, dw, db

コード例 #17

0

ファイルを表示

def max_pool_forward_naive(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') where
      H' = 1 + (H - pool_height) // stride
      W' = 1 + (W - pool_width) // stride
    - cache: (x, pool_param, x_col, switches), where x_col is the result of
      im2col_indices on x viewed as (N * C, 1, H, W) and switches is the
      argmax of x_col along axis 0.
    """
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']

    N, C, H, W = x.shape
    # Floor division keeps the output sizes integral on Python 3 too; true
    # division would produce floats, which np.zeros rejects as a shape.
    H_, W_ = (H - HH) // stride + 1, (W - WW) // stride + 1

    # The column form and the per-column argmax are not needed to build
    # `out` below; they are computed because they are part of the cache.
    x_col = im2col_indices(x.reshape(N * C, 1, H, W),
                           HH,
                           WW,
                           padding=0,
                           stride=stride)
    switches = np.argmax(x_col, axis=0)

    # Slide the window over the input and take the maximum of each region.
    # `range` works on both Python 2 and 3 (the original used `xrange`).
    out = np.zeros((N, C, H_, W_))
    for i, ii in enumerate(range(0, H - HH + 1, stride)):
        for j, jj in enumerate(range(0, W - WW + 1, stride)):
            out[:, :, i, j] = np.max(x[:, :, ii:ii + HH, jj:jj + WW],
                                     axis=(2, 3))

    cache = (x, pool_param, x_col, switches)
    return out, cache

コード例 #18

0

ファイルを表示

def max_pool_backward_naive(dout, cache):
    """
    Backward pass for a max pooling layer, routed through im2col columns.

    Inputs:
    - dout: Upstream derivatives
    - cache: A tuple of (x, pool_param) as in the forward pass.

    Returns:
    - dx: Gradient with respect to x, with the same shape as x
    """
    x, pool_param = cache
    N, C, H, W = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    step = pool_param['stride']

    # Treat every (sample, channel) plane as its own single-channel image so
    # that each column produced by im2col covers exactly one pooling window.
    plane_shape = (N * C, 1, H, W)
    windows = im2col.im2col_indices(x.reshape(plane_shape),
                                    pool_h,
                                    pool_w,
                                    padding=0,
                                    stride=step)

    # Row index of the maximum inside each window (column).
    winners = np.argmax(windows, axis=0)

    # Only the winning entry of each window receives the upstream gradient;
    # every other entry stays zero.
    grad_cols = np.zeros_like(windows)
    upstream = dout.transpose(2, 3, 0, 1).ravel()
    grad_cols[winners, np.arange(winners.size)] = upstream

    # Fold the columns back into image layout and restore the (N, C) axes.
    dx_planes = im2col.col2im_indices(grad_cols, plane_shape,
                                      pool_h,
                                      pool_w,
                                      padding=0,
                                      stride=step)
    return dx_planes.reshape(x.shape)

コード例 #19

0

ファイルを表示

def max_pool_forward_naive(x, pool_param):
    """
    Forward pass for a max pooling layer, computed on im2col columns.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, H', W') with
      H' = (H - pool_height) // stride + 1 and
      W' = (W - pool_width) // stride + 1
    - cache: (x, pool_param)
    """
    N, C, H, W = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    step = pool_param['stride']

    rows_out = (H - pool_h) // step + 1
    cols_out = (W - pool_w) // step + 1

    # Each (sample, channel) plane becomes a single-channel image, so every
    # column handed back by im2col_indices covers one pooling window.
    planes = x.reshape(N * C, 1, H, W)
    windows = im2col_indices(planes,
                             pool_h,
                             pool_w,
                             padding=0,
                             stride=step)

    # Maximum of every window, then restore the (N, C, H', W') layout.
    pooled = np.max(windows, axis=0)
    out = pooled.reshape(rows_out, cols_out, N, C).transpose(2, 3, 0, 1)

    return out, (x, pool_param)

コード例 #20

0

ファイルを表示

ファイル: layers.py プロジェクト: rudolfsteiner/cs231n

def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer using the im2col helpers.

    Inputs:
    - dout: Upstream derivatives.
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    x, w, _, conv_param = cache
    stride, pad = conv_param['stride'], conv_param['pad']
    _, _, _, _ = x.shape  # x is expected to be 4-D, as in the forward pass
    n_filters, _, k_h, k_w = w.shape

    # One row per filter holding all of that filter's upstream derivatives.
    grad_rows = dout.transpose(1, 2, 3, 0).reshape(n_filters, -1)

    # Input gradient: back-project through the flattened filters and fold
    # the resulting columns back into image layout.
    flat_w = w.reshape(n_filters, -1)
    dx = col2im_indices(flat_w.T.dot(grad_rows), x.shape, k_h, k_w, pad,
                        stride)

    # Weight gradient: correlate the gradient rows with the input patches.
    patches = im2col_indices(x, k_h, k_w, pad, stride)
    dw = grad_rows.dot(patches.T).reshape(w.shape)

    # Bias gradient: total upstream gradient per filter.
    db = grad_rows.sum(axis=1)

    return dx, dw, db

コード例 #21

0

ファイルを表示

def max_pool_forward_naive(x, pool_param):
    """
    Forward pass for a max pooling layer, computed on im2col columns.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, of shape (N, C, h_out, w_out)
    - cache: (x, pool_param)
    """
    N, C, H, W = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    step = pool_param['stride']

    # Output size with zero padding; int() truncates the quotient.
    h_out = int(1 + (H - pool_h) / step)
    w_out = int(1 + (W - pool_w) / step)

    # Each (sample, channel) plane becomes a single-channel image, so every
    # column returned by the helper covers one pooling window.
    planes = x.reshape(N * C, 1, H, W)
    windows = im2col.im2col_indices(planes,
                                    pool_h,
                                    pool_w,
                                    padding=0,
                                    stride=step)

    # Pick the winning entry of each column through its argmax index.
    winners = np.argmax(windows, axis=0)
    pooled = windows[winners, np.arange(winners.size)]

    # Restore the (N, C, h_out, w_out) layout.
    out = pooled.reshape(h_out, w_out, N, C).transpose(2, 3, 0, 1)

    return out, (x, pool_param)

コード例 #22

0

ファイルを表示

def conv_backward_naive(dout, cache):
    """
    Backward pass for a convolutional layer using the im2col helpers.

    Inputs:
    - dout: Upstream derivatives, of shape (N, F, H_new, W_new)
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    from cs231n.im2col import im2col_indices, col2im_indices

    x, w, _, conv_param = cache
    stride = conv_param['stride']
    pad = conv_param['pad']
    n_filters, _, k_h, k_w = w.shape
    # The channel count used to shape dw is read from x, not from w.
    _, n_channels, _, _ = x.shape

    # Bias gradient: sum over every axis except the filter axis.
    db = dout.sum(axis=(0, 2, 3))

    # (N, F, H_new, W_new) -> (F, H_new * W_new * N): one row per filter.
    grad_rows = dout.transpose(1, 2, 3, 0).reshape((n_filters, -1))

    # Input patches in column form; per the original author's note the
    # shape is (C * HH * WW, N * H_new * W_new).
    patches = im2col_indices(x, k_h, k_w, padding=pad, stride=stride)

    # Weight gradient, reshaped back to filter layout.
    dw = np.dot(grad_rows, patches.T).reshape(n_filters, n_channels, k_h, k_w)

    # Input gradient: back-project through the flattened filters, then fold
    # the columns back into image layout.
    flat_w = w.reshape(n_filters, -1)
    grad_cols = np.dot(flat_w.T, grad_rows)
    dx = col2im_indices(grad_cols, x.shape, k_h, k_w,
                        padding=pad, stride=stride)

    return dx, dw, db

コード例 #23

0

ファイルを表示

ファイル: layers.py プロジェクト: jguoaj/deep-learning-cv

def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass for a convolutional layer, computed as one matrix product
    over the im2col representation of the input.

    The input consists of N data points, each with C channels, height H and
    width W. Each input is convolved with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (F, C, HH, WW)
    - b: Biases, of shape (F,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in the
        horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    - cache: (x, w, b, conv_param)

    The filter must tile the padded input exactly: an assertion fails when
    the stride does not divide (H + 2 * pad - HH) or (W + 2 * pad - WW).
    """
    # Approach follows:
    # https://wiseodd.github.io/techblog/2016/07/16/convnet-conv-layer/
    stride, pad = conv_param['stride'], conv_param['pad']
    N, _, H, W = x.shape
    F, _, HH, WW = w.shape

    span_h = H + 2 * pad - HH
    span_w = W + 2 * pad - WW
    h_out = 1 + span_h // stride
    w_out = 1 + span_w // stride
    assert span_h % stride == 0
    assert span_w % stride == 0

    import cs231n.im2col as im2col

    # Example from the original author: x of shape (2, 3, 4, 4) with filters
    # of shape (3, 3, 4, 4) gives patches of shape (48, 8) and flattened
    # filters of shape (3, 48).
    patches = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)
    flat_filters = w.reshape(F, -1)

    # One row of responses per filter; the bias is broadcast along each row.
    responses = flat_filters.dot(patches) + b.reshape(b.shape[0], 1)

    # The columns unfold as (h_out, w_out, N), so reshape first and only
    # then move the sample axis to the front.
    out = responses.reshape(F, h_out, w_out, N).transpose(3, 0, 1, 2)

    cache = (x, w, b, conv_param)
    return out, cache

コード例 #24

0

ファイルを表示

ファイル: layers.py プロジェクト: barryridge/Stanford-CS231n-Winter-2016

def conv_forward_naive(x, w, b, conv_param):
  """
  A naive implementation of the forward pass for a convolutional layer.

  The input consists of N data points, each with C channels, height H and width
  W. We convolve each input with F different filters, where each filter spans
  all C channels and has height HH and width WW.

  Input:
  - x: Input data of shape (N, C, H, W)
  - w: Filter weights of shape (F, C, HH, WW)
  - b: Biases, of shape (F,)
  - conv_param: A dictionary with the following keys:
    - 'stride': The number of pixels between adjacent receptive fields in the
      horizontal and vertical directions.
    - 'pad': The number of pixels that will be used to zero-pad the input.

  Returns a tuple of:
  - out: Output data, of shape (N, F, H', W') where H' and W' are given by
    H' = 1 + (H + 2 * pad - HH) // stride
    W' = 1 + (W + 2 * pad - WW) // stride
  - cache: (x, w, b, conv_param)
  """
  # Unpack params
  pad = conv_param['pad']
  stride = conv_param['stride']
  (N, C, H, W) = x.shape
  (F, C, HH, WW) = w.shape

  # Calculate H' and W'. Floor division keeps them integers on Python 3 as
  # well; with true division they become floats and can no longer be used as
  # array dimensions in the output shape below.
  H_ = 1 + (H + 2 * pad - HH) // stride
  W_ = 1 + (W + 2 * pad - WW) // stride

  # TODO: Add some exception throwing here if the stride does not evenly
  # divide the padded extent (i.e. H' or W' would not be whole numbers).

  # Calculate x_col using the im2col helper function
  x_col = im2col.im2col_indices(x, HH, WW, padding=pad, stride=stride)

  # Calculate w_row using the im2col helper function: the field covers the
  # whole filter, so presumably each filter becomes one column of w_row.
  w_row = im2col.im2col_indices(w, HH, WW, padding=0, stride=1)

  # Bias trick: append a row of ones to x_col and the bias vector b as an
  # extra row of w_row, so that a single matrix product also adds the bias.
  x_col_1 = np.vstack((x_col, np.ones(x_col.shape[-1])))
  w_row_1 = np.vstack((w_row, b))

  # Perform the convolution
  out_ = np.dot(w_row_1.T, x_col_1)

  # Reshape the output using the col2im helper function (1x1 field, so this
  # only rearranges the columns into the (N, F, H', W') layout).
  out = im2col.col2im_indices(out_, (N, F, H_, W_), field_height=1,
                              field_width=1, padding=0, stride=1)

  cache = (x, w, b, conv_param)
  return out, cache

コード例 #25

0

ファイルを表示

def conv_backward_naive(dout, cache):
    """
    A naive implementation of the backward pass for a convolutional layer.

    The gradients are accumulated one (sample, filter, channel) triple at a
    time, using im2col_indices on single-channel slices.

    Inputs:
    - dout: Upstream derivatives, unpacked below as (N, F, H_d, W_d).
    - cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive

    Returns a tuple of:
    - dx: Gradient with respect to x
    - dw: Gradient with respect to w
    - db: Gradient with respect to b
    """
    dx, dw, db = None, None, None
    ###########################################################################
    # TODO: Implement the convolutional backward pass.                        #
    ###########################################################################
    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, C, HH, WW = w.shape
    N, F, H_d, W_d = dout.shape
    pad = conv_param['pad']
    stride = conv_param['stride']
    dw = np.zeros(w.shape)
    # Accumulator sized like the zero-padded input; cropped to x's size below.
    dx_t = np.zeros((N, C, H + 2 * pad, W + 2 * pad))
    # NOTE(review): this initial dx is never read; dx is reassigned from dx_t
    # after the second loop nest.
    dx = np.zeros(x.shape)
    db = np.zeros(b.shape)

    # ---- dw: accumulate over every (filter j, sample i, channel k) ----
    for j in range(F):
        for i in range(N):
            for k in range(C):
                # NOTE(review): the dilated dout built here depends only on
                # (i, j), not on k, so it is recomputed for every channel.
                dout_temp = dout[i][j]
                # Dilate dout: each dimension grows to stride * (n - 1) + 1
                # and the original values land on every stride-th position,
                # with zeros in between.
                temp = np.zeros(stride * np.array(dout_temp.shape) - stride +
                                1,
                                dtype=dout_temp.dtype)
                temp[::stride, ::stride] = dout_temp
                dout_temp = temp
                # View one channel of one sample as a (1, 1, H, W) image and
                # extract patches the size of the dilated dout with stride 1.
                x_temp = np.reshape(x[i][k], (1, 1, H, W))
                x_temp = im2col_indices(x_temp, dout_temp.shape[0],
                                        dout_temp.shape[1], pad, 1)
                dout_temp = np.reshape(dout_temp, (-1))
                # The product is reshaped to (HH, WW), i.e. one value per
                # filter tap, and added to this filter/channel's gradient.
                dw[j][k] += np.reshape(np.dot(dout_temp, x_temp), (HH, WW))

    # ---- dx: accumulate over every (sample i, channel k, filter j) ----
    for i in range(N):
        for k in range(C):
            for j in range(F):
                # Same dilation as above, on a (1, 1, H_d, W_d) view; the two
                # leading singleton dimensions stay at size 1
                # (stride * 1 - stride + 1 == 1).
                dout_temp = np.reshape(dout[i][j][:][:], (1, 1, H_d, W_d))
                temp = np.zeros(stride * np.array(dout_temp.shape) - stride +
                                1,
                                dtype=dout_temp.dtype)
                temp[:, :, ::stride, ::stride] = dout_temp
                dout_temp = temp

                # Flip the filter slice vertically and horizontally
                # (a 180-degree rotation).
                w_temp = w[j][k]
                w_temp = np.fliplr(np.flipud(w_temp))

                # NOTE(review): the padding argument is derived from the
                # filter height only (w_temp.shape[0] - 1); this presumably
                # assumes square filters (HH == WW) -- confirm.
                dout_temp = im2col_indices(dout_temp, w_temp.shape[0],
                                           w_temp.shape[1],
                                           w_temp.shape[0] - 1, 1)
                w_temp = np.reshape(w_temp, (-1))
                # The product is reshaped to the padded-input size and added
                # to the padded gradient of this sample/channel.
                dx_t[i][k] += np.reshape(np.dot(w_temp, dout_temp),
                                         (H + 2 * pad, W + 2 * pad))

    # Drop the `pad`-wide border so dx has the spatial size of x.
    dx = dx_t[:, :, pad:H + pad, pad:W + pad]

    # db[j] is the sum of all upstream derivatives belonging to filter j.
    for j in range(F):
        db[j] = np.sum(dout[:, j, :, :])

    pass
    ###########################################################################
    #                             END OF YOUR CODE                            #
    ###########################################################################
    return dx, dw, db