def max_pool_forward_im2col(x, pool_param):
  """
  An implementation of the forward pass for max pooling based on im2col.

  This isn't much faster than the naive version, so it should be avoided if
  possible.

  Inputs:
  - x: Input data, of shape (N, C, H, W)
  - pool_param: dictionary with the following keys:
    - 'pool_height': The height of each pooling region
    - 'pool_width': The width of each pooling region
    - 'stride': The distance between adjacent pooling regions

  Returns a tuple of:
  - out: Pooled data, of shape (N, C, out_height, out_width)
  - cache: (x, x_cols, x_cols_argmax, pool_param)

  Raises AssertionError if the pooling windows do not tile the height or
  width exactly for the given stride.
  """
  N, C, H, W = x.shape
  pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
  stride = pool_param['stride']

  assert (H - pool_height) % stride == 0, 'Invalid height'
  assert (W - pool_width) % stride == 0, 'Invalid width'

  # Floor division keeps the output dimensions as ints; true division would
  # produce floats on Python 3 and make the reshape below fail. The asserts
  # above guarantee the division is exact.
  out_height = (H - pool_height) // stride + 1
  out_width = (W - pool_width) // stride + 1

  # Fold channels into the batch axis so every (image, channel) plane is
  # pooled independently.
  x_split = x.reshape(N * C, 1, H, W)
  # NOTE(review): im2col is defined elsewhere; presumably it returns one
  # column per pooling window -- confirm against its implementation.
  x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
  # Row index of the largest entry in each column, then gather those entries.
  x_cols_argmax = np.argmax(x_cols, axis=0)
  x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
  out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)

  cache = (x, x_cols, x_cols_argmax, pool_param)
  return out, cache
Ejemplo n.º 2
0
def max_pool_forward_im2col(x, pool_param):
    """
    Max pooling forward pass built on top of im2col.

    Offers little speed-up over the naive version, so prefer another
    implementation when one is available.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with keys 'pool_height', 'pool_width' and
      'stride'

    Returns a tuple of:
    - out: Pooled data, of shape (N, C, out_height, out_width)
    - cache: (x, x_cols, x_cols_argmax, pool_param)
    """
    num, channels, height, width = x.shape
    win_h = pool_param['pool_height']
    win_w = pool_param['pool_width']
    step = pool_param['stride']

    # The windows must tile each spatial axis exactly.
    span_h = height - win_h
    assert span_h % step == 0, 'Invalid height'
    span_w = width - win_w
    assert span_w % step == 0, 'Invalid width'

    rows_out = span_h // step + 1
    cols_out = span_w // step + 1

    # Treat every (image, channel) plane as its own single-channel image.
    planes = x.reshape(num * channels, 1, height, width)
    # NOTE(review): im2col lives elsewhere; presumably one column per
    # pooling window -- confirm.
    x_cols = im2col(planes, win_h, win_w, padding=0, stride=step)

    # Locate the winning row in each column, then pick those values out.
    winners = np.argmax(x_cols, axis=0)
    column_ids = np.arange(x_cols.shape[1])
    maxima = x_cols[winners, column_ids]

    pooled = maxima.reshape(rows_out, cols_out, num, channels)
    out = pooled.transpose(2, 3, 0, 1)

    return out, (x, x_cols, winners, pool_param)
Ejemplo n.º 3
0
def conv_forward_naive(x, w, b, conv_param):
    """
    Forward pass of a convolutional layer, computed as one matrix product
    over im2col-unrolled input patches.

    Each of the N inputs has C channels, height H and width W. It is
    convolved with FN filters, each spanning all C channels with height FH
    and width FW.

    Input:
    - x: Input data of shape (N, C, H, W)
    - w: Filter weights of shape (FN, C, FH, FW)
    - b: Biases, of shape (FN,)
    - conv_param: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns a tuple of:
    - out: Output data, of shape (N, FN, OH, OW) where
      OH = int(1 + (H + 2 * pad - FH) / stride)
      OW = int(1 + (W + 2 * pad - FW) / stride)
    - cache: (x, w, b, col, col_w, conv_param)
    """
    step = conv_param['stride']
    border = conv_param['pad']

    batch, _, in_h, in_w = x.shape
    n_filters, _, k_h, k_w = w.shape

    out_h = int(1 + (in_h + 2 * border - k_h) / step)
    out_w = int(1 + (in_w + 2 * border - k_w) / step)

    # im2col is defined elsewhere; it is expected to return one row per
    # output position: (N*OH*OW, C*FH*FW).
    col = im2col(x, k_h, k_w, step, border)
    # One flattened filter per column: (C*FH*FW, FN).
    col_w = w.reshape(n_filters, -1).T

    # (N*OH*OW, FN) scores, then back to the (N, FN, OH, OW) layout.
    flat_scores = np.dot(col, col_w) + b
    out = flat_scores.reshape(batch, out_h, out_w, n_filters)
    out = out.transpose(0, 3, 1, 2)

    return out, (x, w, b, col, col_w, conv_param)
Ejemplo n.º 4
0
def max_pool_forward_naive(x, pool_param):
    """
    Forward pass of a max pooling layer, computed by unrolling the pooling
    windows with im2col and reducing each window to its maximum.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns a tuple of:
    - out: Output data, reshaped to (N, OH, OW, -1) and then transposed to
      channel-second order
    - cache: (x, pool_param, arg_max), where arg_max is the index of the
      maximum within each unrolled window
    """
    batch, _, in_h, in_w = x.shape
    win_h = pool_param['pool_height']
    win_w = pool_param['pool_width']
    step = pool_param['stride']

    # Pooling never pads, so the output size depends only on window/stride.
    out_h = int(1 + (in_h - win_h) / step)
    out_w = int(1 + (in_w - win_w) / step)

    # im2col is defined elsewhere; it is expected to return
    # (N*OH*OW, C*FH*FW). Re-chunk it so that each row is a single window.
    windows = im2col(x, win_h, win_w, step, 0).reshape(-1, win_h * win_w)

    arg_max = np.argmax(windows, axis=1)
    peaks = np.max(windows, axis=1)
    out = peaks.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

    return out, (x, pool_param, arg_max)