Example #1
import numpy as np

# `params2stack`, `stack2params`, `autoencoder.sigmoid`, and `util.Empty` are
# assumed to be defined elsewhere in the same project.
def cost(theta, input_size, hidden_size, num_classes, netconfig, lamb, data, labels):
  # Extract the softmax layer parameters (weights) from the flattened
  # parameter vector; everything is stored in Fortran (column-major) order.
  softmax_theta = theta[:hidden_size * num_classes].reshape([num_classes, hidden_size], order='F')

  # Extract out the "stack"
  stack = params2stack(theta[hidden_size * num_classes:], netconfig)
  depth = len(stack)
  num_cases = data.shape[1]
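  # One-hot encode the labels: ground_truth[c, i] = 1 iff example i belongs to class c.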
  ground_truth = np.zeros([num_classes, num_cases])
  ground_truth[labels.ravel(), np.arange(num_cases)] = 1

  # Compute the cost function and gradient vector for the stacked
  # autoencoder.
  #
  # `stack` is a list of per-layer parameters: the weights of layer d
  # are `stack[d].w` and the biases are `stack[d].b`.
  #
  # The last layer of the network is connected to the softmax
  # classification layer, `softmax_theta`.
  #
  # Compute the gradients for softmax_theta, storing them in
  # `softmax_theta_grad`. Similarly, compute the gradients for each
  # layer in the stack, storing them in `stack_grad[d].w` and
  # `stack_grad[d].b`. The matrices in stack_grad must match the sizes
  # of the corresponding matrices in stack exactly.
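  # Forward pass: a[l] holds the activations of layer l (a[0] is the input
  # data) and z[l] the corresponding pre-activations; z[0] is a dummy
  # placeholder so the indices line up.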
  z = [0]
  a = [data]

  for layer in range(depth):
    z.append(stack[layer].w.dot(a[layer]) + stack[layer].b)
    a.append(autoencoder.sigmoid(z[layer+1]))

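  # Softmax over the top-layer activations; subtracting the column-wise max
  # before exponentiating avoids overflow in np.exp.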
  M = softmax_theta.dot(a[depth])
  M = M - M.max(0)
  p = np.exp(M) / np.exp(M).sum(0)

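  # Cross-entropy cost of the softmax output plus L2 weight decay on the
  # softmax weights, followed by the standard softmax-regression gradient.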
  gt_vec = ground_truth.reshape([1, -1], order='F')
  p_vec = p.reshape([-1, 1], order='F')
  cost = (-1.0/num_cases * gt_vec.dot(np.log(p_vec)) + lamb/2.0 * (softmax_theta**2).sum()).item()  # .item(): 1x1 array -> scalar
  softmax_theta_grad = -1.0/num_cases * (ground_truth - p).dot(a[depth].T) + lamb * softmax_theta

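  # Backward pass: d[l] is the error term of layer l. The top delta is the
  # softmax error propagated through the sigmoid derivative a * (1 - a);
  # d[0] (the input layer) is never needed.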
  d = [0 for _ in range(depth+1)]

  d[depth] = -(softmax_theta.T.dot(ground_truth - p)) * a[depth] * (1-a[depth])

  for layer in range(depth-1, 0, -1):
    d[layer] = stack[layer].w.T.dot(d[layer+1]) * a[layer] * (1-a[layer])

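  # Per-layer gradients: average delta * activation^T (and the deltas alone
  # for the biases) over the mini-batch.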
  stack_grad = [util.Empty() for _ in range(depth)]
  for layer in range(depth-1, -1, -1):
    stack_grad[layer].w = (1.0/num_cases) * d[layer+1].dot(a[layer].T)
    stack_grad[layer].b = (1.0/num_cases) * np.sum(d[layer+1], 1)[:, np.newaxis]

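  # Flatten the softmax and stack gradients into a single vector with the
  # same column-major layout as theta.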
  grad = np.append(softmax_theta_grad.ravel('F'), stack2params(stack_grad)[0])

  assert grad.shape == theta.shape
  assert grad.flags['F_CONTIGUOUS']
  return cost, grad
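
The `(cost, grad)` pair returned above can be fed straight to a gradient-based optimizer. Below is a minimal fine-tuning sketch using scipy's L-BFGS-B; the function name `finetune`, the `maxiter` setting, and `theta0` (a flattened parameter vector, e.g. from `stack2params` plus initial softmax weights) are illustrative and not part of the original code.

import scipy.optimize

def finetune(theta0, input_size, hidden_size, num_classes, netconfig,
             lamb, data, labels, maxiter=400):
  # With jac=True, minimize expects the objective to return (cost, grad),
  # which matches the return value of `cost` above.
  result = scipy.optimize.minimize(
      cost, theta0,
      args=(input_size, hidden_size, num_classes, netconfig, lamb, data, labels),
      method='L-BFGS-B', jac=True,
      options={'maxiter': maxiter})
  return result.x
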
Example #2
def predict(theta, input_size, hidden_size, num_classes, netconfig, data):
  # Extract the softmax layer parameters (weights) from the flattened
  # parameter vector.
  softmax_theta = theta[:hidden_size * num_classes].reshape([num_classes, hidden_size], order='F')

  # Extract out the "stack"
  stack = params2stack(theta[hidden_size * num_classes:], netconfig)

  depth = len(stack)
  z = [0]
  a = [data]

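  # Forward pass through the stack; the prediction is the argmax of the
  # softmax scores (the softmax normalisation is monotone, so it is skipped).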
  for layer in range(depth):
    z.append(stack[layer].w.dot(a[layer]) + stack[layer].b)
    a.append(autoencoder.sigmoid(z[layer+1]))

  return softmax_theta.dot(a[depth]).argmax(0)
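
A short usage sketch for `predict`: compute accuracy on a held-out set. `opt_theta`, `test_data` (features by examples, matching the layout above), and `test_labels` (a 1-D array of class indices) are illustrative names, not part of the original code.

import numpy as np

pred = predict(opt_theta, input_size, hidden_size, num_classes, netconfig, test_data)
accuracy = np.mean(pred == test_labels.ravel())
print('Test accuracy: %.2f%%' % (100.0 * accuracy))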