import numpy as np

# Reusing debugInitializeWeights to get a deterministic pseudo-random X
X = debugInitializeWeights(input_layer_size - 1, m)

# Set each element of y to be a label in [0, num_labels - 1]
y = np.array([i % num_labels for i in range(m)])

# Unroll parameters; the order (Theta1 first, then Theta2) must match
# the reshaping done inside reduced_cost_func below
nn_params = np.append(Theta1, Theta2)

# One-hot encode the labels
ys = np.zeros((m, num_labels))
for i in range(m):
    ys[i][y[i]] = 1

# Compute cost and the analytic gradient via backpropagation
cost, grad = net._backprop(X, ys)

def reduced_cost_func(p):
    """Cheaply decorated nnCostFunction: fold the flat parameter vector p
    back into the two weight matrices, then return only the cost."""
    net.th[0] = np.reshape(p[:hidden_layer_size * (input_layer_size + 1)],
                           (hidden_layer_size, input_layer_size + 1))
    net.th[1] = np.reshape(p[hidden_layer_size * (input_layer_size + 1):],
                           (num_labels, hidden_layer_size + 1))
    return net._backprop(X, ys)[0]

numgrad = computeNumericalGradient(reduced_cost_func, nn_params)

# Check that the two gradients agree (element-wise differences should be tiny)
print(grad - numgrad)
np.testing.assert_almost_equal(grad, numgrad)
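
# For reference, a minimal sketch of what computeNumericalGradient might
# look like: central differences with a small eps, perturbing one
# coordinate at a time. This is an assumption based on the call site
# above, not necessarily the exact implementation used here.
def computeNumericalGradient(J, theta, eps=1e-4):
    """Approximate the gradient of J at theta coordinate by coordinate:
    numgrad[i] = (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)."""
    numgrad = np.zeros_like(theta, dtype=float)
    perturb = np.zeros_like(theta, dtype=float)
    for i in range(theta.size):
        perturb[i] = eps
        loss1 = J(theta - perturb)  # cost with theta[i] nudged down
        loss2 = J(theta + perturb)  # cost with theta[i] nudged up
        numgrad[i] = (loss2 - loss1) / (2 * eps)
        perturb[i] = 0.0            # reset before the next coordinate
    return numgrad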