def run_feedforward_nn_for_sample(theta_matrix_1, theta_matrix_2, sample):
  """Classify one sample with a 3-layer (input/hidden/output) network.

  Runs a single feedforward pass through the two weight matrices and
  returns the 1-based index of the most activated output unit.

  Args:
    theta_matrix_1: weights mapping layer 1 -> layer 2, shape
      (hidden_units, n_features + 1); column 0 multiplies the bias.
    theta_matrix_2: weights mapping layer 2 -> layer 3, shape
      (n_outputs, hidden_units + 1).
    sample: 1-D feature vector of length n_features (no bias entry).

  Returns:
    int: 1-based label of the argmax output activation.
  """
  assert(mathutil.is_np_1d_array(sample))
  n_features = np.size(sample)
  # Only the input dimensions feed the shape sanity checks; the output
  # dimensions were previously bound to locals that were never used.
  _, input_dim_1 = theta_matrix_1.shape
  _, input_dim_2 = theta_matrix_2.shape
  assert(n_features+1 == input_dim_1)
  a_1 = np.concatenate(([1.0], sample))  # prepend bias unit
  z_2 = np.dot(theta_matrix_1, a_1)
  a_withoutbias_2 = mathutil.sigmoid_fn(z_2)
  a_2 = np.concatenate(([1.0], a_withoutbias_2))  # prepend bias unit
  assert( np.size(a_2) == input_dim_2 )
  z_3 = np.dot(theta_matrix_2, a_2)
  predicted_output_vector = mathutil.sigmoid_fn(z_3)
  # +1 converts the 0-based argmax index into a 1-based class label
  # (Octave/MATLAB-style labeling).
  return np.argmax(predicted_output_vector) + 1
def compute_htheta_by_feedforward(theta_transfers, x):
  """Vectorized feedforward pass over all samples at once.

  Args:
    theta_transfers: sequence of weight matrices, one per layer
      transition; each expects a bias row prepended to its input
      activations.
    x: design matrix of shape (num_samples, num_features), one sample
      per row.

  Returns:
    Tuple (h_theta, z_all_layers, activations_all_layers): h_theta has
    shape (num_samples, num_outputs); the two lists hold the per-layer
    pre-activations and activations (the activations list starts with
    the transposed input).
  """
  layer_act = x.transpose()  # columns are samples
  _, n_samples = layer_act.shape
  all_activations = [layer_act]
  all_z = []
  for weights in theta_transfers:
    # Stack a row of ones on top so column 0 of `weights` acts as bias.
    bias_row = np.ones((1, n_samples), dtype=np.float64)
    with_bias = np.concatenate((bias_row, layer_act), axis=0)
    z_vals = np.dot(weights, with_bias)
    layer_act = mathutil.sigmoid_fn(z_vals)
    all_z.append(z_vals)
    all_activations.append(layer_act)
  # Transpose back to one row per sample for the hypothesis output.
  return (layer_act.transpose(), all_z, all_activations)
def compute_gradient(theta, aug_x, y, lagrange_lambda):
  """Gradient of the regularized logistic-regression cost.

  Args:
    theta: 1-D parameter vector whose length equals aug_x's column count.
    aug_x: design matrix of shape (n_samples, n_features), already
      bias-augmented.
    y: 1-D label vector of length n_samples.
    lagrange_lambda: regularization strength.

  Returns:
    1-D gradient vector of the same length as theta.
  """
  n_samples, n_features = aug_x.shape
  assert(np.ndim(theta)==1)
  assert(np.size(theta) == n_features)
  predictions = mathutil.sigmoid_fn(np.dot(aug_x, theta))
  assert(mathutil.is_np_1d_array(y))
  assert(mathutil.is_np_1d_array(predictions))
  residual = predictions - y
  jac = (1/n_samples) * np.dot(aug_x.transpose(), residual)
  assert(np.ndim(jac)==1)
  assert(np.size(jac)==n_features)
  # NOTE(review): the bias weight theta[0] is regularized here too,
  # which matches compute_cost in this file — confirm that is intended.
  result = jac + (lagrange_lambda/n_samples)*theta
  assert(not np.any(np.isnan(result)))
  return result
def compute_cost(theta, aug_x, y, lagrange_lambda):
  """Regularized logistic-regression (cross-entropy) cost.

  Args:
    theta: 1-D parameter vector whose length equals aug_x's column count.
    aug_x: design matrix of shape (n_samples, n_features), already
      bias-augmented.
    y: 1-D label vector of length n_samples.
    lagrange_lambda: regularization strength.

  Returns:
    Scalar cost: mean cross-entropy plus the L2 penalty term.

  Raises:
    FloatingPointError: if the linear term overflows (errstate raises
      instead of warning).
  """
  n_samples, n_features = aug_x.shape
  assert(np.ndim(theta) == 1)
  assert(np.size(theta) == n_features)
  # Surface overflow in the dot product as an error rather than a warning.
  with np.errstate(over = 'raise'):
    linear_term = np.dot(aug_x, theta)
  predictions = mathutil.sigmoid_fn(linear_term)  # h_Theta(x)
  assert(mathutil.is_np_1d_array(y))
  assert(mathutil.is_np_1d_array(predictions))

  # Cross-entropy for each sample: -y*log(h) - (1-y)*log(1-h).
  sample_costs = -y*np.log(predictions) \
                 -(1-y)*np.log(1-predictions)
  assert(not np.any(np.isnan(sample_costs)))
  mean_cost = (1/n_samples)*np.sum(sample_costs)
  # NOTE(review): the bias weight theta[0] is included in the penalty,
  # consistent with compute_gradient — confirm that is intended.
  total_cost = mean_cost + \
               (lagrange_lambda/(2*n_samples))*np.sum(theta**2)
  assert(not np.isnan(total_cost))
  return total_cost