def compute_error_per_sample(theta, x, y):
  assert(mathutil.is_np_1d_array(theta))
  assert(theta.size == 2)
  num_samples = x.size
  aug_x = get_aug_x(x)
  h_theta_x = np.sum(theta*aug_x, axis=1)
  error_per_sample = h_theta_x - y
  assert(mathutil.is_np_1d_array(error_per_sample))
  assert(error_per_sample.size == num_samples)
  return error_per_sample
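# A minimal usage sketch (not part of the original module): with theta = [intercept, slope]
# and get_aug_x assumed to prepend a column of ones, the residual for each sample is
# (theta[0] + theta[1]*x_i) - y_i.
def _error_per_sample_example():
  theta = np.array([1.0, 2.0])
  x = np.array([0.0, 1.0, 2.0])
  y = np.array([1.0, 3.0, 4.0])
  # under the assumption above the residuals come out as [0.0, 0.0, 1.0]
  return compute_error_per_sample(theta, x, y)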
def compute_gradient(theta, aug_x, y, lagrange_lambda):
  """Regularized logistic regression gradient; aug_x is the bias-augmented design matrix."""
  n_samples, n_features = aug_x.shape
  assert(np.ndim(theta)==1)
  assert(np.size(theta) == n_features)
  inner_term = np.dot(aug_x, theta)
  hyp_h_theta = mathutil.sigmoid_fn(inner_term)
  assert(mathutil.is_np_1d_array(y))
  assert(mathutil.is_np_1d_array(hyp_h_theta))
  jac = (1/n_samples) * np.dot(aug_x.transpose(),(hyp_h_theta - y))
  assert(np.ndim(jac)==1)
  assert(np.size(jac)==n_features)
  regularization_pull = (lagrange_lambda/n_samples)*theta
  # the bias term theta[0] is left unregularized, matching the linear regression gradient below
  regularization_pull[0] = 0.0
  jac_with_regularization = jac + regularization_pull
  assert(not np.any(np.isnan(jac_with_regularization)))
  return jac_with_regularization
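# A hedged training sketch (an assumption, not the original training pipeline): because
# compute_cost below and compute_gradient above share the (theta, aug_x, y, lagrange_lambda)
# signature, they can be handed straight to scipy.optimize.minimize as objective and jacobian.
def _train_logistic_regression_sketch(aug_x, y, lagrange_lambda):
  from scipy.optimize import minimize  # assumes scipy is available
  initial_theta = np.zeros(aug_x.shape[1])
  result = minimize(compute_cost, initial_theta,
                    args=(aug_x, y, lagrange_lambda),
                    jac=compute_gradient, method='TNC')
  return result.x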
def compute_linear_regression_gradient(theta, x, y, lagrange_lambda):
  """
  Compute the linear regression gradient with the regularization pull
  
  .. math::
     \\frac{\\partial J(\\theta)}{\\partial \\theta_0} = \\frac{1}{m} \\sum_{i=1}^{m-1} (h_\\theta(x^{(i)}) - y^{(i)}) x_j^{(i)} \,for \,j=0
     
     \\frac{\\partial J(\\theta)}{\\partial \\theta_j} = \\frac{1}{m} \\sum_{i=1}^{m-1} (h_\\theta(x^{(i)}) - y^{(i)}) x_j^{(i)} + \\frac{\\lambda}{m} \\theta_j \,for \,j \\ge 1     
  
  :param theta: the hyper-plane params
  :type theta: list
  :param x: design matrix
  :type x: np array
  :param y: output
  :type y: np array
  """  
  error_per_sample = compute_error_per_sample(theta, x, y)
  num_samples = x.size
  aug_x = get_aug_x(x)
  inner_term = error_per_sample[:, np.newaxis] * aug_x
  summed_inner_term = np.sum(inner_term, axis=0)
  assert(mathutil.is_np_1d_array(summed_inner_term))
  assert(summed_inner_term.size == 2)
  jacobian = (1/num_samples)*summed_inner_term
  # only the slope theta[1] is regularized; the intercept theta[0] gets no pull
  jacobian_with_reg_pull = jacobian + np.array([0.0, lagrange_lambda*theta[1]/num_samples])
  return jacobian_with_reg_pull
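# A hedged gradient-check sketch (an assumption, not in the original code): compares the
# analytic gradient above against central finite differences of compute_linear_regression_cost,
# which generate_learning_curve later in this module already relies on. It assumes that cost
# regularizes theta the same way the gradient above does.
def _check_linear_regression_gradient(theta, x, y, lagrange_lambda, eps=1e-5):
  analytic = compute_linear_regression_gradient(theta, x, y, lagrange_lambda)
  numeric = np.empty_like(analytic)
  for j in range(theta.size):
    step = np.zeros_like(theta, dtype=np.float64)
    step[j] = eps
    numeric[j] = (compute_linear_regression_cost(theta + step, x, y, lagrange_lambda)
                  - compute_linear_regression_cost(theta - step, x, y, lagrange_lambda)) / (2*eps)
  # a small maximum absolute difference (e.g. < 1e-6) suggests the two agree
  return np.max(np.abs(analytic - numeric))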
def compute_cost(theta, aug_x, y, lagrange_lambda):
  """Regularized logistic regression cost; aug_x is the bias-augmented design matrix."""
  n_samples, n_features = aug_x.shape
  assert(np.ndim(theta) == 1)
  assert(np.size(theta) == n_features)
  with np.errstate(over='raise'):
    inner_term = np.dot(aug_x, theta)
  hyp_h_theta = mathutil.sigmoid_fn(inner_term) # h_Theta(x)
  assert(mathutil.is_np_1d_array(y))
  assert(mathutil.is_np_1d_array(hyp_h_theta))
  
  cost_per_sample = -y*np.log(hyp_h_theta) \
                    -(1-y)*np.log(1-hyp_h_theta)
  assert(not np.any(np.isnan(cost_per_sample)))
  average_cost = (1/n_samples)*np.sum(cost_per_sample)
  # the bias term theta[0] is excluded from the regularization penalty
  cost_plus_regularization = average_cost + \
                             (lagrange_lambda/(2*n_samples))*np.sum(theta[1:]**2)
  assert(not np.isnan(cost_plus_regularization))
  return cost_plus_regularization
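# A hedged prediction sketch (an assumption): once theta has been fitted against the
# cost/gradient pair above, class labels follow from thresholding the sigmoid at 0.5.
def _predict_logistic_sketch(theta, aug_x):
  probabilities = mathutil.sigmoid_fn(np.dot(aug_x, theta))
  return (probabilities >= 0.5).astype(np.int64)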
def get_theta_transfers_from_flattened_version(theta_transfers_flattened, theta_transfer_shapes):
  offset = 0
  assert(mathutil.is_np_1d_array(theta_transfers_flattened))
  theta_transfers = []
  for theta_transfer_shape in theta_transfer_shapes:
    r, c = theta_transfer_shape
    theta_transfer = theta_transfers_flattened[offset:(offset+r*c)].reshape(r,c)
    theta_transfers.append(theta_transfer)
    offset += r*c
  assert(offset==theta_transfers_flattened.size)
  return theta_transfers    
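# A hedged inverse sketch (an assumption, mirroring the unpacking above): optimizers that
# work on a single 1-D parameter vector need the per-layer matrices concatenated in the
# same row-major order that get_theta_transfers_from_flattened_version unpacks.
def _flatten_theta_transfers(theta_transfers):
  return np.concatenate([theta_transfer.ravel() for theta_transfer in theta_transfers])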
def multivariate_gaussian(X, mu, sigma_sq):
  """Multivariate Gaussian density for each row of X; sigma_sq is either a vector of per-feature variances or a full covariance matrix."""
  if (mathutil.is_np_1d_array(sigma_sq)):
    sigma_sq = np.diag(sigma_sq)
  r, c = sigma_sq.shape
  assert(r == c)    
  inv_sigma_sq = np.linalg.inv(sigma_sq)
  X_minus_mu = X-mu
  exp_term_0 = np.dot(X_minus_mu, inv_sigma_sq)
  # quadratic form (x - mu)^T Sigma^{-1} (x - mu), one scalar per sample
  exp_term = np.sum(exp_term_0 * X_minus_mu, axis=1)
  dist = (2*np.pi)**(-r/2)*np.linalg.det(sigma_sq)**(-0.5)*np.exp(-0.5*exp_term)
  return dist
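# A hedged cross-check sketch (an assumption, requires scipy): the density above should
# agree with scipy.stats.multivariate_normal for the same mean and covariance.
def _multivariate_gaussian_check(X, mu, sigma_sq):
  from scipy.stats import multivariate_normal
  cov = np.diag(sigma_sq) if mathutil.is_np_1d_array(sigma_sq) else sigma_sq
  reference = multivariate_normal.pdf(X, mean=mu, cov=cov)
  return np.max(np.abs(multivariate_gaussian(X, mu, sigma_sq) - reference))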
def run_feedforward_nn_for_sample(theta_matrix_1, theta_matrix_2, sample):
  assert(mathutil.is_np_1d_array(sample))
  n_features = np.size(sample)
  output_dim_1, input_dim_1 = theta_matrix_1.shape
  output_dim_2, input_dim_2 = theta_matrix_2.shape
  assert(n_features+1 == input_dim_1)
  a_1 = np.concatenate(([1.0], sample))
  z_2= np.dot(theta_matrix_1, a_1)
  a_withoutbias_2 = mathutil.sigmoid_fn(z_2)
  a_2 = np.concatenate(([1.0], a_withoutbias_2))
  assert( np.size(a_2) == input_dim_2 )
  z_3 = np.dot(theta_matrix_2, a_2)
  predicted_output_vector = mathutil.sigmoid_fn(z_3)
  # the output classes are 1-indexed (MATLAB-style one-hot labels), hence the +1
  return np.argmax(predicted_output_vector) + 1
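# A hedged batch sketch (an assumption): the per-sample forward pass above can be mapped
# over the rows of a design matrix to get one predicted label per sample.
def _run_feedforward_nn_for_matrix(theta_matrix_1, theta_matrix_2, x):
  return np.array([run_feedforward_nn_for_sample(theta_matrix_1, theta_matrix_2, sample)
                   for sample in x])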
def generate_learning_curve(x, y, x_cv, y_cv):
  """
  Generates learning curves by sweeping acrosss the size of the training set
  """
  assert(mathutil.is_np_1d_array(x))
  num_samples = x.size
  assert(num_samples>0)
  # sweep from a single training sample up to the full training set
  training_set_size = np.arange(1, num_samples + 1)
  # the errors are floats; empty_like on the integer size array would silently truncate them
  training_error = np.empty_like(training_set_size, dtype=np.float64)
  cv_error = np.empty_like(training_set_size, dtype=np.float64)
  for idx, curr_training_set_size in enumerate(training_set_size):
    x_train = x[:curr_training_set_size]
    y_train = y[:curr_training_set_size]
    initial_theta = np.ones(2, dtype=np.float64)
    trained_theta = train_linear_regression(initial_theta, x_train, y_train, 1.0)
    training_error[idx] = compute_linear_regression_cost(trained_theta, x_train, y_train, 0.0)
    cv_error[idx] = compute_linear_regression_cost(trained_theta, x_cv, y_cv, 0.0)
  plt_learning_curve(training_set_size, training_error, cv_error)
def unflatten_X_Theta(X_Theta_flattened, num_users, num_movies, num_features):
  assert(mathutil.is_np_1d_array(X_Theta_flattened))
  assert(X_Theta_flattened.size == (num_users+num_movies)*num_features)
  X = X_Theta_flattened[0:(num_movies*num_features)].reshape((num_movies, num_features))
  Theta = X_Theta_flattened[(num_movies*num_features):].reshape((num_users, num_features))
  return X, Theta
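# A hedged round-trip sketch (an assumption, mirroring unflatten_X_Theta): the parameters are
# packed as one flat vector with the movie feature matrix X first and the user parameter
# matrix Theta second.
def _flatten_X_Theta(X, Theta):
  return np.concatenate((X.ravel(), Theta.ravel()))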