def compute_cost(nn_params, input_layer_size, hidden_layer_size, num_labels,
                 x, y, _lambda, yk=None, x_bias=None):
    """Return the regularized cross-entropy cost of the network.

    Args:
        nn_params: Flat parameter vector; split into ``theta1`` and
            ``theta2`` by ``unroll_thetas``.
        input_layer_size: Forwarded to ``unroll_thetas``.
        hidden_layer_size: Forwarded to ``unroll_thetas``.
        num_labels: Forwarded to ``unroll_thetas`` and ``recode_labels``.
        x: Training inputs; ``x.shape[0]`` is used as the example count.
        y: Labels; only read (through ``recode_labels``) when ``yk`` is
            not supplied.
        _lambda: Regularization strength.
        yk: Optional pre-recoded labels. Must have the same shape as the
            output activations returned by ``feed_forward``.
        x_bias: Optional value passed straight through to
            ``feed_forward`` (presumably the inputs with the bias term
            already attached -- confirm against ``feed_forward``).

    Returns:
        The cross-entropy cost averaged over the examples plus the L2
        penalty on every weight except the first column of each theta.

    Raises:
        AssertionError: If ``yk`` and the output activations differ in
            shape.
    """
    m = x.shape[0]
    theta1, theta2 = unroll_thetas(nn_params, input_layer_size, hidden_layer_size, num_labels)

    # Only the output-layer activations are needed to evaluate the cost.
    _, _, a3, _, _ = feed_forward(theta1, theta2, x, x_bias)

    if yk is None:
        yk = recode_labels(y, num_labels)
    assert yk.shape == a3.shape, 'Error, shape of recoded y is different from a3'

    # A saturated activation of exactly 0 or 1 makes log() return -inf, and
    # 0 * -inf then poisons the whole sum with NaN. Clamp to the closest
    # representable values inside (0, 1); every other activation is untouched.
    limits = np.finfo(a3.dtype)
    a3 = np.clip(a3, limits.tiny, 1 - limits.epsneg)

    term1 = -yk * np.log(a3)
    term2 = (1 - yk) * np.log(1 - a3)
    cost = np.sum(term1 - term2) / m

    # The first column of each theta is excluded from the penalty.
    reg_cost = (np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)) * _lambda / (2 * m)

    return cost + reg_cost
def compute_gradients(nn_params, input_layer_size, hidden_layer_size, num_labels,
                      x, y, _lambda, yk=None, x_bias=None):
    """Return the flattened, regularized gradient of the network cost.

    Runs ``feed_forward`` and then back-propagates the output error to
    obtain one gradient matrix per theta. The first column of each theta
    is left out of the regularization term.

    Args:
        nn_params: Flat parameter vector; split into ``theta1`` and
            ``theta2`` by ``unroll_thetas``.
        input_layer_size: Forwarded to ``unroll_thetas``.
        hidden_layer_size: Forwarded to ``unroll_thetas``.
        num_labels: Forwarded to ``unroll_thetas`` and ``recode_labels``.
        x: Training inputs; ``x.shape[0]`` is used as the example count.
        y: Labels; only read (through ``recode_labels``) when ``yk`` is
            not supplied.
        _lambda: Regularization strength.
        yk: Optional pre-recoded labels with the same shape as the output
            activations.
        x_bias: Optional value passed straight through to
            ``feed_forward``.

    Returns:
        A 1-D array: the transposed ``theta1`` gradient flattened,
        followed by the transposed ``theta2`` gradient flattened.

    Raises:
        AssertionError: If ``yk`` and the output activations differ in
            shape.
    """
    n_examples = x.shape[0]
    theta1, theta2 = unroll_thetas(nn_params, input_layer_size, hidden_layer_size, num_labels)
    a1, a2, a3, z2, _ = feed_forward(theta1, theta2, x, x_bias)

    if yk is None:
        yk = recode_labels(y, num_labels)
    assert yk.shape == a3.shape, 'Error, shape of recoded y is different from a3'

    # Backward pass: output-layer error first, then push it back through
    # the non-bias weights of theta2 to get the hidden-layer error.
    output_error = a3 - yk
    hidden_error = np.dot(theta2[:, 1:].T, output_error) * sigmoid_gradient(z2)

    grad1 = np.dot(hidden_error, a1.T) / n_examples
    grad2 = np.dot(output_error, a2.T) / n_examples

    # Regularize every column except the first one of each gradient.
    grad1[:, 1:] = grad1[:, 1:] + (theta1[:, 1:] * _lambda / n_examples)
    grad2[:, 1:] = grad2[:, 1:] + (theta2[:, 1:] * _lambda / n_examples)

    flat_grad1 = grad1.T.ravel()
    flat_grad2 = grad2.T.ravel()
    return np.concatenate((flat_grad1, flat_grad2))
def train_model(x, y, input_layer_size, hidden_layer_size, num_labels,
                _lambda=0.1, max_iterations=50):
    """Train the network with conjugate gradient and return its weights.

    Args:
        x: Training inputs; ``x.shape[0]`` is the number of examples.
        y: Training labels, recoded once through ``recode_labels``.
        input_layer_size: Size passed to ``rand_initialize_weights`` and
            ``unroll_thetas``.
        hidden_layer_size: Size passed to ``rand_initialize_weights`` and
            ``unroll_thetas``.
        num_labels: Size passed to ``rand_initialize_weights``,
            ``recode_labels`` and ``unroll_thetas``.
        _lambda: Regularization strength handed to ``compute_cost`` and
            ``compute_gradients``. Defaults to 0.1.
        max_iterations: Positive iteration cap for ``fmin_cg``; also used
            to scale the progress bar. Defaults to 50.

    Returns:
        The tuple ``(theta1, theta2)`` recovered from the optimized
        parameter vector by ``unroll_thetas``.
    """
    initial_theta1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
    initial_theta2 = rand_initialize_weights(hidden_layer_size, num_labels)
    initial_nn_params = np.concatenate((initial_theta1.T.ravel(), initial_theta2.T.ravel()))

    # Built once here and passed through ``args`` so the cost and gradient
    # functions can reuse them on every evaluation.
    yk = recode_labels(y, num_labels)
    x_bias = np.r_[np.ones((1, x.shape[0])), x.T]

    # A mutable container lets the callback update the count in place.
    iterations_counter = dict(val=0)

    def show_progress(current_x):
        """Redraw the single-line progress bar; called by fmin_cg per iteration."""
        iterations_counter['val'] += 1
        progress = iterations_counter['val'] * 100 // max_iterations
        sys.stdout.write('\r[{0}{1}] {2}% - iter:{3}'.format(
            '=' * (progress // 5),
            ' ' * ((104 - progress) // 5),
            progress,
            iterations_counter['val']
        ))
        # The bar ends in no newline, so a line-buffered stdout would hold
        # it back until much later unless it is flushed explicitly.
        sys.stdout.flush()

    # Solve!
    nn_params = fmin_cg(
        compute_cost,
        x0=initial_nn_params,
        args=(input_layer_size, hidden_layer_size, num_labels, x, y, _lambda, yk, x_bias),
        fprime=compute_gradients,
        maxiter=max_iterations,
        callback=show_progress
    )

    # Obtain theta1 and theta2 back from nn_params
    theta1, theta2 = unroll_thetas(nn_params, input_layer_size, hidden_layer_size, num_labels)

    return theta1, theta2