def test01_linear_convergence_gradient_test_X(self):
    # Zero-order gradient test in X: |f(X + eps*d) - f(X)| = O(eps),
    # so halving eps should roughly halve the loss (ratio ~ 0.5).
    max_iter = 17
    eps0 = 0.5
    losses = []
    loss_convergence = []
    C, W, X, _, d = create_C_W_X_d()
    for i in range(max_iter):
        eps = eps0 * (0.5 ** i)
        objective1 = round(objective_soft_max(X + eps * d, W, C), 10)
        objective2 = round(objective_soft_max(X, W, C), 10)
        losses.append(abs(objective1 - objective2))
    for i in range(1, len(losses)):
        loss_convergence.append(losses[i] / losses[i - 1])
    avg_val = round(average(loss_convergence[-5:]), 4)
    self.assertTrue(0.4 <= avg_val <= 0.6, msg=f'avg value = {avg_val}')
def test00_linear_convergence_gradient_test_W(self):
    # Zero-order gradient test in W: |f(W + eps*d) - f(W)| = O(eps),
    # so halving eps should roughly halve the loss (ratio ~ 0.5).
    max_iter = 25
    eps0 = 0.5
    losses = []
    loss_convergence = []
    C, W, X, d, _ = create_C_W_X_d()
    for i in range(max_iter):
        eps = eps0 * (0.5 ** i)
        losses.append(
            abs(objective_soft_max(X, W + eps * d, C)
                - objective_soft_max(X, W, C)))
    for i in range(1, len(losses)):
        loss_convergence.append(losses[i] / losses[i - 1])
    avg_val = round(average(loss_convergence[-5:]), 4)
    self.assertTrue(0.4 <= avg_val <= 0.6, msg=f'avg value = {avg_val}')
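# For orientation, a minimal sketch of what the create_C_W_X_d() fixture
# could return; the real helper lives elsewhere in this repo, and the
# sizes n, m, l below are illustrative assumptions. The tests unpack
# (C, W, X, d_W, d_X): one-hot labels, weights, data, and unit-norm
# perturbation directions matching the shapes of W and X respectively.
def _create_C_W_X_d_sketch(n=5, m=40, l=3):
    from numpy import eye
    from numpy.linalg import norm
    from numpy.random import randn, randint
    X = randn(n, m)                   # data, one sample per column
    C = eye(l)[randint(0, l, m)].T    # one-hot labels, shape (l, m)
    W = randn(n, l)                   # weight matrix
    d_W = randn(*W.shape)
    d_W /= norm(d_W)                  # unit-norm direction in W-space
    d_X = randn(*X.shape)
    d_X /= norm(d_X)                  # unit-norm direction in X-space
    return C, W, X, d_W, d_X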
def test03_quadratic_convergence_gradient_X(self):
    # First-order gradient test in X: subtracting the linear term
    # eps * <d, grad_X f> leaves an O(eps^2) remainder, so halving
    # eps should roughly quarter the loss (ratio ~ 0.25).
    max_iter = 17
    eps0 = 0.5
    losses = []
    loss_convergence = []
    C, W, X, _, d = create_C_W_X_d()
    for i in range(max_iter):
        eps = eps0 * (0.5 ** i)
        objective1 = objective_soft_max(X + eps * d, W, C)
        objective2 = objective_soft_max(X, W, C)
        linear_term = eps * trace(
            d.T @ objective_soft_max_gradient_X(X, W, C))
        losses.append(abs(objective1 - objective2 - linear_term))
    for j in range(1, len(losses)):
        loss_convergence.append(losses[j] / losses[j - 1])
    avg_val = round(average(loss_convergence[-5:]), 4)
    self.assertTrue(0.2 <= avg_val <= 0.3, msg=f'avg value = {avg_val}')
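# Why these ratio bands: by Taylor expansion,
#   f(x + eps*d) = f(x) + eps * <d, grad f(x)> + O(eps^2),
# so the zero-order difference |f(x + eps*d) - f(x)| shrinks like eps
# (ratio ~ 0.5 when eps is halved, hence the 0.4-0.6 band above), while
# subtracting the linear term leaves only the O(eps^2) remainder
# (ratio ~ 0.25 when eps is halved, hence the 0.2-0.3 band).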
def train(C_train, C_val, X_train, X_val, batch_size, epochs, lr, momentum=0):
    # ----------------- hyper-parameter init -----------------
    W0 = randn(X_train.shape[0], C_train.shape[0])
    m, n = W0.shape
    W = W0.copy()
    optimizer = SGD(batch_size=batch_size, m=X_train.shape[1])
    # ---------------------------------------------------------
    # ----------------- stats lists init ----------------------
    W_history = zeros((W.shape[0] * W.shape[1], epochs))
    train_score = []
    val_score = []
    train_acc = []
    val_acc = []
    # ---------------------------------------------------------
    for epoch in range(epochs):
        W = optimizer.optimize(W, X_train, C_train,
                               objective_soft_max,
                               objective_soft_max_gradient_W,
                               lr=lr, momentum=momentum)
        # flatten and store this epoch's weights for later averaging
        W_history[:, epoch] = W.reshape(W.shape[0] * W.shape[1])
        train_score.append(objective_soft_max(X_train, W, C_train))
        val_score.append(objective_soft_max(X_val, W, C_val))
        train_acc.append(accuracy(X_train, W, C_train))
        val_acc.append(accuracy(X_val, W, C_val))
    # score the epoch-averaged weights as well (appended as a final entry)
    W_res = average(W_history, axis=1).reshape(m, n)
    train_score.append(objective_soft_max(X_train, W_res, C_train))
    val_score.append(objective_soft_max(X_val, W_res, C_val))
    # TODO: plot epochs vs. accuracy (recorded in train_acc / val_acc)
    plot(range(len(train_score)), train_score)
    return train_score, train_acc, val_score, val_acc
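# Example call (a sketch): the hyper-parameter values below are
# illustrative assumptions, not the repo's tuned settings. data_factory
# is the same loader used by the forward-pass sanity test below.
def _example_train_run():
    C_train, C_val, X_train, X_val = data_factory('Swiss')
    train_score, train_acc, val_score, val_acc = train(
        C_train, C_val, X_train, X_val,
        batch_size=32, epochs=50, lr=0.1, momentum=0.9)
    print(f'final validation accuracy: {val_acc[-1]:.4f}')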
def test00_forward_tanh_sanity(self):
    # Sanity check: build a small network, run a forward pass, and feed
    # the result through the softmax objective; the test passes as long
    # as none of these steps raises.
    C_train, C_val, X_train, X_val = data_factory(
        'Swiss')  # options: 'Swiss', 'PeaksData', 'GMMData'
    n = X_train.shape[0]
    l = C_train.shape[0]
    layer_function = Function(ReLU_F, f_grad_X_mul_V, f_grad_W_mul_V)
    model = NeuralNetwork(10,
                          f=layer_function,
                          sigma=tanh,
                          layer_dim=(n, n),
                          output_dim=(n, l))
    output = model(X_val)
    # evaluate the objective on the network output (value unused; the
    # call itself is part of the sanity check)
    objective_value = objective_soft_max(X=None, W=None, C=C_val,
                                         WT_X=output)
    self.assertTrue(True)
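# Note on the call above: objective_soft_max is invoked with X=None,
# W=None and the precomputed product WT_X=output, i.e. the network
# output stands in for W^T X in the softmax objective; this is also why
# output_dim maps the layer width n to the number of classes l.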