def grad(X, W, C, v, param, column=0):
    """Evaluate a softmax gradient with one column of ``W`` or ``X`` set to ``v``.

    The selected matrix is copied first, so the caller's ``W`` and ``X`` are
    never modified.

    Args:
        X: Data matrix handed to the ``grads`` helpers.
        W: Weight matrix handed to the ``grads`` helpers.
        C: Passed through unchanged to the ``grads`` helpers.
        v: Value(s) written into ``[:, column]`` of the copied matrix.
        param: ``'w'`` to replace a column of ``W`` and return
            ``grads.softmax_gradient``; ``'x'`` to replace a column of ``X``
            and return ``grads.softmax_data_gradient``.
        column: Index of the column to overwrite.

    Returns:
        The result of the selected ``grads`` helper, or ``None`` when
        ``param`` is neither ``'w'`` nor ``'x'``.
    """

    def _with_column(matrix):
        # np.zeros defaults to float64, so this is a float copy of `matrix`
        # whose `column` has been overwritten with v.
        replaced = np.zeros(matrix.shape)
        replaced += matrix
        replaced[:, column] = v
        return replaced

    if param == 'w':
        w_probe = _with_column(W)
        return grads.softmax_gradient(X, w_probe, C)

    if param == 'x':
        x_probe = _with_column(X)
        return grads.softmax_data_gradient(x_probe, W, C)

    return None
def backward_propagation(W, X, B, C, relu_derivative, x_history, learning_rate):
    """Apply one gradient-descent step to every layer and return ``(W, B)``.

    The softmax (last) layer is handled first; the remaining layers are then
    visited from index ``B.shape[0] - 1`` down to ``0`` while the gradient
    with respect to each layer's input is propagated backwards.

    All gradients belonging to a layer are computed from that layer's weights
    as they were when this function was called.  A layer's weights are
    overwritten only after the gradient for the layer below has been derived
    from them, so the propagated gradient is consistent with the weight and
    bias gradients of the same step.

    Args:
        W: Indexable collection of per-layer weights; ``W[-1]`` is used for
            the softmax layer and ``W[i]`` for layer ``i``.  Updated in place.
        X: Unused; kept so existing callers keep working.
        B: Per-layer biases; ``B.shape[0]`` sets how many layers the loop
            visits.  Updated in place.
        C: Passed through unchanged to the softmax gradient helpers
            (presumably the label matrix -- confirm against the caller).
        relu_derivative: Indexable by layer; entry ``i`` is forwarded to the
            ``grads.JacV_*`` helpers for layer ``i``.
        x_history: Indexable by layer; ``x_history[i]`` is forwarded to
            ``grads.JacV_w`` and ``x_history[-1]`` to the softmax helpers.
        learning_rate: Multiplier applied to every gradient before it is
            subtracted from the parameters.

    Returns:
        The same ``W`` and ``B`` objects, after the update.
    """
    last_input = x_history[-1]
    last_w = W[-1]

    # Softmax layer: take both gradients before touching W[-1].
    w_grad = grads.softmax_gradient(last_input, np.transpose(last_w), C)
    # Gradient w.r.t. the layer input; the last column of W[-1] is sliced off
    # (the original comment identifies it as the bias entries).
    x_grad = grads.softmax_data_gradient(
        last_input, np.transpose(last_w[:, :-1]), C)
    W[-1] = last_w - learning_rate * np.transpose(w_grad)

    # Remaining layers, last to first.
    for i in reversed(range(B.shape[0])):
        b_grad = grads.JacV_b(relu_derivative[i], x_grad)
        w_grad = grads.JacV_w(x_history[i], relu_derivative[i], x_grad)
        # Propagate through W[i] *before* it is updated, so this gradient is
        # evaluated at the same weights as b_grad and w_grad.
        next_x_grad = grads.JacV_x(W[i], relu_derivative[i], x_grad)

        B[i] = B[i] - learning_rate * b_grad
        W[i] = W[i] - learning_rate * w_grad
        x_grad = next_x_grad

    return W, B
c_training, c_validation = rearrange_labels(C, c_valid) for i in range(max_iter): num_of_mini_batches = round(X.shape[1] / batch_size) perm = np.random.permutation(X.shape[1]) learning_rate = 1 / np.sqrt(i + 1) for j in range(num_of_mini_batches): batch_indexes = perm[(j * batch_size):((j + 1) * batch_size)] # iterating over all mini batches mini_batch_x = X[:, batch_indexes] mini_batch_c = C[:, batch_indexes] # iterate over the mini_batch [previous_index, ... next_index] grad = grads.softmax_gradient(mini_batch_x, W, mini_batch_c) W = W - learning_rate * grad train_success_rate, validation_success_rate = check_predication(W, X, x_valid, c_training, c_validation) history.append([train_success_rate, validation_success_rate]) if i % 100 == 0: print('loss: ', softmax_objective(X, W, C), ' epoch: ', i) print("train success rate is: " + str(train_success_rate * 100) + "%" + " validation success rate is: " + str(validation_success_rate * 100) + "%") # appending data for the plots train_rate_data.append(train_success_rate * 100) validation_rate_data.append(validation_success_rate * 100) epoch_data.append(i)