# Gradient checks: compare the analytic gradients of each layer against
# numerical estimates obtained by finite differences.
x = np.random.randn(20, 40)
grad_out = np.random.randn(20, 30)
fc = layers.FC(x, 30, "fc")
print("Check grad wrt input")
check_grad_inputs(fc, x, grad_out)
print("Check grad wrt params")
check_grad_params(fc, x, fc.weights, fc.bias, grad_out)

print("\nSoftmaxCrossEntropyWithLogits")
x = np.random.randn(50, 20)
y = np.zeros([50, 20])
y[:, 0] = 1  # one-hot labels: every example is assigned to class 0
loss = layers.SoftmaxCrossEntropyWithLogits()
grad_x_num = eval_numerical_gradient(lambda x: loss.forward(x, y), x, 1)
out = loss.forward(x, y)  # run the forward pass before asking for the analytic gradient
grad_x = loss.backward_inputs(x, y)
print("Relative error = ", rel_error(grad_x_num, grad_x))
print("Error norm = ", np.linalg.norm(grad_x_num - grad_x))

print("\nL2Regularizer")
weights = np.random.randn(5, 4, 8, 8)
l2reg = layers.L2Regularizer(weights, 1e-2, 'L2reg')
print("Check grad wrt params")
# The lambda ignores its argument: eval_numerical_gradient perturbs
# l2reg.weights in place, so forward() already sees the perturbed values.
func = lambda params: l2reg.forward()
grad_num = eval_numerical_gradient(func, l2reg.weights, 1)
grads = l2reg.backward_params()
grad = grads[0][1]  # backward_params returns (param, grad) pairs; take the first grad
print("Relative error = ", rel_error(grad_num, grad))
print("Error norm = ", np.linalg.norm(grad_num - grad))
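These checks lean on two helpers defined elsewhere in the exercise. Below is a minimal sketch of what they are assumed to compute, in the style of the CS231n gradient-check utilities; the exact signature, in particular the third positional argument being a verbosity flag, is an assumption rather than something shown in this notebook.

import numpy as np

def rel_error(x, y):
    # largest elementwise relative difference, guarded against division by zero
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def eval_numerical_gradient(f, x, verbose=True, h=1e-5):
    # centered finite differences: grad[i] ~= (f(x + h*e_i) - f(x - h*e_i)) / (2h)
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)           # f(x + h)
        x[ix] = old - h
        fxmh = f(x)           # f(x - h)
        x[ix] = old           # restore the original value
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad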
# Hold out the last 5000 MNIST training images for validation.
train_x, valid_x = train_x[:55000], train_x[55000:]
train_y, valid_y = train_y[:55000], train_y[55000:]
test_x = ds_test.data.reshape([-1, 1, 28, 28]).numpy().astype(float) / 255
test_y = ds_test.targets.numpy()
# Center the data with the training-set mean only, then one-hot encode the labels.
train_mean = train_x.mean()
train_x, valid_x, test_x = (x - train_mean for x in (train_x, valid_x, test_x))
train_y, valid_y, test_y = (dense_to_one_hot(y, 10) for y in (train_y, valid_y, test_y))

weight_decay = config['weight_decay']
net = []
regularizers = []
# A dummy batch fixes the input shapes so every layer can allocate its parameters.
inputs = np.random.randn(config['batch_size'], 1, 28, 28)
net += [layers.Convolution(inputs, 16, 5, "conv1")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'conv1_l2reg')]
net += [layers.MaxPooling(net[-1], "pool1")]
net += [layers.ReLU(net[-1], "relu1")]
net += [layers.Convolution(net[-1], 32, 5, "conv2")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'conv2_l2reg')]
net += [layers.MaxPooling(net[-1], "pool2")]
net += [layers.ReLU(net[-1], "relu2")]
# the feature map is 7x7 at this point
net += [layers.Flatten(net[-1], "flatten3")]
net += [layers.FC(net[-1], 512, "fc3")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'fc3_l2reg')]
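The net list above only describes the architecture; nothing in this cell shows how it is evaluated. Here is a hedged sketch of how such a layer list and its regularizers could be combined into a loss, assuming each layer exposes the forward method seen in the gradient checks and that L2Regularizer.forward() returns the penalty value. The helper names forward_pass and regularized_loss are illustrative, not part of the exercise, and the network would still need a final classification layer before the loss.

def forward_pass(net, inputs):
    # feed the batch through every layer in order
    output = inputs
    for layer in net:
        output = layer.forward(output)
    return output

def regularized_loss(net, regularizers, loss, x, y):
    # data term plus the sum of the L2 penalties collected while building the net
    logits = forward_pass(net, x)
    return loss.forward(logits, y) + sum(reg.forward() for reg in regularizers)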
# Sweep the L2 penalty: rebuild and retrain the same architecture for each value.
for weight_decay in [1e-3, 1e-2, 1e-1]:
    print(f"Training for decay of {weight_decay:.03f}")
    config["save_dir"] = SAVE_DIR / f"lambda{weight_decay:.03f}"
    config["weight_decay"] = weight_decay
    os.makedirs(config["save_dir"], exist_ok=True)

    net = []
    regularizers = []
    inputs = np.random.randn(config["batch_size"], 1, 28, 28)
    net += [layers.Convolution(inputs, 16, 5, "conv1")]
    regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, "conv1_l2reg")]
    net += [layers.MaxPooling(net[-1], "pool1")]
    net += [layers.ReLU(net[-1], "relu1")]
    net += [layers.Convolution(net[-1], 32, 5, "conv2")]
    regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, "conv2_l2reg")]
    net += [layers.MaxPooling(net[-1], "pool2")]
    net += [layers.ReLU(net[-1], "relu2")]
    # the feature map is 7x7 at this point
    net += [layers.Flatten(net[-1], "flatten3")]
    net += [layers.FC(net[-1], 512, "fc3")]
    regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, "fc3_l2reg")]
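For completeness, here is a sketch of what a single training step inside this sweep might look like. Everything below is an assumption layered on the API glimpsed in the gradient checks: backward_inputs propagating the gradient, backward_params returning (param, grad) pairs, a has_params flag on each layer, and plain in-place SGD; none of it is taken verbatim from the exercise.

def sgd_step(net, loss, regularizers, x, y, lr):
    # Hypothetical single update, assuming the layer API used in the checks above.
    logits = x
    for layer in net:
        logits = layer.forward(logits)
    loss_val = loss.forward(logits, y) + sum(r.forward() for r in regularizers)

    # Backward pass: collect (param, grad) pairs from regularizers and layers.
    param_grads = [r.backward_params() for r in regularizers]
    grad = loss.backward_inputs(logits, y)
    for layer in reversed(net):
        if layer.has_params:                      # assumed flag
            param_grads += [layer.backward_params(grad)]
        grad = layer.backward_inputs(grad)

    # Vanilla SGD: update every parameter array in place.
    for pairs in param_grads:
        for param, g in pairs:
            param -= lr * g
    return loss_val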