# Shortened training routine: the epoch count is reduced to keep each trial fast.
def __train(lr, weight_decay, epochs=50, verbose=False):
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    optimizer = SGD(lr)
    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0
    train_loss_list = []
    train_acc_list = []
    val_acc_list = []
    for i in range(int(epochs * iter_per_epoch)):
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)

        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)
        if verbose:
            print("train loss:" + str(loss))

        if current_iter % iter_per_epoch == 0:
            current_epoch += 1
            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)
            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" + str(train_acc)
                      + ", validation acc:" + str(val_acc) + " ===")
        current_iter += 1
    return val_acc_list, train_acc_list
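# A minimal usage sketch, not part of the original cell: random hyperparameter
# search over lr and weight_decay, sampled log-uniformly; the ranges and the
# trial count here are illustrative assumptions.
optimization_trial = 100
results_val = {}
results_train = {}
for _ in range(optimization_trial):
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)
    val_acc_list, train_acc_list = __train(lr, weight_decay)
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    results_val[key] = val_acc_list
    results_train[key] = train_acc_list
    print("val acc:" + str(val_acc_list[-1]) + " | " + key)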
# Momentum (inertia) coefficient
momentum = 0.9

train_loss_list = []
accuracies_train = []
accuracies_test = []
plot_interval = 10

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]

    # Gradient
    grad = network.gradient(x_batch, d_batch)

    if i == 0:
        # Initialize the velocity buffers on the first iteration.
        v = {key: np.zeros_like(network.params[key])
             for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3')}

    for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3'):
        v[key] = momentum * v[key] - learning_rate * grad[key]
        network.params[key] += v[key]

    loss = network.loss(x_batch, d_batch)
    train_loss_list.append(loss)

    if (i + 1) % plot_interval == 0:
        accr_test = network.accuracy(x_test, d_test)
        accuracies_test.append(accr_test)
        accr_train = network.accuracy(x_batch, d_batch)
        accuracies_train.append(accr_train)
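# The same update rule packaged as an optimizer class -- a minimal sketch,
# assuming the SGD-style update(params, grads) interface used elsewhere here:
# v <- momentum * v - lr * grad;  param <- param + v.
class Momentum:
    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            # Lazily allocate one velocity buffer per parameter.
            self.v = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]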
train_size = x_train.shape[0]
batch_size = 100

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
epoch_cnt = 0

# Effectively an infinite loop; training stops via the max_epochs break below.
for i in range(1000000000):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    grads = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grads)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print("epoch:" + str(epoch_cnt) + ", train acc:" + str(train_acc)
              + ", test acc:" + str(test_acc))

        epoch_cnt += 1
        if epoch_cnt >= max_epochs:
            break
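# A plotting sketch for the lists collected above (an assumption -- the
# original cell stops after the training loop).
import matplotlib.pyplot as plt

x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train')
plt.plot(x, test_acc_list, linestyle='--', label='test')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()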
import numpy as np
from multi_layer_net import MultiLayerNet

# Generate dummy data
x = np.random.randn(1, 784)
t = np.random.randint(0, 10, (1,))

# Build the network (with batch normalization enabled)
network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100],
                        output_size=10, use_batchnorm=True)

# Gradient check: compare the numerical gradient against backprop.
grad_numerical = network.numerical_gradient(x, t)
grad_backprop = network.gradient(x, t)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ':' + str(diff))

# break symmetry
# x = np.random.randn(3, 2)
# t = np.random.randint(0, 10, (3,))
#
# # Build the network
# network = TwoLayerNet(input_size=2, hidden_size=3, output_size=10)
# grad = network.gradient(x, t)
# for key in ('W1', 'b1', 'W2', 'b2'):
#     network.params[key] -= 0.1 * grad[key]
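# For reference, a central-difference sketch of what a numerical_gradient
# routine typically computes (an assumption about this network's internals):
# df/dx[i] ~ (f(x + h) - f(x - h)) / (2h), evaluated one element at a time.
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x + h)
        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h)
        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value
        it.iternext()
    return grad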
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100

# The three initialization settings being compared; the keys are inferred from
# the markers dict below, and the values follow the usual convention (a plain
# numeric std, 'sigmoid' for Xavier, 'relu' for He).
weight_init_types = {'std=0.01': 0.01, 'Xavier': 'sigmoid', 'He': 'relu'}

train_loss = {}
for key, weight_type in weight_init_types.items():
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100],
                            output_size=10,
                            weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []
    for i in range(iters_num):
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]

        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        train_loss[key].append(network.loss(x_batch, t_batch))

markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()
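# A brief sketch of how string settings like 'sigmoid'/'relu' usually map to
# an initial weight scale for a layer with n_in inputs (an assumption about
# MultiLayerNet's behavior): Xavier uses sqrt(1/n_in), He uses sqrt(2/n_in).
def init_scale(weight_init_std, n_in):
    if str(weight_init_std).lower() in ('sigmoid', 'xavier'):
        return np.sqrt(1.0 / n_in)  # Xavier: suits sigmoid/tanh activations
    if str(weight_init_std).lower() in ('relu', 'he'):
        return np.sqrt(2.0 / n_in)  # He: suits ReLU activations
    return float(weight_init_std)   # a plain numeric std, e.g. 0.01

# e.g. the first layer's weights under He initialization:
W1 = init_scale('relu', 784) * np.random.randn(784, 100)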
optimizer['momentum'] = op.Momentum()

# 5-layer network; the output layer constrains the neuron count of the last
# hidden layer (10 here, matching the 10 classes).
network = MultiLayerNet(input_size=784,
                        hidden_size_list=[100, 100, 100, 100, 10],
                        output_size=10)

train_loss = []
train_accuracy = []
test_accuracy = []
epoch_cnt = 0

for i in range(iters_num):
    # Draw a random mini-batch from the training samples.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]
    t_batch = labels_train[batch_mask]

    grads = dict()
    grads['dW'], grads['db'] = network.gradient(x_batch, t_batch)
    # The multi-layer version drops the params dict and exposes W and b directly.
    optimizer['sgd'].update(network.W, grads['dW'])
    optimizer['sgd'].update(network.b, grads['db'])

    loss_value = network.loss(x_batch, t_batch)
    train_loss.append(loss_value)

    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(images_train, labels_train)
        test_acc = network.accuracy(images_test, labels_test)
        train_accuracy.append(train_acc)
        test_accuracy.append(test_acc)
        epoch_cnt += 1
        if epoch_cnt >= max_epoch:
            break

x = np.arange(len(train_accuracy))
plt.plot(x, train_accuracy, label='train')
plt.plot(x, test_accuracy, label='test')
plt.legend()
plt.show()
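# A minimal sketch (an assumption) of the update interface optimizer['sgd']
# is expected to expose when handed network.W / network.b as lists of
# per-layer arrays: plain SGD, param <- param - lr * grad, applied in place.
class SGD:
    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]  # in-place, so the network sees it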