def __train(lr, weight_decay, epocs=50, verbose=False):  # reduced epoch count
    """Train a 6-hidden-layer MultiLayerNet with SGD and return accuracy histories.

    Args:
        lr: learning rate handed to the SGD optimizer.
        weight_decay: L2 regularization strength (weight_decay_lambda).
        epocs: number of training epochs (misspelled name kept so existing
            keyword callers are not broken).
        verbose: when True, print per-iteration loss and per-epoch accuracies.

    Returns:
        (val_acc_list, train_acc_list): per-epoch accuracy on the validation
        and training sets, in that order.

    NOTE(review): reads module-level globals train_size, mini_batch_size,
    x_train, t_train, x_val, t_val — confirm they are defined before calling.
    """
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100, 100, 100],
                            output_size=10,
                            weight_decay_lambda=weight_decay)
    optimizer = SGD(lr)
    iter_per_epoch = max(train_size / mini_batch_size, 1)
    current_iter = 0
    current_epoch = 0
    train_loss_list = []
    train_acc_list = []
    val_acc_list = []
    # BUG FIX: the loop originally read the undefined name `epochs`
    # (NameError unless an unrelated global happened to exist);
    # it now uses the `epocs` parameter.
    for i in range(int(epocs * iter_per_epoch)):
        # Sample a random mini-batch.
        batch_mask = np.random.choice(train_size, mini_batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
        # Backprop, SGD step, and loss logging.
        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)
        if verbose:
            print("train loss:" + str(loss))
        # Once per epoch, evaluate on the full train and validation sets.
        if current_iter % iter_per_epoch == 0:
            current_epoch += 1
            train_acc = network.accuracy(x_train, t_train)
            val_acc = network.accuracy(x_val, t_val)
            train_acc_list.append(train_acc)
            val_acc_list.append(val_acc)
            if verbose:
                print("=== epoch:" + str(current_epoch) + ", train acc:" + str(train_acc) + ", validation acc:" + str(val_acc) + " ===")
        current_iter += 1
    return val_acc_list, train_acc_list
# Momentum-SGD training loop over random mini-batches; logs loss every
# iteration and train/test accuracy every `plot_interval` iterations.
# NOTE(review): relies on globals iters_num, train_size, batch_size, x_train,
# d_train, x_test, d_test, network, momentum, learning_rate, train_loss_list,
# accuracies_train, accuracies_test, plot_interval.
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    d_batch = d_train[batch_mask]
    # gradient
    grad = network.gradient(x_batch, d_batch)
    # IDIOM FIX: build all velocity buffers once on the first iteration,
    # instead of re-testing `i == 0` for every key on every iteration.
    if i == 0:
        v = {key: np.zeros_like(network.params[key])
             for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3')}
    for key in ('W1', 'W2', 'W3', 'b1', 'b2', 'b3'):
        # Classic momentum update: v <- m*v - lr*grad ; param <- param + v.
        v[key] = momentum * v[key] - learning_rate * grad[key]
        network.params[key] += v[key]
    loss = network.loss(x_batch, d_batch)
    train_loss_list.append(loss)
    if (i + 1) % plot_interval == 0:
        accr_test = network.accuracy(x_test, d_test)
        accuracies_test.append(accr_test)
        # NOTE(review): "training" accuracy is measured on the current
        # mini-batch only, while test accuracy uses the full test set.
        accr_train = network.accuracy(x_batch, d_batch)
        accuracies_train.append(accr_train)
        print('Generation: ' + str(i+1) + '. 正答率(トレーニング) = ' + str(round(100 * accr_train, 2)) + '%')
        print(' : ' + str(i+1) + '. 正答率(テスト) = ' + str(round(100 * accr_test, 2)) + '%')

# Plot both accuracy curves over the sampled iterations.
lists = range(0, iters_num, plot_interval)
plt.plot(lists, accuracies_train, label="training set")
plt.plot(lists, accuracies_test, label="test set")
# Accumulators for the loss-tracking run below.
batch_size = 100
train_loss = []
test_loss = []

iter_per_epoch = max(train_size / batch_size, 1)

# One history list per tracked parameter key.
values = {key: [] for key in network.indexes}

for step in range(iters_num):
    # Draw a random training mini-batch.
    picked = np.random.choice(train_size, batch_size)
    x_batch = x_train[picked]
    t_batch = t_train[picked]

    # Backprop and parameter update.
    grad = network.gradient(x_batch, t_batch)
    optimizer.update(network.params, grad)

    loss = network.loss(x_batch, t_batch)
    train_loss.append(loss)

    # Track generalization with a random test-set mini-batch of the same size.
    picked_val = np.random.choice(test_size, batch_size)
    x_batch_val = x_test[picked_val]
    t_batch_val = t_test[picked_val]
    loss_val = network.loss(x_batch_val, t_batch_val)
    test_loss.append(loss_val)

    # Record the mean value of every monitored parameter tensor.
    for key, history in values.items():
        param = network.params[key]
        history.append(np.sum(param) / param.size)

    # Progress report once per epoch.
    if step % iter_per_epoch == 0:
        print("iter number : {}/{}, loss = {}".format(step, iters_num, loss))
# Bookkeeping for loss and periodic accuracy measurements.
train_loss_list = []
accuracies_train = []
accuracies_test = []
plot_interval = 10

for it in range(iters_num):
    # Random mini-batch from the training data.
    chosen = np.random.choice(train_size, batch_size)
    x_batch = x_train[chosen]
    d_batch = d_train[chosen]

    # Backprop, optimizer step, loss logging.
    grad = network.gradient(x_batch, d_batch)
    optimizer.update(network.params, grad)
    train_loss_list.append(network.loss(x_batch, d_batch))

    # Only evaluate/print every `plot_interval` iterations.
    if (it + 1) % plot_interval != 0:
        continue
    accr_train = network.accuracy(x_train, d_train)
    accr_test = network.accuracy(x_test, d_test)
    accuracies_train.append(accr_train)
    accuracies_test.append(accr_test)
    print('Generation: ' + str(it + 1) + '. 正答率(トレーニング) = ' + str(accr_train))
    print(' : ' + str(it + 1) + '. 正答率(テスト) = ' + str(accr_test))

# Training-set accuracy curve over the sampled iterations.
lists = range(0, iters_num, plot_interval)
plt.plot(lists, accuracies_train, label="training set")
# NOTE(review): this chunk opens mid-statement — the call producing
# x_train/t_train (presumably a load_mnist(...) call) begins before this
# view; the line below is its trailing keyword arguments.
                                                       normalize=True, one_hot_label=True)

# Compare training-loss curves across weight-initialization schemes.
iters_num = 2000
train_size = x_train.shape[0]
batch_size = 100
train_loss = {}  # scheme label -> list of per-iteration losses

# weight_init_types is defined elsewhere; presumably maps the labels
# 'std=0.01' / 'Xavier' / 'He' to weight_init_std values — TODO confirm
# against its definition (the markers dict below assumes exactly those keys).
for key, weight_type in weight_init_types.items():
    # Fresh network and optimizer per scheme so runs are independent.
    network = MultiLayerNet(input_size=784,
                            hidden_size_list=[100, 100, 100, 100],
                            output_size=10,
                            weight_init_std=weight_type)
    optimizer = SGD()
    train_loss[key] = []
    for i in range(iters_num):
        # Random mini-batch sampling.
        mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[mask]
        t_batch = t_train[mask]
        grads = network.gradient(x_batch, t_batch)
        optimizer.update(network.params, grads)
        # Loss is recomputed after the update and logged.
        train_loss[key].append(network.loss(x_batch, t_batch))

# One curve per initialization scheme; marker keys must match
# the keys of weight_init_types.
markers = {'std=0.01': 'o', 'Xavier': 's', 'He': 'D'}
x = np.arange(iters_num)
for key in weight_init_types.keys():
    plt.plot(x, train_loss[key], marker=markers[key], markevery=100, label=key)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.ylim(0, 2.5)
plt.legend()
plt.show()
# Train a MultiLayerNet with SGD applied directly to W and b, then plot
# train/test accuracy per epoch.
# NOTE(review): relies on globals iters_num, train_size, batch_size,
# iter_per_epoch, epoch_cnt, max_epoch, optimizer, images_train/labels_train,
# images_test/labels_test, and a plotting module bound to the name `plot` —
# confirm it is matplotlib.pyplot.
network = MultiLayerNet(input_size=784, hidden_size_list=[100, 100, 100, 100, 10], output_size=10)
train_loss = []
train_accuracy = []
test_accuracy = []
for i in range(iters_num):
    # Randomly sample a mini-batch from the training set.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = images_train[batch_mask]
    t_batch = labels_train[batch_mask]
    # This multi-layer variant drops the params dict and exposes W and b
    # directly, so weights and biases get two separate optimizer calls.
    grads = dict()
    grads['dW'], grads['db'] = network.gradient(x_batch, t_batch)
    optimizer['sgd'].update(network.W, grads['dW'])
    optimizer['sgd'].update(network.b, grads['db'])
    loss_value = network.loss(x_batch, t_batch)
    train_loss.append(loss_value)
    # Once per epoch: full-set accuracies; stop after max_epoch epochs.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(images_train, labels_train)
        test_acc = network.accuracy(images_test, labels_test)
        train_accuracy.append(train_acc)
        test_accuracy.append(test_acc)
        epoch_cnt += 1
        if epoch_cnt >= max_epoch:
            break

x = np.arange(len(train_accuracy))
# BUG FIX: plot(x, y1, y2) makes matplotlib parse the third positional
# argument as a format string and raise for a list — plot each series
# with its own call instead.
plot.plot(x, train_accuracy)
plot.plot(x, test_accuracy)
plot.show()