import datetime
import os

import cv2
import numpy as np


def main(input_train, input_test, correct_train_orig, correct_test_orig,
         train_data_name, test_data_name):
    # input_train, correct_train: numpy.array
    # input_test, correct_test: list whose elements are numpy.array
    n_train = len(input_train)  # number of training samples
    n_test = len(input_test[0])  # number of test samples

    # Convert the labels to one-hot representation.
    correct_train = np.zeros((n_train, len(chars)))  # 2000x20
    correct_test = [np.zeros((n_test, len(chars)))
                    for i in range(len(input_test))]  # len(input_test) arrays of 2000x20
    for i in range(n_train):
        correct_train[i, correct_train_orig[i]] = 1
    for i in range(len(input_test)):
        for j in range(n_test):
            correct_test[i][j, correct_test_orig[i][j]] = 1

    # Initialize each layer.
    middle_layer = MiddleLayer(n_input, n_middle, eta, alpha)
    output_layer = OutputLayer(n_middle, n_output, eta, alpha)

    # Initialize the network.
    net = NeuralNetwork([middle_layer, output_layer])

    # Error history.
    train_error_x = []
    train_error_y = []
    test_error_x = []
    test_error_y = [[] for i in range(len(input_test))]

    n_batch = n_train // batch_size  # number of mini-batches per epoch
    for i in range(epoch):
        # Compute the errors.
        net.forward_prop(input_train)  # forward propagation
        error_train = net.cross_entropy(correct_train, n_train)  # cross-entropy error
        error_test = []
        for j in range(len(input_test)):
            net.forward_prop(input_test[j])  # forward propagation
            error_test.append(net.cross_entropy(correct_test[j], n_test))  # cross-entropy error

        # Record the errors.
        train_error_x.append(i)
        train_error_y.append(error_train)
        test_error_x.append(i)
        for j in range(len(input_test)):
            test_error_y[j].append(error_test[j])

        # Report training progress.
        if i % interval == 0:
            print("Epoch: {}".format(i))
            # Show the cross-entropy errors.
            print("Error_train({}): {}".format(train_data_name, error_train))
            for j in range(len(input_test)):
                print("Error_test({}): {}".format(test_data_name[j], error_test[j]))
            # Show the accuracies.
            print("Accuracy_train({}): {}".format(
                train_data_name, net.accuracy(input_train, correct_train)))
            for j in range(len(input_test)):
                print("Accuracy_test({}): {}".format(
                    test_data_name[j], net.accuracy(input_test[j], correct_test[j])))
            print("=" * 50)

        # Training.
        index_random = np.arange(n_train)
        np.random.shuffle(index_random)
        for j in range(n_batch):
            # Build a mini-batch.
            mb_index = index_random[j * batch_size:(j + 1) * batch_size]
            input_ = input_train[mb_index, :]  # inputs
            correct = correct_train[mb_index, :]  # labels
            net.forward_prop(input_)  # forward propagation
            net.back_prop(correct)  # backpropagation
            net.update_wb()  # update the weights and biases

    # Save the results.
    make_dir(os.getcwd() + os.sep + "result")
    graph_name = os.path.join(os.getcwd(), "result", "error.png")
    save_error_graph(train_error_x, train_error_y, test_error_x, test_error_y,
                     train_data_name, test_data_name, graph_name)

    # Show the graph.
    if show_error_graph:
        img = cv2.imread(graph_name)
        cv2.imshow(os.path.basename(graph_name), img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Write the accuracies to a file.
    if write_accuracy:
        now = datetime.datetime.now()
        file_name = "accuracy_" + now.strftime("%Y-%m-%d_%H-%M-%S") + ".txt"
        make_dir(os.getcwd() + os.sep + "result")
        with open(os.path.join("result", file_name), mode="w") as f:
            # Write the training accuracy.
            f.write("{}: {}\n".format(
                train_data_name, net.accuracy(input_train, correct_train)))
            # Write the test accuracies.
            for j in range(len(input_test)):
                f.write("{}: {}\n".format(
                    test_data_name[j],
                    net.accuracy(input_test[j], correct_test[j])))

    # Write the value of each output-layer unit for one input character 「あ」 to a file.
    if write_accuracy:
        input_ = input_train[0]  # take the very first 「あ」
        input_ = input_.reshape((1, input_.shape[0]))  # turn the vector into a matrix
        net.forward_prop(input_)
        make_dir(os.getcwd() + os.sep + "result")
        # Write the output values.
        with open(os.path.join("result", "output.txt"), mode="w") as f:
            for i, p in enumerate(net.output[0]):
                f.write("{}: {}\n".format(i, p))
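# main() above relies on make_dir() and save_error_graph(), which are defined
# elsewhere in the project. A minimal sketch of both, assuming matplotlib is
# available for the plot; the signatures match the call sites above, but the
# styling and axis labels are assumptions, not the original code.
import matplotlib
matplotlib.use("Agg")  # draw to a file, no display needed
import matplotlib.pyplot as plt


def make_dir(path):
    # Create the directory if it does not already exist.
    if not os.path.exists(path):
        os.makedirs(path)


def save_error_graph(train_x, train_y, test_x, test_y,
                     train_name, test_names, file_name):
    # One curve for the training error and one per test set.
    fig = plt.figure()
    plt.plot(train_x, train_y, label="train: {}".format(train_name))
    for j, ys in enumerate(test_y):
        plt.plot(test_x, ys, label="test: {}".format(test_names[j]))
    plt.xlabel("epoch")
    plt.ylabel("cross-entropy error")
    plt.legend()
    fig.savefig(file_name)
    plt.close(fig)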
import matplotlib.pyplot as plt
import numpy as np
from celluloid import Camera


if __name__ == "__main__":
    # Set up the figure and a celluloid Camera to record animation frames.
    camera1 = Camera(plt.figure())
    plt.ylim(0, 0.45)

    # Target: a parabola, normalized to unit length.
    x = np.linspace(-50, 50, 26)
    y = x**2
    y = y / np.linalg.norm(y)
    plt.yticks(y, [""] * len(y))  # tick marks at the target values, no labels

    # Append a bias column of ones to the inputs.
    x = x.reshape((-1, 1))
    x = np.c_[x, np.ones(len(x))]

    nn = NeuralNetwork(x, y, layer_size=10, layer1='sigmoid', layer2='sigmoid')
    print("Training with sigmoid")
    for i in range(80001):
        nn.feed_forward()
        nn.back_prop()
        if i % 200 == 0:
            # Snapshot the current fit as one animation frame.
            points = np.c_[np.linspace(-50, 50, 26), nn.output].T
            plt.scatter(*points, c="black")
            plt.text(-52., 0.45, 'Iteration: {} error: {}'.format(
                i, error(nn.output, nn.y)), fontsize=12)
            camera1.snap()
    print("Ended training, waiting for closing animation")
    animation = camera1.animate()
    plt.show()
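# error() is used in the frame annotation above but not defined in this
# snippet. A minimal sketch, assuming it is plain mean squared error; the
# original project's metric may differ.
def error(output, target):
    # Mean squared error between the network output and the target values.
    return np.mean(np.square(np.asarray(output).ravel()
                             - np.asarray(target).ravel()))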
import datetime
import os
import time

import numpy as np


def start(train_data, test_data, resuming=False, testing=True):
    # Inputs are every column but the last; the target is the last column,
    # scaled down by 800.
    nn = NeuralNetwork(train_data[:, :-1],
                       train_data[:, -1].reshape(-1, 1) / 800)

    if resuming:
        # Restore the iteration counter, last loss, learning rate and weights.
        si, last_lossf, nn.learning_rate = np.load('state/state.npy')
        nn.weights0 = np.load('state/weights0.npy')
        nn.weights1 = np.load('state/weights1.npy')
        nn.weights2 = np.load('state/weights2.npy')
        nn.biasw1 = np.load('state/biasw1.npy')
        nn.biasw2 = np.load('state/biasw2.npy')
        best_test_lossf = np.load('state/best/test_lossf.npy')
    else:
        if not os.path.exists('state'):
            os.makedirs('state')
        if not os.path.exists('state/best'):
            os.makedirs('state/best')
        best_test_lossf = np.inf
        si = 0

    out = open('track_info.dat', 'a')
    it = 2000
    for i in range(int(si), it):
        print('\n\niteration %d of %d\n' % (i + 1, it))
        print('  layer 1: n = %d' % nn.n1)
        print('  layer 2: n = %d' % nn.n2)

        t0 = time.time()
        print('\nbegin feed_forward() at '
              + datetime.datetime.now().strftime("%H:%M:%S on %d %B %Y"))
        nn.feed_forward()

        # Five-number summary of the targets, outputs, weights and biases.
        print('\n                        first                  third')
        print('                min   quartile     median   quartile        max')
        print('          ---------- ---------- ---------- ---------- ----------')
        print('       y |%11s %11s %11s %11s %11s' % quartiles(nn.y))
        print('  output |%11s %11s %11s %11s %11s' % quartiles(nn.output))
        print('')
        print('weights0 |%11s %11s %11s %11s %11s' % quartiles(nn.weights0))
        print('weights1 |%11s %11s %11s %11s %11s' % quartiles(nn.weights1))
        print('weights2 |%11s %11s %11s %11s %11s' % quartiles(nn.weights2))
        print('')
        print('  biasw1 |%11s %11s %11s %11s %11s' % quartiles(nn.biasw1))
        print('  biasw2 |%11s %11s %11s %11s %11s' % quartiles(nn.biasw2))

        te = time.time() - t0
        print('\nfeed_forward() took %dm %.2fs\n' % (te // 60, te % 60))

        lossf = nn.loss_function()
        print('\n loss function (training): %.9f\n' % lossf)
        out.write(datetime.datetime.now().strftime("%H:%M:%S %m/%d/%y")
                  + ' %6s %e %.6f' % (i + 1, nn.learning_rate, lossf))

        if i > 10:
            if last_lossf > lossf:
                # The loss improved: checkpoint and raise the learning rate.
                nn.save_weights()
                np.save('state/state.npy', [i, lossf, nn.learning_rate])
                if i < 200:
                    nn.learning_rate *= 1.01
                else:
                    nn.learning_rate *= 1.005
            else:
                # The loss got worse: lower the learning rate.
                nn.learning_rate /= 1.007

        # Evaluate on the validation set with the current weights.
        tn = TrainedNeuralNetwork(test_data[:, :-1])
        tn.load_weights(
            (nn.weights0, nn.weights1, nn.weights2, nn.biasw1, nn.biasw2))
        tn.feed_forward()
        test_lossf = np.mean(
            np.square(test_data[:, -1].reshape(-1, 1) - 800 * tn.output))
        print(' loss function (validation): %.3f HKD^2 -> %.3f HKD (rms)\n'
              % (test_lossf, np.sqrt(test_lossf)))
        out.write(' %.3f' % np.sqrt(test_lossf))

        if best_test_lossf > test_lossf:
            # New best validation loss: save it along with the weights.
            best_test_lossf = test_lossf
            np.save('state/best/test_lossf.npy', best_test_lossf)
            nn.save_weights(best=True)

        t0 = time.time()
        print('\nbegin back_prop() at '
              + datetime.datetime.now().strftime("%H:%M:%S on %d %B %Y"))
        print('\n learning rate = %e' % nn.learning_rate)
        nn.back_prop(testing)
        if testing:
            print('\nweights0 |%11s %11s %11s %11s %11s' % quartiles(nn.weights0))
            print('weights1 |%11s %11s %11s %11s %11s' % quartiles(nn.weights1))
            print('weights2 |%11s %11s %11s %11s %11s' % quartiles(nn.weights2))
            print('  biasw1 |%11s %11s %11s %11s %11s' % quartiles(nn.biasw1))
            print('  biasw2 |%11s %11s %11s %11s %11s' % quartiles(nn.biasw2))
        te = time.time() - t0
        print('\nback_prop() took %dm %.2fs\n' % (te // 60, te % 60))

        last_lossf = lossf
        out.write('\n')
    out.close()
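# quartiles() feeds the five %11s fields in the diagnostic tables above but
# is not defined in this file. A minimal sketch: it returns a 5-tuple of
# formatted strings (min, first quartile, median, third quartile, max); the
# '%10.3e' formatting is an assumption.
def quartiles(a):
    # Five-number summary over all elements of the array.
    q = np.percentile(np.asarray(a).ravel(), [0, 25, 50, 75, 100])
    return tuple('%10.3e' % v for v in q)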