learning_rate = 0.07

# Number of randomly drawn training samples (= gradient-descent iterations).
epoch = 1000

# Initialize the network parameters.
network = init_network()

# Randomly sample `epoch` datasets (with replacement).
random_datasets = np.random.choice(data_sets, epoch)

# FIX: `losses` was appended to below without ever being initialized,
# which raises NameError on the first iteration.
losses = []

# Repeated gradient descent over the sampled data.
for dataset in random_datasets:
    x, d = dataset['x'], dataset['d']
    z1, y = forward(network, x)
    grad = backward(x, d, z1, y)
    # Apply the gradient to every parameter.
    for key in ('W1', 'W2', 'b1', 'b2'):
        network[key] -= learning_rate * grad[key]
    # Record the loss for this iteration.
    loss = functions.mean_squared_error(d, y)
    losses.append(loss)

print("##### 結果表示 #####")
lists = range(epoch)
plt.plot(lists, losses, '.')
plt.title('loss')
# Show the graph.
plt.show()
from pathlib import Path
import sys

# FIX: sys.path entries must be strings; Path.cwd().parent is a Path
# object, so convert explicitly before appending.
sys.path.append(str(Path.cwd().parent))

import numpy as np

from common.functions import mean_squared_error

# Ground truth: "2" is the correct class (one-hot encoding).
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

# Example 1: "2" has the highest predicted probability (0.6).
# FIX: the result was computed but discarded; print it so the script
# actually shows the loss when run.
y1 = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print(mean_squared_error(np.array(y1), np.array(t)))

# Example 2: "7" has the highest predicted probability (0.6).
y2 = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print(mean_squared_error(np.array(y2), np.array(t)))
all_loss = 0 # 時系列ループ for t in range(binary_dim): # 入力値 X = np.array([a_bin[ - t - 1], b_bin[ - t - 1]]).reshape(1, -1) # 時刻tにおける正解データ dd = np.array([d_bin[binary_dim - t - 1]]) u[:,t+1] = np.dot(X, W_in) + np.dot(z[:,t].reshape(1, -1), W) z[:,t+1] = functions.sigmoid(u[:,t+1]) y[:,t] = functions.sigmoid(np.dot(z[:,t+1].reshape(1, -1), W_out)) #誤差 loss = functions.mean_squared_error(dd, y[:,t]) delta_out[:,t] = functions.d_mean_squared_error(dd, y[:,t]) * functions.d_sigmoid(y[:,t]) all_loss += loss out_bin[binary_dim - t - 1] = np.round(y[:,t]) for t in range(binary_dim)[::-1]: X = np.array([a_bin[-t-1],b_bin[-t-1]]).reshape(1, -1) delta[:,t] = (np.dot(delta[:,t+1].T, W.T) + np.dot(delta_out[:,t].T, W_out.T)) * functions.d_sigmoid(u[:,t+1]) # 勾配更新 W_out_grad += np.dot(z[:,t+1].reshape(-1,1), delta_out[:,t].reshape(-1,1)) W_grad += np.dot(z[:,t].reshape(-1,1), delta[:,t].reshape(1,-1))
def forward(self, x, t):
    """Run the tanh + MSE loss layer forward.

    Caches the target and activation on the instance and returns the loss.
    """
    self.t = t
    activated = np.tanh(x)
    self.y = activated
    self.loss = mean_squared_error(activated, t)
    return self.loss
# 4.2.1 Mean squared error (MSE)
# 4.2.2 Cross-entropy error (CEE)
##############################################################################
import numpy as np

from common.functions import mean_squared_error, cross_entropy_error

# The correct answer is "2": one-hot vector with index 2 set to 1.
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]

##############################################################################
# Example 1: the model rates "2" most likely (index 2 holds 0.6).
# - Both losses come out smaller than in example 2 below, i.e. the
#   parameters fit this sample better.
##############################################################################
pred = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
print(mean_squared_error(np.array(pred), np.array(t)))   # 0.09750000000000003

# With a one-hot target only the entry at the correct index contributes
# to the cross-entropy, so this equals -log of pred[2].
print(cross_entropy_error(np.array(pred), np.array(t)))  # 0.510825457099338

##############################################################################
# Example 2: the model rates "7" most likely (index 7 holds 0.6).
##############################################################################
pred = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
print(mean_squared_error(np.array(pred), np.array(t)))   # 0.5975

# Again only the entry at the correct index (2) contributes to the CEE.
print(cross_entropy_error(np.array(pred), np.array(t)))  # 2.302584092994546
import os
import sys

# Make the parent and current directories importable (for common.functions).
sys.path.append(os.pardir)
sys.path.append(os.curdir)

import numpy as np

from common.functions import mean_squared_error

# One-hot target: class 2 is the correct answer.
target = np.array([0, 0, 1, 0, 0, 0])
# Predicted probabilities; class 2 scores highest.
prediction = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1])

print(mean_squared_error(prediction, target))