~~~python
def forward(self):
    # Numerically stable softmax: shift by the row maximum before exponentiating.
    y = np.exp(self.x.d - self.x.d.max(axis=-1, keepdims=True))
    y /= y.sum(axis=-1, keepdims=True)
    self.y.d = y
    # Flatten the batch and time axes, then pick each correct class probability.
    self.y_2d = self.y.d.reshape(-1, self.y.d.shape[-1])
    self.t_1d = self.t.d.reshape(-1)
    self.size = self.y_2d.shape[0]
    loss = self.y_2d[np.arange(self.size), self.t_1d]
    self.loss.d = -np.sum(np.log(loss + 1e-7)) / self.size
~~~
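The `forward` above relies on ivory's internal `Variable` objects. The same numerically stable softmax cross-entropy can be checked in isolation with plain NumPy (a minimal sketch; the function name and the shapes are ours, not ivory's):

~~~python
import numpy as np

def softmax_cross_entropy(x, t):
    # Shift by the row maximum so np.exp never overflows.
    y = np.exp(x - x.max(axis=-1, keepdims=True))
    y /= y.sum(axis=-1, keepdims=True)
    # Flatten (batch, time) and pick each correct class probability.
    y_2d = y.reshape(-1, y.shape[-1])
    t_1d = t.reshape(-1)
    size = y_2d.shape[0]
    p = y_2d[np.arange(size), t_1d]
    return -np.sum(np.log(p + 1e-7)) / size

x = np.random.randn(2, 3, 5)         # (batch, time, vocabulary)
t = np.random.randint(0, 5, (2, 3))  # correct class indices
print(softmax_cross_entropy(x, t))   # near log(5) ≈ 1.61 for random scores
~~~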
~~~python
def train_epoch(epoch):
    count = 0
    total_loss = 0
    for i, loss in trainer:
        total_loss += loss
        count += 1
        if i % 20 == 0:
            # Perplexity over the most recent window of iterations.
            ppl = np.exp(total_loss / count)
        if i % 200 == 0:
            print(epoch + 1, i + 1, ppl)
            count = 0
            total_loss = 0
    return ppl
~~~
~~~python
def validate():
    # Reset the LSTM states and switch off dropout before evaluating.
    lstm1.reset_state()
    lstm2.reset_state()
    model.set_train(False)
    count = 0
    total_loss = 0
    for x, t in data_val:
        model.set_data(x, t)
        model.forward()
        total_loss += model.loss
        count += 1
    # Restore the training configuration.
    lstm1.reset_state()
    lstm2.reset_state()
    model.set_train(True)
    return np.exp(total_loss / count)
~~~
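For reference, a driver loop combining `train_epoch` and `validate` might look as follows. This is a sketch under assumptions: `max_epoch` and the divide-by-4 learning-rate decay on a worsening validation perplexity are modeled on `ch06/train_better_rnnlm.py`, not taken from ivory.

~~~python
max_epoch = 40  # assumption; match it to your experiment
best_ppl = float("inf")
for epoch in range(max_epoch):
    train_ppl = train_epoch(epoch)
    val_ppl = validate()
    print(f"epoch {epoch + 1}: valid perplexity {val_ppl:.2f}")
    if val_ppl < best_ppl:
        best_ppl = val_ppl
    else:
        # Decay the learning rate when validation stops improving.
        trainer.optimizer.learning_rate /= 4.0
~~~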
~~~python
@property
def perplexity(self) -> float:
    return np.exp(self.loss)
~~~
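Perplexity is the exponential of the average cross-entropy loss, so a model that spreads its probability uniformly over a vocabulary of V words has loss log V and perplexity exactly V. A quick sanity check:

~~~python
import numpy as np

V = 10000            # vocabulary size
loss = np.log(V)     # average loss of a uniform predictor
print(np.exp(loss))  # 10000.0: perplexity equals the vocabulary size
~~~

This is also why training below starts from a perplexity of roughly 10000 on a 10000-word vocabulary.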
~~~python
def forward(self):
    self.y.d = 1 / (1 + np.exp(-self.x.d))
    y = self.y.d.reshape(-1)
    self.size = y.shape[0]
    # Column 0 holds P(t=0) = 1 - y, column 1 holds P(t=1) = y.
    loss = np.c_[1 - y, y][np.arange(self.size), self.t.d.reshape(-1)]
    self.loss.d = -np.sum(np.log(loss + 1e-7)) / self.size
~~~
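The `np.c_[1 - y, y]` indexing may look cryptic: it stacks the two class probabilities column-wise, so indexing with the 0/1 targets selects `1 - y` for negative labels and `y` for positive ones. A standalone illustration:

~~~python
import numpy as np

y = np.array([0.9, 0.2, 0.7])           # sigmoid outputs
t = np.array([1, 0, 0])                 # binary targets
probs = np.c_[1 - y, y]                 # column 0: P(t=0), column 1: P(t=1)
picked = probs[np.arange(len(y)), t]
print(picked)                           # [0.9 0.8 0.3]
print(-np.mean(np.log(picked + 1e-7)))  # binary cross-entropy
~~~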
~~~python
for layer in model.layers:
    if layer.name.startswith('Dropout'):
        print(layer.name, layer.dropout_ratio.d)  # type:ignore
~~~

Share the weights.

~~~python
em = model.layers[0]
affine = model.layers[-2]
affine.W.share_variable(em.W, transpose=True)  # type:ignore
trainer.build()
for v in trainer.optimizer.variables:
    print(v)
~~~

Run the training.

~~~python
trainer.fit(data)
it = iter(trainer)
loss = next(it)[1]
print(data.iteration, int(np.exp(loss)))
for i in range(8):
    loss = 0.0
    for _ in range(20):
        loss += next(it)[1]
    loss /= 20.0
    print(data.iteration, int(np.exp(loss)))
~~~

For comparison, here is the beginning of the output of `ch06/train_better_rnnlm.py` from 「ゼロから作るDeep Learning ❷」:

~~~bash
| epoch 1 | iter 1 / 1327 | time 2[s] | perplexity 9999.86
| epoch 1 | iter 21 / 1327 | time 60[s] | perplexity 4233.17
| epoch 1 | iter 41 / 1327 | time 116[s] | perplexity 1645.35
~~~
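The weight-sharing step above ties the embedding matrix (vocabulary × hidden) to the output affine layer as its transpose (hidden × vocabulary), so the two largest weight matrices are stored only once. Conceptually, in plain NumPy (the sizes are assumptions, and this is not ivory's API):

~~~python
import numpy as np

V, D = 10000, 650                   # vocabulary and hidden size (assumptions)
W_embed = np.random.randn(V, D) * 0.01
h = np.random.randn(D)              # hidden state at one time step
scores = h @ W_embed.T              # the output projection reuses W_embed
print(scores.shape)                 # (10000,)
~~~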
~~~python
trainer.optimizer.learning_rate = 0.1
model = trainer.model
for layer in model.layers:
    print(layer)
~~~

Set the initial values of the weights.

~~~python
from ivory.common.context import np  # isort:skip

model.init(std="xavier")
for p in model.weights:
    if p.name != "b":
        std1, std2 = f"{p.d.std():.03f}", f"{np.sqrt(1/p.d.shape[0]):.03f}"
        print(p.layer.name, p.name, std1, std2)
~~~

Assign data to the model and compute the perplexity.

~~~python
trainer.set_data(*data[0])
model.forward()
print(model.perplexity)
~~~

Run the training.

~~~python
trainer.fit(data)
df = trainer.to_frame()
df["epoch"] = df.iteration // len(data)
df = df.groupby("epoch").mean().reset_index()
df["ppl"] = np.exp(df.loss)
df.tail()
~~~

Visualize the result.

~~~python
import altair as alt  # isort:skip

alt.Chart(df).mark_line().encode(x="epoch", y="ppl").properties(width=300, height=200)
~~~
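The printed pair `std1, std2` verifies the Xavier rule: each weight matrix is drawn with standard deviation 1/√n, where n is the number of input units. A standalone check (plain NumPy, not ivory's `init`; the layer sizes are assumptions):

~~~python
import numpy as np

n_in, n_out = 650, 650
W = np.random.randn(n_in, n_out) / np.sqrt(n_in)  # Xavier initialization
print(f"{W.std():.3f}  {1 / np.sqrt(n_in):.3f}")  # both ≈ 0.039
~~~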
~~~python
def forward(self):
    self.y.d = 1 / (1 + np.exp(-self.x.d))
~~~
~~~python
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
~~~
~~~python
def softmax(x):
    y = np.exp(x - x.max())
    return y / y.sum()
~~~
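Subtracting the maximum before exponentiating leaves the result unchanged, because the common factor exp(−max) cancels when normalizing, but it keeps `np.exp` from overflowing on large inputs:

~~~python
import numpy as np

x = np.array([1000.0, 1001.0, 1002.0])
# np.exp(x) alone overflows to inf, and inf / inf gives nan.
y = np.exp(x - x.max())
print(y / y.sum())  # [0.09003057 0.24472847 0.66524096]
~~~

Note that this helper, unlike the layer's `forward`, normalizes over the whole array (no `axis` argument), so it is meant for a single 1-D score vector.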
("affine", 10000, "softmax_cross_entropy"), ] model = sequential(net) # 重みの共有をします。 em = model.layers[0] affine = model.layers[-2] affine.W.share_variable(em.W, transpose=True) # type:ignore model.build() # 学習済みの重みを読み出します。 import pickle # isort:skip with open(os.path.join(directory, 'better_rnnlm.pkl'), 'rb') as f: weights = pickle.load(f) for v, weight in zip(model.weight_variables, weights): v.data = np.asarray(weight) # テストデータでのパープレキシティを求めます。 count = 0 total_loss = 0.0 for x, t in data: model.set_data(x, t) model.forward() total_loss += model.loss count += 1 print(np.exp(total_loss / count)) # 「ゼロから作るDeep Learning ❷」と同等の結果が得られました。