def forward(self, X, h_old, train=True):
    """Run one GRU step for a single input token.

    Args:
        X: integer token index into the vocabulary (assumed scalar int —
            it is used to set one entry of a one-hot row; TODO confirm).
        h_old: previous hidden state, shape (1, H).
        train: when False, the returned scores are softmax-normalized.

    Returns:
        (y, h, cache) — output scores (or probabilities when not training),
        the new hidden state, and the intermediates needed by the matching
        backward pass (tuple order must not change).
    """
    m = self.model
    Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
    bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

    # Encode the token index as a (1, D) one-hot row vector.
    x_onehot = np.zeros((1, self.D))
    x_onehot[0, X] = 1.

    # Gate input: previous hidden state concatenated with the one-hot token.
    X = np.column_stack((h_old, x_onehot))

    # Update gate z and reset gate r, each a fully-connected layer
    # followed by a sigmoid.
    z, z_fc_cache = l.fc_forward(X, Wz, bz)
    z, z_sigm_cache = l.sigmoid_forward(z)
    r, r_fc_cache = l.fc_forward(X, Wr, br)
    r, r_sigm_cache = l.sigmoid_forward(r)

    # Candidate hidden state, computed from the reset-gated old state.
    X_prime = np.column_stack((r * h_old, x_onehot))
    h_hat, h_hat_fc_cache = l.fc_forward(X_prime, Wh, bh)
    h_hat, h_hat_tanh_cache = l.tanh_forward(h_hat)

    # Blend old state and candidate via the update gate.
    h = (1. - z) * h_old + z * h_hat

    y, y_cache = l.fc_forward(h, Wy, by)

    # Same element order as the backward pass expects.
    cache = (
        X, X_prime, h_old,
        z, z_fc_cache, z_sigm_cache,
        r, r_fc_cache, r_sigm_cache,
        h_hat, h_hat_fc_cache, h_hat_tanh_cache,
        h, y_cache
    )

    if not train:
        y = util.softmax(y)

    return y, h, cache
def forward(self, X, h, train=True):
    """Run one vanilla-RNN step for a single input token.

    Args:
        X: integer token index into the vocabulary (assumed scalar int —
            it sets one entry of a one-hot row; TODO confirm).
        h: previous hidden state, shape (1, H).
        train: when False, the returned scores are softmax-normalized.

    Returns:
        (y, h, cache) — output scores (or probabilities when not training),
        the new hidden state, and the intermediates for the backward pass
        (tuple order must not change).
    """
    m = self.model
    Wxh, Whh, Why = m['Wxh'], m['Whh'], m['Why']
    bh, by = m['bh'], m['by']

    # Encode the token index as a (1, D) one-hot row vector.
    x_onehot = np.zeros((1, self.D))
    x_onehot[0, X] = 1.

    # Keep a copy of the incoming state; `h` is rebound below.
    h_prev = h.copy()

    # h_t = tanh(x W_xh + h_{t-1} W_hh + b_h)
    pre_act = x_onehot @ Wxh + h_prev @ Whh + bh
    h, h_cache = l.tanh_forward(pre_act)

    y, y_cache = l.fc_forward(h, Why, by)

    cache = (x_onehot, Whh, h, h_prev, y, h_cache, y_cache)

    if not train:
        y = util.softmax(y)

    return y, h, cache
def forward(self, X, state, train=True):
    """Run one LSTM step for a single input token.

    Args:
        X: integer token index into the vocabulary (assumed scalar int —
            it sets one entry of a one-hot row; TODO confirm).
        state: (h_old, c_old) tuple of previous hidden and cell state.
        train: when False, the returned scores are softmax-normalized.

    Returns:
        (y, (h, c), cache) — output scores (or probabilities when not
        training), the new (hidden, cell) state pair, and the intermediates
        needed by the matching backward pass (tuple order must not change).
    """
    m = self.model
    Wf, Wi, Wc, Wo, Wy = m['Wf'], m['Wi'], m['Wc'], m['Wo'], m['Wy']
    bf, bi, bc, bo, by = m['bf'], m['bi'], m['bc'], m['bo'], m['by']
    h_old, c_old = state

    # Encode the token index as a (1, D) one-hot row vector.
    X_one_hot = np.zeros(self.D)
    X_one_hot[X] = 1.
    X_one_hot = X_one_hot.reshape(1, -1)

    # Gate input: previous hidden state concatenated with the one-hot token.
    X = np.column_stack((h_old, X_one_hot))

    # Forget (hf), input (hi), output (ho) gates: fc + sigmoid each.
    hf, hf_cache = l.fc_forward(X, Wf, bf)
    hf, hf_sigm_cache = l.sigmoid_forward(hf)
    hi, hi_cache = l.fc_forward(X, Wi, bi)
    hi, hi_sigm_cache = l.sigmoid_forward(hi)
    ho, ho_cache = l.fc_forward(X, Wo, bo)
    ho, ho_sigm_cache = l.sigmoid_forward(ho)

    # Candidate cell value (hc): fc + tanh.
    hc, hc_cache = l.fc_forward(X, Wc, bc)
    hc, hc_tanh_cache = l.tanh_forward(hc)

    # New cell state: forget-gated old cell plus input-gated candidate.
    c = hf * c_old + hi * hc
    # NOTE(review): tanh(c) overwrites c and is what gets carried forward as
    # the cell state. A textbook LSTM keeps the un-squashed c as state and
    # only uses tanh(c) to compute h. The matching backward pass presumably
    # expects this exact convention — confirm before changing.
    c, c_tanh_cache = l.tanh_forward(c)
    h = ho * c

    y, y_cache = l.fc_forward(h, Wy, by)

    cache = (
        X, hf, hi, ho, hc,
        hf_cache, hf_sigm_cache, hi_cache, hi_sigm_cache,
        ho_cache, ho_sigm_cache, hc_cache, hc_tanh_cache,
        c_old, c, c_tanh_cache, y_cache
    )

    if not train:
        # Inference: return normalized probabilities instead of raw scores.
        y = util.softmax(y)

    return y, (h, c), cache
def predict_proba(self, X):
    """Return softmax class probabilities for input X.

    Runs a forward pass in inference mode (train=False) and normalizes the
    resulting scores with a softmax.

    NOTE(review): this unpacks `self.forward(X, False)` into two values,
    which matches a feed-forward `forward(X, train)` returning
    `(score, cache)`. The recurrent forward methods in this project take a
    hidden state and return three values — confirm which class this method
    belongs to.
    """
    score, _ = self.forward(X, False)
    return util.softmax(score)