def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
    """Run the encoder-decoder over one batch.

    The source batch is pushed through ``w_x`` and a single LSTM step to
    produce the initial decoder state.  The decoder then emits one token per
    step, taking the arg-max of the output layer as the hypothesis.

    Arguments:
      is_training -- when true, the reference tokens from ``trg_batch`` are
          fed back into the decoder and softmax cross-entropy is accumulated.
      src_batch -- source-side batch, handed to ``wrapper.make_var`` as-is
          (NOTE(review): presumably already numeric -- confirm with caller).
      trg_batch -- reference target sentences; only read when training, and
          every sentence is indexed up to the length of the first one.
      generation_limit -- upper bound on decoding steps when not training.

    Returns ``(hyp_batch, accum_loss)`` when training and ``hyp_batch`` alone
    otherwise; ``hyp_batch`` holds one list of output tokens per batch entry.
    """
    model = self.__model
    tanh = functions.tanh
    lstm = functions.lstm

    batch_size = len(src_batch)
    # The value is not used below; the lookup is retained from the original.
    src_len = len(src_batch[0])
    trg_stoi = self.__trg_vocab.stoi
    trg_itos = self.__trg_vocab.itos

    # encoding: one LSTM step over the projected source batch
    cell = wrapper.zeros((batch_size, self.__n_hidden))
    source = wrapper.make_var(src_batch)
    cell, state = lstm(cell, model.w_x(source))

    hyp_batch = [[] for _ in range(batch_size)]

    # decoding (training): teacher forcing with the reference tokens
    if is_training:
        accum_loss = wrapper.zeros(())
        n_steps = len(trg_batch[0])
        for step in range(n_steps):
            logits = model.w_jy(tanh(model.w_qj(state)))
            gold = wrapper.make_var(
                [trg_stoi(trg_batch[b][step]) for b in range(batch_size)],
                dtype=np.int32)
            accum_loss += functions.softmax_cross_entropy(logits, gold)

            best = wrapper.get_data(logits).argmax(1)
            for b, hyp in enumerate(hyp_batch):
                hyp.append(trg_itos(best[b]))

            cell, state = lstm(cell, model.w_yq(gold) + model.w_qq(state))
        return hyp_batch, accum_loss

    # decoding (generation): feed back the model's own best guess
    while len(hyp_batch[0]) < generation_limit:
        logits = model.w_jy(tanh(model.w_qj(state)))
        best = wrapper.get_data(logits).argmax(1)
        for b, hyp in enumerate(hyp_batch):
            hyp.append(trg_itos(best[b]))

        # Stop once the latest token of every hypothesis is the end marker.
        if all(hyp[-1] == '</s>' for hyp in hyp_batch):
            break

        previous = wrapper.make_var(best, dtype=np.int32)
        cell, state = lstm(cell, model.w_yq(previous) + model.w_qq(state))
    return hyp_batch
def __forward(self, is_training, text):
    """Score every gap between adjacent letters of ``text``.

    Each letter is embedded, then run through a forward LSTM and a backward
    LSTM.  The gap between letters ``i`` and ``i + 1`` is scored with a
    ``tanh`` over projections of the forward and backward states at both
    positions.

    Arguments:
      is_training -- when true, mean squared error between each score and
          ``labels[i]`` is accumulated into the returned loss.
      text -- raw input, converted to ``(letters, labels)`` by
          ``self.__make_input``.

    Returns ``(scores, accum_loss)``: ``scores`` is a list of
    ``len(letters) - 1`` floats (empty for fewer than two letters) and
    ``accum_loss`` is the summed loss when training, otherwise ``None``.
    """
    m = self.__model
    tanh = functions.tanh
    lstm = functions.lstm
    letters, labels = self.__make_input(is_training, text)
    n_letters = len(letters)

    accum_loss = wrapper.zeros(()) if is_training else None
    hidden_zeros = wrapper.zeros((1, self.__n_hidden))

    # embedding
    list_e = []
    for letter in letters:
        s_x = wrapper.make_var([letter], dtype=np.int32)
        list_e.append(tanh(m.w_xe(s_x)))

    # forward encoding
    s_a = hidden_zeros
    c = hidden_zeros
    list_a = []
    for s_e in list_e:
        c, s_a = lstm(c, m.w_ea(s_e) + m.w_aa(s_a))
        list_a.append(s_a)

    # backward encoding
    s_b = hidden_zeros
    c = hidden_zeros
    list_b = []
    for s_e in reversed(list_e):
        c, s_b = lstm(c, m.w_eb(s_e) + m.w_bb(s_b))
        list_b.append(s_b)
    # The states above were collected last-letter-first.  Flip them so that
    # list_b[i] belongs to letter i, matching list_a[i]; without this the
    # scoring loop below would pair each position with the backward state of
    # the mirrored position.
    list_b.reverse()

    # segmentation
    scores = []
    for i in range(n_letters - 1):
        s_y = tanh(
            m.w_ay1(list_a[i]) + m.w_by1(list_b[i])
            + m.w_ay2(list_a[i + 1]) + m.w_by2(list_b[i + 1]))
        scores.append(float(wrapper.get_data(s_y)))

        if is_training:
            s_t = wrapper.make_var([[labels[i]]])
            accum_loss += functions.mean_squared_error(s_y, s_t)

    return scores, accum_loss
def __forward(self, is_training, text):
    """Score each window of ``2 * n_context`` consecutive letters.

    For every window start, the ``w_xh`` outputs of the letters in the window
    are summed (each slot of the window uses its own id range, offset by the
    vocabulary size), passed through ``tanh``, projected with ``w_hy`` and
    passed through ``tanh`` again to give one score.

    Arguments:
      is_training -- when true, mean squared error between each score and
          ``labels[n]`` is accumulated into the returned loss.
      text -- raw input, converted to ``(letters, labels)`` by
          ``self.__make_input``.

    Returns ``(scores, accum_loss)``; ``accum_loss`` is ``None`` unless
    training.
    """
    model = self.__model
    tanh = functions.tanh
    letters, labels = self.__make_input(is_training, text)

    window = 2 * self.__n_context
    scores = []
    accum_loss = wrapper.zeros(()) if is_training else None

    for start in range(len(letters) - window + 1):
        hidden_sum = wrapper.zeros((1, self.__n_hidden))
        for slot in range(window):
            # Shift the letter id into the id range reserved for this slot.
            feature_id = slot * len(self.__vocab) + letters[start + slot]
            feature = wrapper.make_var([feature_id], dtype=np.int32)
            hidden_sum += model.w_xh(feature)

        score = tanh(model.w_hy(tanh(hidden_sum)))
        scores.append(float(wrapper.get_data(score)))

        if not is_training:
            continue
        target = wrapper.make_var([[labels[start]]])
        accum_loss += functions.mean_squared_error(score, target)

    return scores, accum_loss
def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
    """Run the attention-based encoder-decoder over one batch.

    The source words are encoded by a forward and a backward LSTM.  At every
    decoding step an exponentiated score is computed for each source
    position from both encoder states and the current decoder state; the
    scores are normalised by their sum and used to build weighted sums of
    the forward and of the backward states, which feed the decoder LSTM
    together with the previous output word.

    Arguments:
      is_training -- when true, reference words from ``trg_batch`` are fed
          back and softmax cross-entropy is accumulated.
      src_batch -- source sentences; every sentence is indexed up to the
          length of the first one.
      trg_batch -- reference sentences, only read when training.  Position 0
          is never scored (decoding starts from ``'<s>'``), so
          ``len(trg_batch[0]) - 1`` steps are run.
      generation_limit -- number of decoding steps at most when not training.

    Returns ``(hyp_batch, accum_loss)``: one list of output words per batch
    entry, and the summed loss (``None`` when not training).
    """
    model = self.__model
    tanh = functions.tanh
    lstm = functions.lstm

    batch_size = len(src_batch)
    hidden_size = self.__n_hidden
    src_len = len(src_batch[0])
    if is_training:
        n_steps = len(trg_batch[0]) - 1
    else:
        n_steps = generation_limit
    src_stoi = self.__src_vocab.stoi
    trg_stoi = self.__trg_vocab.stoi
    trg_itos = self.__trg_vocab.itos

    zero_state = wrapper.zeros((batch_size, hidden_size))
    zero_column = wrapper.zeros((batch_size, 1))
    state_shape = (batch_size, hidden_size)

    # source word ids, one variable per source position
    src_ids = [
        wrapper.make_var(
            [src_stoi(src_batch[b][pos]) for b in range(batch_size)],
            dtype=np.int32)
        for pos in range(src_len)
    ]

    # forward encoding
    cell = zero_state
    running = zero_state
    fwd_states = []
    for ids in src_ids:
        embedded = tanh(model.w_xi(ids))
        cell, running = lstm(cell, model.w_ia(embedded) + model.w_aa(running))
        fwd_states.append(running)

    # backward encoding; collected last-to-first, then flipped so that
    # bwd_states[n] lines up with fwd_states[n]
    cell = zero_state
    running = zero_state
    bwd_states = []
    for ids in reversed(src_ids):
        embedded = tanh(model.w_xi(ids))
        cell, running = lstm(cell, model.w_ib(embedded) + model.w_bb(running))
        bwd_states.append(running)
    bwd_states.reverse()

    # decoding
    cell = zero_state
    decoder = tanh(model.w_ap(fwd_states[-1]) + model.w_bp(bwd_states[0]))
    previous = wrapper.make_var(
        [trg_stoi('<s>') for _ in range(batch_size)], dtype=np.int32)

    hyp_batch = [[] for _ in range(batch_size)]
    accum_loss = wrapper.zeros(()) if is_training else None

    for step in range(n_steps):
        # unnormalised attention weights and their running total
        weights = []
        weight_total = zero_column
        for fwd_n, bwd_n in zip(fwd_states, bwd_states):
            mixed = tanh(model.w_aw(fwd_n) + model.w_bw(bwd_n) + model.w_pw(decoder))
            weight = functions.exp(model.w_we(mixed))
            weights.append(weight)
            weight_total += weight

        # attention vectors: weighted sums of forward / backward states
        fwd_context = zero_state
        bwd_context = zero_state
        for fwd_n, bwd_n, weight in zip(fwd_states, bwd_states, weights):
            share = weight / weight_total
            fwd_context += functions.reshape(
                functions.batch_matmul(fwd_n, share), state_shape)
            bwd_context += functions.reshape(
                functions.batch_matmul(bwd_n, share), state_shape)

        # generate next word
        cell, decoder = lstm(
            cell,
            model.w_yp(previous) + model.w_pp(decoder)
            + model.w_cp(fwd_context) + model.w_dp(bwd_context))
        logits = model.w_py(decoder)
        best = wrapper.get_data(logits).argmax(1)
        for b, hyp in enumerate(hyp_batch):
            hyp.append(trg_itos(best[b]))

        if is_training:
            gold = wrapper.make_var(
                [trg_stoi(trg_batch[b][step + 1]) for b in range(batch_size)],
                dtype=np.int32)
            accum_loss += functions.softmax_cross_entropy(logits, gold)
            previous = gold
            continue

        # Stop once the latest word of every hypothesis is the end marker.
        if all(hyp[-1] == '</s>' for hyp in hyp_batch):
            break
        previous = wrapper.make_var(best, dtype=np.int32)

    return hyp_batch, accum_loss