def check_forward(self, x1_data, x2_data, x3_data):
    xp = self.link.xp
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                       dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(
        self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    with cuda.get_device_from_array(x1):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def _encode(self, x_list):
    batch_size = len(x_list[0])
    source_length = len(x_list)
    # Encoding
    fc = bc = f = b = _zeros((batch_size, self.hidden_size))
    i_list = [self.x_i(_mkivar(x)) for x in x_list]
    f_list = []
    b_list = []
    for i in i_list:
        fc, f = F.lstm(fc, self.i_f(i) + self.f_f(f))
        f_list.append(f)
    for i in reversed(i_list):
        bc, b = F.lstm(bc, self.i_b(i) + self.b_b(b))
        b_list.append(b)
    b_list.reverse()
    # Making concatenated matrix
    # {f,b}_mat: shape = [batch, srclen, hidden]
    f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
    b_mat = F.concat([F.expand_dims(b, 1) for b in b_list], 1)
    # fb_mat: shape = [batch, srclen, 2 * hidden]
    fb_mat = F.concat([f_mat, b_mat], 2)
    # fbe_mat: shape = [batch * srclen, atten]
    fbe_mat = self.fb_e(
        F.reshape(fb_mat, [batch_size * source_length, 2 * self.hidden_size]))
    return fb_mat, fbe_mat, fc, bc, f_list[-1], b_list[0]
def check_forward(self, x1_data, x2_data, x3_data):
    xp = self.link.xp
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    assert h1.data == pytest.approx(h1_expect.data)
    assert self.link.h.data == pytest.approx(h1_expect.data)
    assert self.link.c.data == pytest.approx(c1_expect.data)

    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    with cuda.get_device_from_array(x1):
        c2_expect, y2_expect = functions.lstm(
            c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    assert y2.data == pytest.approx(y2_expect.data)
    assert self.link.h.data[:batch] == pytest.approx(y2_expect.data)
    assert self.link.h.data[batch:] == pytest.approx(h1_rest.data)

    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    _, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    assert y3.data == pytest.approx(y3_expect.data)
    assert self.link.h.data == pytest.approx(h2_rest.data)
def __call__(self, words):
    ### forward LSTM
    c = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
    h = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
    hs = []
    for word in words:
        e = self.embed(word)
        lstm_in = self.xh(e) + self.hh(h)
        c, h = F.lstm(c, lstm_in)
        hs.append(h)
    ### backward LSTM
    c = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
    h = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
    hs_b = []
    for word in reversed(words):
        e = self.embed(word)
        lstm_in = self.xh_b(e) + self.hh_b(h)
        c, h = F.lstm(c, lstm_in)
        hs_b.append(h)
    hs_b.reverse()
    ### MLP
    ys = []
    for h_i, hs_b_i in zip(hs, hs_b):
        o1 = self.ho(F.concat([h_i, hs_b_i]))
        y_i = self.o(F.sigmoid(o1))
        ys.append(y_i)
    return ys
def forward(self, x_data, y_data, train=True):
    x = Variable(np.array(x_data, dtype=np.float32), volatile=not train)
    t = Variable(y_data, volatile=not train)
    state_c1 = Variable(LSTMcomponent.inputs["state_c1"], volatile=not train)
    state_h1 = Variable(LSTMcomponent.inputs["state_h1"], volatile=not train)
    state_c2 = Variable(LSTMcomponent.inputs["state_c2"], volatile=not train)
    state_h2 = Variable(LSTMcomponent.inputs["state_h2"], volatile=not train)
    # h0 = self.l0(x)
    h1_in = self.l1_x(F.dropout(x, train=train)) + self.l1_h(state_h1)
    c1, h1 = F.lstm(state_c1, h1_in)
    h2_in = self.l2_x(F.dropout(h1, train=train)) + self.l2_h(state_h2)
    c2, h2 = F.lstm(state_c2, h2_in)
    y = self.l3(F.dropout(h2, train=train))
    LSTMcomponent.inputs["state_c1"] = c1
    LSTMcomponent.inputs["state_h1"] = h1
    LSTMcomponent.inputs["state_c2"] = c2
    LSTMcomponent.inputs["state_h2"] = h2
    loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    return loss, accuracy
def forward_one_step(self, x_data, y_data, state, train=True, dropout_ratio=0.5):
    x = Variable(x_data)
    t = Variable(y_data)
    h0 = self.embed(x)
    h1_in = self.l1_x(F.dropout(h0, ratio=dropout_ratio)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(F.dropout(h1, ratio=dropout_ratio)) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, ratio=dropout_ratio))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    if train:
        return state, F.softmax_cross_entropy(y, t)
    else:
        return state, F.softmax(y)
def check_forward(self, x1_data, x2_data, x3_data):
    xp = self.link.xp
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    h1 = self.link(x1)
    c0 = chainer.Variable(
        xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
    c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    c2_expect, y2_expect = functions.lstm(
        c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def forward_one(
        self,
        word: Variable,
        state: State,
        dropout: bool = False,
        train: bool = False
) -> Tuple[Variable, State]:
    y0 = self.embed(word)
    if dropout:
        h1_in = self.l1(F.dropout(y0, train=train)) + self.h1(state["h1"])
        c1, h1 = F.lstm(state["c1"], h1_in)
        h2_in = self.l2(F.dropout(h1, train=train)) + self.h2(state["h2"])
        c2, h2 = F.lstm(state["c2"], h2_in)
        h3 = self.l3(F.dropout(h2, train=train))
    else:
        h1_in = self.l1(y0) + self.h1(state["h1"])
        c1, h1 = F.lstm(state["c1"], h1_in)
        h2_in = self.l2(h1) + self.h2(state["h2"])
        c2, h2 = F.lstm(state["c2"], h2_in)
        h3 = self.l3(h2)
    new_state = {
        "h1": h1,
        "c1": c1,
        "h2": h2,
        "c2": c2,
    }
    return h3, new_state
def generate_z_x(self, seq_length_per_z, sample_z):
    print("sample_z shape: " + str(sample_z.shape))
    # output = np.zeros((seq_length_per_z * sample_z.shape[0], self.recog_in_h.W.data.shape[1]))
    output = []
    state = self.make_initial_state()
    for epoch in xrange(sample_z.shape[0]):
        # gen_out = np.zeros((seq_length_per_z, output.shape[1]))
        z = Variable(sample_z[epoch].reshape((1, sample_z.shape[1])))
        print("epoch: " + str(epoch) + " z: " + str(z.data))

        # =====[ Step 2: Compute p(x|z) - decoding step ]=====
        # Initial step
        # output = []
        h_in = self.gen_z_h(z)
        c_t, h_t = F.lstm(state["c_gen"], h_in)
        state.update({"c_gen": c_t, "h_gen": h_t})
        rec_loss = Variable(np.zeros((), dtype=np.float32))
        for i in range(seq_length_per_z):
            # Get output and loss
            x_t = self.output(h_t)
            print("size of x-prime's output data sequence: " + str(x_t.data.shape))
            # reshape data from (1, 88) to (88)
            output.append(x_t.data.reshape(x_t.data.shape[1:]))
            # Get next hidden state
            h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
            c_t, h_t = F.lstm(state["c_gen"], h_in)
            state.update({"c_gen": c_t, "h_gen": h_t})
    return np.array(output)
def decode(self, p, t=None):
    """
    @param p
    @param t ground truth
    """
    y = self.w_py(p)
    if self.phase is Seq2Seq.Train:
        loss = F.mean_squared_error(y, t)
        self.cell_state, p = F.lstm(
            self.cell_state,
            self.w_yp(t) + self.w2_pp(self.previous_p))
        self.previous_p = p
        return p, loss
    elif self.phase is Seq2Seq.Valid:
        loss = F.mean_squared_error(y, t)
        self.cell_state, p = F.lstm(
            self.cell_state,
            self.w_yp(y) + self.w2_pp(self.previous_p))
        self.previous_p = p
        return p, loss
    else:  # Test
        self.cell_state, p = F.lstm(
            self.cell_state,
            self.w_yp(y) + self.w2_pp(self.previous_p))
        self.previous_p = p
        return p, y
def forward_one(self, word: Variable, state: State,
                dropout: bool = False, train: bool = False) -> Tuple[Variable, State]:
    y0 = self.embed(word)
    if dropout:
        h1_in = self.l1(F.dropout(y0, train=train)) + self.h1(state["h1"])
        c1, h1 = F.lstm(state["c1"], h1_in)
        h2_in = self.l2(F.dropout(h1, train=train)) + self.h2(state["h2"])
        c2, h2 = F.lstm(state["c2"], h2_in)
        h3 = self.l3(F.dropout(h2, train=train))
    else:
        h1_in = self.l1(y0) + self.h1(state["h1"])
        c1, h1 = F.lstm(state["c1"], h1_in)
        h2_in = self.l2(h1) + self.h2(state["h2"])
        c2, h2 = F.lstm(state["c2"], h2_in)
        h3 = self.l3(h2)
    new_state = {
        "h1": h1,
        "c1": c1,
        "h2": h2,
        "c2": c2,
    }
    return h3, new_state
def __call__(self, x, y, state, train=True, target=True):
    if train:
        h = Variable(x.reshape(self.batchsize, 12), volatile=not train)
    else:
        h = Variable(x, volatile=not train)
    t = Variable(y.flatten(), volatile=not train)

    h0 = F.relu(self.l0(h))
    if target == False:
        data = h0.data
        self.data_first.append(data)

    h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
    h1_in = F.dropout(F.tanh(h1_in), train=train)
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = F.dropout(F.tanh(self.l2_x(h1)), train=train) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    if target == False:
        data = h1.data
        self.data_hidden.append(data)

    y = self.l3(h2)
    if target == False:
        data = y.data
        self.data_output.append(data)

    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    self.loss = F.softmax_cross_entropy(y, t)
    return state, self.loss
def __call__(self, x, y, state, train=True, target=True):
    if train:
        h = Variable(x.reshape(self.batchsize, 12), volatile=not train)
    else:
        h = Variable(x, volatile=not train)
    t = Variable(y.flatten(), volatile=not train)

    h0 = F.relu(self.l0(h))
    if target == False:
        data = h0.data
        self.data_first.append(data)

    h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
    h1_in = F.dropout(F.tanh(h1_in), train=train)
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = F.dropout(F.tanh(self.l2_x(h1)), train=train) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    if target == False:
        data = h1.data
        self.data_hidden.append(data)

    y = self.l3(h2)
    if target == False:
        data = y.data
        self.data_output.append(data)

    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    self.loss = F.softmax_cross_entropy(y, t)
    return state, self.loss
def forward_one_step(self, x_data, c_data, y_data, state, train=True):
    x = chainer.Variable(x_data, volatile=not train)
    t = chainer.Variable(y_data, volatile=not train)
    c = chainer.Variable(c_data, volatile=not train)

    h1_in = self.l1_first(x) + self.l1_recur(state['h1']) + self.l1_w(state['w'])
    c1, h1 = F.lstm(state['c1'], h1_in)

    # soft window
    ws = F.exp(self.lw(h1))
    w_mixws, w_gains, w_means = split_axis_by_widths(ws, 3)
    w_means += state['w_means']
    w = self.forward_window(w_mixws, w_gains, w_means, c)

    h2_in = self.l2_first(x) + self.l2_recur(state['h2']) + self.l1_w(w) + self.l2_input(h1)
    c2, h2 = F.lstm(state['c2'], h2_in)
    h3_in = self.l3_first(x) + self.l3_recur(state['h3']) + self.l1_w(w) + self.l3_input(h2)
    c3, h3 = F.lstm(state['c3'], h3_in)

    y = self.l4(F.concat((h1, h2, h3)))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2, 'c3': c3, 'h3': h3,
             'w': w, 'w_means': w_means}
    return state, loss_func(self.noutput_gauss, y, t)
def predict(self, x_data, state):
    x = Variable(x_data)
    h1_in = self.l1_x(x) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = F.softmax(self.l3(h2))
    return y.data
def forward_one_step_embed(x_data, state):
    x = chainer.Variable(x_data, volatile=True)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=False)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=False)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    return {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
def predict(self, x_data, state):
    x = Variable(x_data.astype(np.int32), volatile=True)
    h0 = self.embed(x)
    h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(h2)
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, y
def forward_one_predict(x_data, state, train=False):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax(y).data
def _forward_one_step(self, x_data, state, train=True):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    h0 = self.model.embed(x)
    h1_in = self.model.l1_x(F.dropout(h0, train=train)) + self.model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.model.l2_x(F.dropout(h1, train=train)) + self.model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, y
def __call__(self, word):
    x_list = [XP.iarray([min(ord(x), 0x7f)]) for x in word]
    ac = self.__EMBED_ZEROS
    a = self.__EMBED_ZEROS
    for x in x_list:
        ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
    bc = self.__EMBED_ZEROS
    b = self.__EMBED_ZEROS
    for x in reversed(x_list):
        bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
    return a, b
def __call__(self, word):
    x_list = [self.__char_vram[min(ord(x), 0x7f)] for x in word]
    ac = self.__EMBED_ZEROS
    a = self.__EMBED_ZEROS
    for x in x_list:
        ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
    bc = self.__EMBED_ZEROS
    b = self.__EMBED_ZEROS
    for x in reversed(x_list):
        bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
    return a, b
def predict(self, x_data, state, dropout_ratio=0.5):
    x = Variable(x_data)
    h0 = self.embed(x)
    h1_in = self.l1_x(F.dropout(h0, ratio=dropout_ratio)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(F.dropout(h1, ratio=dropout_ratio)) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, ratio=dropout_ratio))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax(y)
def forward(model, source_sentence, target_sentence, training):
    # convert words to IDs, add End of Sentence
    source = model.src_vocabulary.convert(source_sentence)
    if target_sentence:
        target = model.trg_vocabulary.convert(target_sentence)

    c = Variable(np.zeros((1, model.hidden_size), dtype=np.float32))
    p = Variable(np.zeros((1, model.hidden_size), dtype=np.float32))

    # encoder
    for word_id in source[::-1]:
        x = Variable(np.array([word_id], dtype=np.int32))
        e = model.w_xe(x)
        p1 = model.w_ep(e)
        p2 = model.w_pp(p)
        # ( W*x + W*h )
        lstm_input = p1 + p2
        # update the memory cell and hidden state
        c, p = F.lstm(c, lstm_input)

    # decoder
    EOS = model.trg_vocabulary.word_to_id("<EOS>")
    q = p
    y = Variable(np.array([EOS], dtype=np.int32))
    if training:
        loss = Variable(np.zeros((), dtype=np.float32))
        for word_id in target:
            e = model.w_ey(y)
            lstm_input = model.w_qq(q) + model.w_qe(e)
            c, q = F.lstm(c, lstm_input)
            y = model.w_yq(q)
            t = Variable(np.array([word_id], dtype=np.int32))
            loss += F.softmax_cross_entropy(y, t)
            y = t
        return loss
    else:
        sentence = []
        while len(sentence) < 100:
            e = model.w_ey(y)
            lstm_input = model.w_qq(q) + model.w_qe(e)
            c, q = F.lstm(c, lstm_input)
            y = model.w_yq(q)
            word_id = np.argmax(y.data, axis=1)
            y = Variable(np.array(word_id, dtype=np.int32))
            if word_id[0] == EOS:
                sentence.append(model.trg_vocabulary.id_to_word(word_id[0]))
                break
            sentence.append(model.trg_vocabulary.id_to_word(word_id[0]))
        return sentence
def forward_one_step(x_data, y_data, state, train=True):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    t = chainer.Variable(y_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax_cross_entropy(y, t)
def forward_one_step_lstm(model, state, cur_word, next_word, train=True):
    x = Variable(cur_word, volatile=not train)
    t = Variable(next_word, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.tanh(h0)) + model.l1_h(state["h1"])
    c1, h1 = F.lstm(state["c1"], h1_in)
    h2_in = model.l2_x(F.tanh(h1)) + model.l2_h(state["h2"])
    c2, h2 = F.lstm(state["c2"], h2_in)
    y = model.l3(F.tanh(h2))
    state = {"c1": c1, "h1": h1, "c2": c2, "h2": h2}
    loss = F.softmax_cross_entropy(y, t)
    return state, loss
def predict(self, x_data, state):
    x = Variable(x_data, volatile=True)
    h0 = self.embed(x)
    h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(h2)
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax(y)
def _forward_one_step(self, x_data, state, train=True):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    h0 = self.model.embed(x)
    h1_in = self.model.l1_x(F.dropout(h0, train=train)) + self.model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.model.l2_x(F.dropout(h1, train=train)) + self.model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, y
def __call__(self, word):
    x_list = [XP.iarray([min(ord(x), 0x7f)]) for x in word]
    ac = self.__EMBED_ZEROS
    a = self.__EMBED_ZEROS
    for x in x_list:
        ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
        a = XP.dropout(a)
    bc = self.__EMBED_ZEROS
    b = self.__EMBED_ZEROS
    for x in reversed(x_list):
        bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
        b = XP.dropout(b)
    return a, b
def forward_one_step(x_data, state, train=True):
    if args.gpu >= 0:
        x_data = cuda.to_gpu(x_data)
    x = chainer.Variable(x_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax(y)
def forward_one_step(x, state, train=True):
    drop_ratio = 0.5
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, ratio=drop_ratio, train=train)) + model.l1_h(state["h1"])
    c1, h1 = F.lstm(state["c1"], h1_in)
    h2_in = model.l2_x(F.dropout(h1, ratio=drop_ratio, train=train)) + model.l2_h(state["h2"])
    c2, h2 = F.lstm(state["c2"], h2_in)
    # ya = F.relu(model.l3a(F.dropout(h2, ratio=drop_ratio, train=train)))
    y = model.l3(F.dropout(h2, ratio=drop_ratio, train=train))
    state = {"c1": c1, "h1": h1, "c2": c2, "h2": h2}
    return state, y
def __call__(self, feature, state, test=False, train=True, image=False):
    if image:
        h1_in = self.l1_x(feature) + self.l1_h(state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        y = self.out(h1)
        state = {'c1': c1, 'h1': h1}
    else:
        h0 = self.embed(feature)
        h1_in = self.l1_x(h0) + self.l1_h(F.dropout(state['h1'], train=train))
        c1, h1 = F.lstm(state['c1'], h1_in)
        y = self.out(h1)
        state = {'c1': c1, 'h1': h1}
    return state, y
def forward_one_step(x, state, train=True):
    drop_ratio = 0.5
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, ratio=drop_ratio, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, ratio=drop_ratio, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, ratio=drop_ratio, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, y
def check_forward(self, x_data):
    xp = self.link.xp
    x = chainer.Variable(x_data)
    h1 = self.link(x)
    c0 = chainer.Variable(xp.zeros((len(self.x), self.out_size),
                                   dtype=self.x.dtype))
    c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)

    h2 = self.link(x)
    c2_expect, h2_expect = functions.lstm(
        c1_expect, self.link.upward(x) + self.link.lateral(h1))
    testing.assert_allclose(h2.data, h2_expect.data)
def forward_one_step(self, x_data, y_data, state, train=True, dropout_ratio=0.5):
    x = Variable(x_data.astype(np.int32), volatile=not train)
    t = Variable(y_data.astype(np.int32), volatile=not train)
    h0 = self.embed(x)
    h1_in = self.l1_x(F.dropout(h0, ratio=dropout_ratio, train=train)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(F.dropout(h1, ratio=dropout_ratio, train=train)) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, ratio=dropout_ratio, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax_cross_entropy(y, t)
def forward(self, x_data, y_data, state, train=True):
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data)
    h1_in = self.l1_x(F.dropout(x, train=train)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(F.dropout(h1, train=train)) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    Loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    return state, Loss, accuracy, y.data, t.data
def forward_one_step(x, state, train=True):
    drop_ratio = 0.5
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, ratio=drop_ratio, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, ratio=drop_ratio, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    # ya = F.relu(model.l3a(F.dropout(h2, ratio=drop_ratio, train=train)))
    y = model.l3(F.dropout(h2, ratio=drop_ratio, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, y
def forward(self, x_data, y_data, state, train=True):
    x = Variable(x_data, volatile=not train)
    t = Variable(y_data)
    h1_in = self.l1_x(F.dropout(x, train=train)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = self.l2_x(F.dropout(h1, train=train)) + self.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = self.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    Loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    return state, Loss, accuracy, y.data, t.data
def forward_one_step(self, state, x_last, train=True):
    x = Variable(x_last, volatile=False)
    a = F.elu(self.conv1(x))
    l1 = F.dropout(F.elu(self.l1_x(a) + self.l1_h(state['h1'])), train=train)
    c1, h1 = F.lstm(state['c1'], l1)
    l2 = F.dropout(F.elu(self.l2_h1(h1) + self.l2_h(state['h2'])), train=train)
    c2, h2 = F.lstm(state['c2'], l2)
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2, 'x_last': x}
    return state
def forward(self, x_data, state):
    """
    Does encode/decode on x_data.

    :param x_data: input data (a single timestep) as a numpy.ndarray
    :param state: previous state of the RNN
    :return: output, reconstruction loss, KL divergence, state
    """
    # =====[ Step 1: Compute q(z|x) - encoding step, get z ]=====
    # Forward encoding
    for i in range(x_data.shape[0]):
        # debug sums (unused)
        sum_ones_reshape = np.sum(x_data[i].reshape((1, x_data.shape[1])))
        sum_ones_reg = np.sum(x_data[i])
        # grab the i-th element of x
        x = Variable(x_data[i].reshape((1, x_data.shape[1])))
        h_in = self.recog_x_h(x) + self.recog_h_h(state["h_rec"])
        c_t, h_t = F.lstm(state["c_rec"], h_in)
        state.update({"c_rec": c_t, "h_rec": h_t})

    # Compute q_mean and q_log_sigma
    q_mean = self.recog_mean(state["h_rec"])
    q_log_sigma = 0.5 * self.recog_log_sigma(state["h_rec"])

    # Compute KL divergence based on q_mean and q_log_sigma
    KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean ** 2 - F.exp(q_log_sigma))

    # Compute z as q_mean + noise * exp(q_log_sigma)
    eps = Variable(np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32))
    z = q_mean + F.exp(q_log_sigma) * eps

    # =====[ Step 2: Compute p(x|z) - decoding step ]=====
    # Initial step
    output = []
    h_in = self.gen_z_h(z)
    c_t, h_t = F.lstm(state["c_gen"], h_in)
    state.update({"c_gen": c_t, "h_gen": h_t})
    rec_loss = Variable(np.zeros((), dtype=np.float32))
    for i in range(x_data.shape[0]):
        # Get output and loss
        x_t = self.output(h_t)
        output.append(x_t.data)
        # print("size of x_t output data sequence: " + str(x_t.data.shape))
        rec_loss += self.loss_func(x_t, Variable(x_data[i].reshape((1, x_data.shape[1]))))
        # Get next hidden state
        h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
        c_t, h_t = F.lstm(state["c_gen"], h_in)
        state.update({"c_gen": c_t, "h_gen": h_t})

    # =====[ Step 3: Return all terms ]=====
    return np.array(output), rec_loss, KLD, state
def __call__(self, feature, state, test=False, train=True, image=False):
    if image:
        h1_in = self.l1_x(feature) + self.l1_h(state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        y = self.out(h1)
        state = {'c1': c1, 'h1': h1}
    else:
        h0 = self.embed(feature)
        h1_in = self.l1_x(h0) + self.l1_h(F.dropout(state['h1'], train=train))
        c1, h1 = F.lstm(state['c1'], h1_in)
        y = self.out(h1)
        state = {'c1': c1, 'h1': h1}
    return state, y
def check_forward(self, x_data):
    xp = self.link.xp
    x = chainer.Variable(x_data) if self.input_variable else x_data
    c1, h1 = self.link(None, None, x)
    c0 = chainer.Variable(
        xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype))
    c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(c1.data, c1_expect.data)

    c2, h2 = self.link(c1, h1, x)
    c2_expect, h2_expect = functions.lstm(
        c1_expect, self.link.upward(x) + self.link.lateral(h1))
    testing.assert_allclose(h2.data, h2_expect.data)
    testing.assert_allclose(c2.data, c2_expect.data)
def __call__(self, c, a, b, s1, r1, s2, r2, z):
    c, h = functions.lstm(
        c,
        self.w_az(a) + self.w_bz(b) + self.w_s1z(s1) + self.w_r1z(r1) +
        self.w_s2z(s2) + self.w_r2z(r2) + self.w_zz(z),
    )
    return c, XP.dropout(h)
def lstm_without_dropout(n_layer, dropout, hx, cx, ws, bs, xs):
    # restack the per-gate weights and biases into the gate order that
    # functions.lstm expects
    xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
    hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
    xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
    hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]
    xs = [xs[i] for i in range(3)]
    ys = []
    for x in xs:
        cx_next = []
        hx_next = []
        for layer in range(n_layer):
            c = cx[layer]
            h = hx[layer]
            if layer != 0:
                # Only multiply ratio
                x = x * (1 / (1.0 - dropout))
            lstm_in = functions.linear(x, xws[layer], xbs[layer]) + \
                functions.linear(h, hws[layer], hbs[layer])
            c_new, h_new = functions.lstm(c, lstm_in)
            cx_next.append(c_new)
            hx_next.append(h_new)
            x = h_new
        cx = cx_next
        hx = hx_next
        ys.append(x)
    cy = functions.stack(cx)
    hy = functions.stack(hx)
    return hy, cy, ys
def forward_one(x, target, hidden, prev_c, train_flag):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0, '<s>')
    for i in range(-distance + 1, distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    # print(dist.data, label, np.argmax(dist.data))
    # correct = get_onehot(label)
    # print(output.data, correct.data)
    return dist
def check_forward(self, x_data):
    xp = self.link.xp
    x = chainer.Variable(x_data) if self.input_variable else x_data
    c1, h1 = self.link(None, None, x)
    c0 = chainer.Variable(xp.zeros((len(self.x), self.out_size),
                                   dtype=self.x.dtype))
    c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(c1.data, c1_expect.data)

    c2, h2 = self.link(c1, h1, x)
    c2_expect, h2_expect = functions.lstm(
        c1_expect, self.link.upward(x) + self.link.lateral(h1))
    testing.assert_allclose(h2.data, h2_expect.data)
    testing.assert_allclose(c2.data, c2_expect.data)
def predict(self, x_data, y_data, state):
    x, t = Variable(x_data, volatile=False), Variable(y_data, volatile=False)
    h1_in = self.l1_x(x) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    y = self.l6(h1)
    state = {'c1': c1, 'h1': h1}
    return state, F.mean_squared_error(y, t)
def check_forward(self, x_data):
    xp = self.link.xp
    x = chainer.Variable(x_data)
    h1 = self.link(x)
    c0 = chainer.Variable(
        xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype))
    c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
    gradient_check.assert_allclose(h1.data, h1_expect.data)
    gradient_check.assert_allclose(self.link.h.data, h1_expect.data)
    gradient_check.assert_allclose(self.link.c.data, c1_expect.data)

    h2 = self.link(x)
    c2_expect, h2_expect = functions.lstm(
        c1_expect, self.link.upward(x) + self.link.lateral(h1))
    gradient_check.assert_allclose(h2.data, h2_expect.data)
def forward_one_step(self, x_vis, x_dep, train_label, c, h, volatile=False):
    x1 = Variable(x_vis.reshape(1, 1, x_vis.shape[0], x_vis.shape[1]), volatile=volatile)
    h1 = F.max_pooling_2d(F.relu(self.bn11(self.conv11(x1))), 2, stride=2)
    h1 = F.max_pooling_2d(F.relu(self.bn12(self.conv12(h1))), 2, stride=2)
    h1 = F.max_pooling_2d(F.relu(self.conv13(h1)), 2, stride=2)
    h1 = self.fc14(h1)
    x2 = Variable(x_dep.reshape(1, 1, x_dep.shape[0], x_dep.shape[1]), volatile=volatile)
    h2 = F.max_pooling_2d(F.relu(self.bn21(self.conv21(x2))), 2, stride=2)
    h2 = F.max_pooling_2d(F.relu(self.bn22(self.conv22(h2))), 2, stride=2)
    h2 = F.max_pooling_2d(F.relu(self.conv23(h2)), 2, stride=2)
    h2 = self.fc24(h2)
    # concatenate the outputs of the visible-image CNN and the depth CNN
    lstm_input = F.concat((h1, h2), axis=1)
    t = Variable(train_label, volatile=volatile)
    h_in = self.i2h(F.dropout(lstm_input, train=not volatile)) + self.h2h(h)
    c, h = F.lstm(c, h_in)
    y = self.h2y(F.dropout(h, train=not volatile))
    return F.softmax_cross_entropy(y, t), y, c, h
def forward_one_step(self, x_data, y_data, state, train=True, dropout_ratio=0.0):
    x, t = Variable(x_data, volatile=not train), Variable(y_data, volatile=not train)
    h1_in = self.l1_x(F.dropout(x, ratio=dropout_ratio, train=train)) + self.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    y = self.l6(F.dropout(h1, ratio=dropout_ratio, train=train))
    state = {'c1': c1, 'h1': h1}
    return state, F.mean_squared_error(y, t)
def __call__(self, x, c_pre, h_pre, train=True):
    e = F.tanh(self.xe(x))
    c_tmp, h_tmp = F.lstm(c_pre, self.eh(e) + self.hh(h_pre))
    # flag indicating, per row, whether x is -1 (padding) or not
    enable = chainer.Variable(chainer.Variable(x.data != -1).data.reshape(len(x), 1))
    c_next = F.where(enable, c_tmp, c_pre)  # if x != -1 take c_tmp, else keep c_pre
    h_next = F.where(enable, h_tmp, h_pre)  # if x != -1 take h_tmp, else keep h_pre
    return c_next, h_next
def _encode(self, x_list):
    batch_size = len(x_list[0])
    pc = p = _zeros((batch_size, self.hidden_size))
    for x in reversed(x_list):
        i = self.x_i(_mkivar(x))
        pc, p = F.lstm(pc, self.i_p(i) + self.p_p(p))
    return pc, p
def __call__(self, y, y_label, c_pre, h_pre, train=True):
    # input word embedding
    e = F.tanh(self.ye(y))
    e_l = F.tanh(self.le(y_label))
    # LSTM
    c_tmp, h_tmp = F.lstm(
        c_pre,
        F.dropout(self.eh(F.concat((e, e_l))), ratio=0.2, train=train) + self.hh(h_pre))
    enable = chainer.Variable(
        chainer.Variable(y.data != -1).data.reshape(len(y), 1))
    c_next = F.where(enable, c_tmp, c_pre)
    h_next = F.where(enable, h_tmp, h_pre)
    # output using at
    at = F.sigmoid(self.vt(h_next))
    # print(at.data)
    pg_pre = self.wg(h_next)
    pg = pg_pre * F.broadcast_to(
        (1 - at), shape=(pg_pre.data.shape[0], pg_pre.data.shape[1]))
    pe_pre = self.we(h_next)
    pe = pe_pre * F.broadcast_to(
        at, shape=(pe_pre.data.shape[0], pe_pre.data.shape[1]))
    # version that does not use broadcast:
    # pg = chainer.Variable(self.wg(h_next).data * (1 - at).data)
    # pe = chainer.Variable(self.we(h_next).data * at.data)
    return F.concat((pg, pe)), at, c_next, h_next
def move(self, action, visual_image=None):
    action_units = [0, 0, 0, 0]
    action_units[action] = 1
    if visual_image is None:
        data = np.array(
            [action_units + self.predicted_visual_image.tolist()],
            dtype='float32')
    else:
        data = np.array([action_units + visual_image.tolist()], dtype='float32')
    x = chainer.Variable(data, volatile=True)
    h_in = self.lstm.x_to_h(x) + self.lstm.h_to_h(self.state['h'])
    c, h = F.lstm(self.state['c'], h_in)
    self.state = {'c': c, 'h': h}
    y = self.lstm.h_to_y(h)
    sigmoid_y = 1 / (1 + np.exp(-y.data))
    self.predicted_visual_image = \
        np.round((np.sign(sigmoid_y - 0.5) + 1) / 2)[0]
    coordinate_id = self.svm.predict(h.data[0])[0]
    self.set_coordinate_id(coordinate_id)
    return self.virtual_coordinate
def forward_one_step(c, h, cur_word, next_word):
    i = Variable(np.array([cur_word], dtype=np.int32))
    t = Variable(np.array([next_word], dtype=np.int32))
    x = F.tanh(model.embed(i))
    c, h = F.lstm(c, model.x_to_h(x) + model.h_to_h(h))
    y = F.tanh(model.h_to_y(h))
    return c, h, F.softmax_cross_entropy(y, t)
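# Hedged usage sketch (added for illustration, not part of the original snippets):
# a minimal loop that drives the forward_one_step function above across one
# sentence of word IDs. The names `forward_sentence`, `word_ids`, and `n_units`
# are assumptions; `model` must be the same object with embed, x_to_h, h_to_h,
# and h_to_y links that forward_one_step already refers to.
import numpy as np
from chainer import Variable


def forward_sentence(word_ids, n_units):
    # zero-initialised cell and hidden state for a batch of one sequence
    c = Variable(np.zeros((1, n_units), dtype=np.float32))
    h = Variable(np.zeros((1, n_units), dtype=np.float32))
    loss = 0
    # predict each next word from the current one, accumulating the loss
    for cur_word, next_word in zip(word_ids, word_ids[1:]):
        c, h, step_loss = forward_one_step(c, h, cur_word, next_word)
        loss += step_loss
    return loss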