Example #1
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        with cuda.get_device_from_array(x1_data):
            c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size),
                                           dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(
            self.link.h.data, [batch], axis=0)
        y2 = self.link(x2)
        with cuda.get_device_from_array(x1_data):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
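All of the snippets on this page drive chainer.functions.lstm, which takes the previous cell state and a pre-activation array carrying four gate slots per output unit, and returns the new cell state and the new hidden state. A minimal, self-contained sketch of that contract (the array sizes are illustrative assumptions, not taken from any example):

import numpy as np
import chainer
import chainer.functions as F

batch, out_size = 2, 3
c_prev = chainer.Variable(np.zeros((batch, out_size), dtype=np.float32))
# The input must have 4 * out_size columns: one pre-activation per unit for
# each of the candidate, input, forget, and output gates, usually produced
# by Linear links such as the upward/lateral pair in Example #1.
x = chainer.Variable(
    np.random.randn(batch, 4 * out_size).astype(np.float32))
c_new, h_new = F.lstm(c_prev, x)
assert c_new.data.shape == (batch, out_size)
assert h_new.data.shape == (batch, out_size)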
Example #2
File: model.py Project: odashi/chainer_nmt
  def _encode(self, x_list):
    batch_size = len(x_list[0])
    source_length = len(x_list)

    # Encoding
    fc = bc = f = b = _zeros((batch_size, self.hidden_size))
    i_list = [self.x_i(_mkivar(x)) for x in x_list]
    f_list = []
    b_list = []
    for i in i_list:
      fc, f = F.lstm(fc, self.i_f(i) + self.f_f(f))
      f_list.append(f)
    for i in reversed(i_list):
      bc, b = F.lstm(bc, self.i_b(i) + self.b_b(b))
      b_list.append(b)
    b_list.reverse()

    # Making concatenated matrix
    # {f,b}_mat: shape = [batch, srclen, hidden]
    f_mat = F.concat([F.expand_dims(f, 1) for f in f_list], 1)
    b_mat = F.concat([F.expand_dims(b, 1) for b in b_list], 1)
    # fb_mat: shape = [batch, srclen, 2 * hidden]
    fb_mat = F.concat([f_mat, b_mat], 2)
    # fbe_mat: shape = [batch * srclen, atten]
    fbe_mat = self.fb_e(
        F.reshape(fb_mat, [batch_size * source_length, 2 * self.hidden_size]))

    return fb_mat, fbe_mat, fc, bc, f_list[-1], b_list[0]
Example #3
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        with cuda.get_device_from_array(x1_data):
            c0 = chainer.Variable(
                xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
            c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        assert h1.data == pytest.approx(h1_expect.data)
        assert self.link.h.data == pytest.approx(h1_expect.data)
        assert self.link.c.data == pytest.approx(c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch],
                                              axis=0)
        y2 = self.link(x2)
        with cuda.get_device_from_array(x1_data):
            c2_expect, y2_expect = \
                functions.lstm(c1_expect,
                               self.link.upward(x2) + self.link.lateral(h1_in))
        assert y2.data == pytest.approx(y2_expect.data)
        assert self.link.h.data[:batch] == pytest.approx(y2_expect.data)
        assert self.link.h.data[batch:] == pytest.approx(h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        _, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        assert y3.data == pytest.approx(y3_expect.data)
        assert self.link.h.data == pytest.approx(h2_rest.data)
Example #4
    def __call__(self, words):
        ### forward LSTM
        c = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
        h = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))

        hs = []
        for word in words:
            e = self.embed(word)
            lstm_in = self.xh(e) + self.hh(h)
            c, h = F.lstm(c, lstm_in)
            hs.append(h)

        ### backward LSTM
        c = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))
        h = chainer.Variable(np.zeros((1, self.n_hid), dtype=np.float32))

        hs_b = []
        for word in reversed(words):
            e = self.embed(word)
            lstm_in = self.xh_b(e) + self.hh_b(h)
            c, h = F.lstm(c, lstm_in)
            hs_b.append(h)
        hs_b.reverse()

        ### MLP
        ys = []
        for h_i, hs_b_i in zip(hs, hs_b):
            o1 = self.ho(F.concat([h_i, hs_b_i]))
            y_i = self.o(F.sigmoid(o1))
            ys.append(y_i)
        return ys
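Example #4's __call__ assumes a surrounding Chain that owns the links it uses (embed, xh, hh, xh_b, hh_b, ho, o). A hypothetical container in the old keyword-argument Chain style that matches these snippets; only the link names come from the example, while the sizes and vocabulary are assumptions:

import chainer
import chainer.links as L

class BiLSTMTagger(chainer.Chain):
    # Hypothetical container for Example #4; sizes are illustrative.
    def __init__(self, n_vocab=1000, n_emb=50, n_hid=100, n_out=2):
        super(BiLSTMTagger, self).__init__(
            embed=L.EmbedID(n_vocab, n_emb),
            xh=L.Linear(n_emb, 4 * n_hid),    # forward LSTM, input weights
            hh=L.Linear(n_hid, 4 * n_hid),    # forward LSTM, recurrent weights
            xh_b=L.Linear(n_emb, 4 * n_hid),  # backward LSTM, input weights
            hh_b=L.Linear(n_hid, 4 * n_hid),  # backward LSTM, recurrent weights
            ho=L.Linear(2 * n_hid, n_hid),    # MLP over the concatenated states
            o=L.Linear(n_hid, n_out),
        )
        self.n_hid = n_hid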
Example #5
    def forward(self, x_data, y_data, train=True):
        x = Variable(np.array(x_data, dtype=np.float32), volatile=not train)
        t = Variable(y_data, volatile=not train)
        state_c1 = Variable(LSTMcomponent.inputs["state_c1"],
                            volatile=not train)
        state_h1 = Variable(LSTMcomponent.inputs["state_h1"],
                            volatile=not train)
        state_c2 = Variable(LSTMcomponent.inputs["state_c2"],
                            volatile=not train)
        state_h2 = Variable(LSTMcomponent.inputs["state_h2"],
                            volatile=not train)

        #h0 = self.l0(x)
        h1_in = self.l1_x(F.dropout(x, train=train)) + self.l1_h(state_h1)
        c1, h1 = F.lstm(state_c1, h1_in)
        h2_in = self.l2_x(F.dropout(h1, train=train)) + self.l2_h(state_h2)
        c2, h2 = F.lstm(state_c2, h2_in)
        y = self.l3(F.dropout(h2, train=train))
        LSTMcomponent.inputs["state_c1"] = c1
        LSTMcomponent.inputs["state_h1"] = h1
        LSTMcomponent.inputs["state_c2"] = c2
        LSTMcomponent.inputs["state_h2"] = h2

        loss = F.softmax_cross_entropy(y, t)
        accuracy = F.accuracy(y, t)

        return loss, accuracy
Example #6
    def forward_one_step(self, x_data, y_data, state, train=True,
                         dropout_ratio=0.5):
        x = Variable(x_data)
        t = Variable(y_data)
        h0 = self.embed(x)
        h1_in = self.l1_x(F.dropout(h0, ratio=dropout_ratio)) + self.l1_h(
            state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        h2_in = self.l2_x(F.dropout(h1, ratio=dropout_ratio)) + self.l2_h(
            state['h2'])
        c2, h2 = F.lstm(state['c2'], h2_in)
        y = self.l3(F.dropout(h2, ratio=dropout_ratio))
        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
        if train:
            return state, F.softmax_cross_entropy(y, t)
        else:
            return state, F.softmax(y)
Example #7
    def check_forward(self, x1_data, x2_data, x3_data):
        xp = self.link.xp
        x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
        h1 = self.link(x1)
        c0 = chainer.Variable(
            xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        batch = len(x2_data)
        x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
        h1_in, h1_rest = functions.split_axis(self.link.h.data, [batch],
                                              axis=0)
        y2 = self.link(x2)
        c2_expect, y2_expect = \
            functions.lstm(c1_expect,
                           self.link.upward(x2) + self.link.lateral(h1_in))
        testing.assert_allclose(y2.data, y2_expect.data)
        testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
        testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)

        x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
        h2_rest = self.link.h
        y3 = self.link(x3)
        c3_expect, y3_expect = \
            functions.lstm(c2_expect, self.link.upward(x3))
        testing.assert_allclose(y3.data, y3_expect.data)
        testing.assert_allclose(self.link.h.data, h2_rest.data)
Example #8
File: lstm.py Project: YinghanWang/seq2seq
    def forward_one(
            self,
            word: Variable,
            state: State,
            dropout: bool=False,
            train: bool=False
    ) -> Tuple[Variable, State]:
        y0 = self.embed(word)
        if dropout:
            h1_in = self.l1(F.dropout(y0, train=train)) + self.h1(state["h1"])
            c1, h1 = F.lstm(state["c1"], h1_in)
            h2_in = self.l2(F.dropout(h1, train=train)) + self.h2(state["h2"])
            c2, h2 = F.lstm(state["c2"], h2_in)
            h3 = self.l3(F.dropout(h2, train=train))
        else:
            h1_in = self.l1(y0) + self.h1(state["h1"])
            c1, h1 = F.lstm(state["c1"], h1_in)
            h2_in = self.l2(h1) + self.h2(state["h2"])
            c2, h2 = F.lstm(state["c2"], h2_in)
            h3 = self.l3(h2)

        new_state = {
            "h1": h1, "c1": c1,
            "h2": h2, "c2": c2,
        }
        return h3, new_state
Example #9
    def generate_z_x(self, seq_length_per_z, sample_z):
        print("sample_z shape: " + str(sample_z.shape))
        # output = np.zeros((seq_length_per_z * sample_z.shape[0], self.recog_in_h.W.data.shape[1]))
        output = []

        state = self.make_initial_state()

        for epoch in range(sample_z.shape[0]):
            # gen_out = np.zeros((seq_length_per_z, output.shape[1]))
            z = Variable(sample_z[epoch].reshape((1, sample_z.shape[1])))
            print("epoch: " + str(epoch) + " z: " + str(z.data))

            # =====[ Step 2: Compute p(x|z) - decoding step ]=====
            # Initial step
            # output = []
            h_in = self.gen_z_h(z)
            c_t, h_t = F.lstm(state["c_gen"], h_in)
            state.update({"c_gen": c_t, "h_gen": h_t})
            rec_loss = Variable(np.zeros((), dtype=np.float32))

            for i in range(seq_length_per_z):
                # Get output and loss
                x_t = self.output(h_t)

                print("size of x-prime's output data sequence: " + str(x_t.data.shape))
                # reshape data from (1,88) to (88)
                output.append(x_t.data.reshape(x_t.data.shape[1:]))

                # Get next hidden state
                h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
                c_t, h_t = F.lstm(state["c_gen"], h_in)
                state.update({"c_gen": c_t, "h_gen": h_t})

        return np.array(output)
Example #10
 def decode(self, p, t=None):
     """
     @param p
     @param t ground truth
     """
     y = self.w_py(p)
     if self.phase is Seq2Seq.Train:
         loss = F.mean_squared_error(y, t)
         # Teacher forcing: feed the ground truth t back into the LSTM.
         self.cell_state, p = F.lstm(
             self.cell_state,
             self.w_yp(t) + self.w2_pp(self.previous_p)
         )
         self.previous_p = p
         return p, loss
     elif self.phase is Seq2Seq.Valid:
         loss = F.mean_squared_error(y, t)
         # Validation: feed back the model's own prediction y.
         self.cell_state, p = F.lstm(
             self.cell_state,
             self.w_yp(y) + self.w2_pp(self.previous_p)
         )
         self.previous_p = p
         return p, loss
     else: # Test
         self.cell_state, p = F.lstm(
             self.cell_state,
             self.w_yp(y) + self.w2_pp(self.previous_p)
         )
         self.previous_p = p
         return p, y 
Example #13
File: mynet.py Project: hkiyomaru/lm
    def __call__(self, x, y, state, train=True, target=True):
        if train:
            h = Variable(x.reshape(self.batchsize, 12), volatile=not train)
        else:
            h = Variable(x, volatile=not train)

        t = Variable(y.flatten(), volatile=not train)

        h0 = F.relu(self.l0(h))

        if target == False:
            data = h0.data
            self.data_first.append(data)

        h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
        h1_in = F.dropout(F.tanh(h1_in), train=train)
        c1, h1 = F.lstm(state['c1'], h1_in)
        h2_in = F.dropout(F.tanh(self.l2_x(h1)), train=train) + self.l2_h(
            state['h2'])
        c2, h2 = F.lstm(state['c2'], h2_in)

        if target == False:
            data = h1.data
            self.data_hidden.append(data)

        y = self.l3(h2)

        if target == False:
            data = y.data
            self.data_output.append(data)
        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
        self.loss = F.softmax_cross_entropy(y, t)

        return state, self.loss
Example #15
    def forward_one_step(self, x_data, c_data, y_data, state, train=True):
        x = chainer.Variable(x_data, volatile=not train)
        t = chainer.Variable(y_data, volatile=not train)
        c = chainer.Variable(c_data, volatile=not train)

        h1_in = self.l1_first(x) + self.l1_recur(state['h1']) + self.l1_w(state['w'])
        c1, h1 = F.lstm(state['c1'], h1_in)

        # soft window
        ws = F.exp(self.lw(h1))
        w_mixws, w_gains, w_means = split_axis_by_widths(ws, 3)
        w_means += state['w_means']
        w = self.forward_window(w_mixws, w_gains, w_means, c)

        h2_in = self.l2_first(x) + self.l2_recur(state['h2']) + self.l1_w(w) + self.l2_input(h1)
        c2, h2 = F.lstm(state['c2'], h2_in)

        h3_in = self.l3_first(x) + self.l3_recur(state['h3']) + self.l1_w(w) + self.l3_input(h2)
        c3, h3 = F.lstm(state['c3'], h3_in)

        y = self.l4(F.concat((h1, h2, h3)))

        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2, 'c3': c3, 'h3': h3,
                 'w': w, 'w_means': w_means}
        return state, loss_func(self.noutput_gauss, y, t)
Example #16
    def predict(self, x_data, state):
        x = Variable(x_data)
        h1_in = self.l1_x(x) + self.l1_h(state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
        c2, h2 = F.lstm(state['c2'], h2_in)
        y = F.softmax(self.l3(h2))

        return y.data
Example #17
def forward_one_step_embed(x_data, state):

    x = chainer.Variable(x_data, volatile=True)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=False)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=False)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    return {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
Example #18
    def predict(self, x_data, state):
        x = Variable(x_data.astype(np.int32), volatile=True)

        h0      = self.embed(x)
        h1_in   = self.l1_x(h0) + self.l1_h(state['h1'])
        c1, h1  = F.lstm(state['c1'], h1_in)
        h2_in   = self.l2_x(h1) + self.l2_h(state['h2'])
        c2, h2  = F.lstm(state['c2'], h2_in)
        y       = self.l3(h2)
        state   = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}

        return state, F.softmax(y)
Example #19
def forward_one_predict(x_data, state, train=False):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax(y).data
Example #20
File: lstm.py Project: mtjune/lstm_bot
 def _forward_one_step(self, x_data, state, train=True):
     # Neural net architecture
     x = chainer.Variable(x_data, volatile=not train)
     h0 = self.model.embed(x)
     h1_in = self.model.l1_x(F.dropout(h0, train=train)) + self.model.l1_h(state['h1'])
     c1, h1 = F.lstm(state['c1'], h1_in)
     h2_in = self.model.l2_x(F.dropout(h1, train=train)) + self.model.l2_h(state['h2'])
     c2, h2 = F.lstm(state['c2'], h2_in)
     y = self.model.l3(F.dropout(h2, train=train))
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return state, y
Example #21
 def __call__(self, word):
     x_list = [XP.iarray([min(ord(x), 0x7f)]) for x in word]
     ac = self.__EMBED_ZEROS
     a = self.__EMBED_ZEROS
     for x in x_list:
         ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
     bc = self.__EMBED_ZEROS
     b = self.__EMBED_ZEROS
     for x in reversed(x_list):
         bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
     return a, b
Example #22
 def __call__(self, word):
   x_list = [self.__char_vram[min(ord(x), 0x7f)] for x in word]
   ac = self.__EMBED_ZEROS
   a = self.__EMBED_ZEROS
   for x in x_list:
     ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
   bc = self.__EMBED_ZEROS
   b = self.__EMBED_ZEROS
   for x in reversed(x_list):
     bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
   return a, b
Example #24
File: CharRNN.py Project: k1nk/ch5
 def predict(self, x_data, state, dropout_ratio=0.5):
     x = Variable(x_data)
     h0 = self.embed(x)
     h1_in = self.l1_x(F.dropout(h0, ratio=dropout_ratio)) + self.l1_h(
         state['h1'])
     c1, h1 = F.lstm(state['c1'], h1_in)
     h2_in = self.l2_x(F.dropout(h1, ratio=dropout_ratio)) + self.l2_h(
         state['h2'])
     c2, h2 = F.lstm(state['c2'], h2_in)
     y = self.l3(F.dropout(h2, ratio=dropout_ratio))
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return state, F.softmax(y)
Example #25
File: seq2seq.py Project: lrlab/LSTM
def forward(model, source_sentence, target_sentence, training):

    # convert word to ID, add End of Sentence
    source = model.src_vocabulary.convert(source_sentence)
    if target_sentence:
        target = model.trg_vocabulary.convert(target_sentence)

    c = Variable(np.zeros((1, model.hidden_size), dtype=np.float32))
    p = Variable(np.zeros((1, model.hidden_size), dtype=np.float32))

    # encoder
    for word_id in source[::-1]:
        x = Variable(np.array([word_id], dtype=np.int32))
        e = model.w_xe(x)
        p1 = model.w_ep(e)
        p2 = model.w_pp(p)

        # ( W*x + W*h )
        lstm_input = p1 + p2
        # update the memory cell and the hidden state
        c, p = F.lstm(c, lstm_input)

    # decoder
    EOS = model.trg_vocabulary.word_to_id("<EOS>")
    q = p
    y = Variable(np.array([EOS], dtype=np.int32))

    if training:
        loss = Variable(np.zeros((), dtype=np.float32))
        for word_id in target:
            e = model.w_ey(y)
            lstm_input = model.w_qq(q) + model.w_qe(e)
            c, q = F.lstm(c, lstm_input)
            y = model.w_yq(q)
            t = Variable(np.array([word_id], dtype=np.int32))
            loss += F.softmax_cross_entropy(y, t)
            y = t
        return loss

    else:
        sentence = []
        while len(sentence) < 100:
            e = model.w_ey(y)
            lstm_input = model.w_qq(q) + model.w_qe(e)
            c, q = F.lstm(c, lstm_input)
            y = model.w_yq(q)
            word_id = np.argmax(y.data, axis=1)
            y = Variable(np.array(word_id, dtype=np.int32))
            if word_id[0] == EOS:
                sentence.append(model.trg_vocabulary.id_to_word(word_id[0]))
                break
            sentence.append(model.trg_vocabulary.id_to_word(word_id[0]))
        return sentence
Example #26
def forward_one_step(x_data, y_data, state, train=True):
    # Neural net architecture
    x = chainer.Variable(x_data, volatile=not train)
    t = chainer.Variable(y_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
    return state, F.softmax_cross_entropy(y, t)
Example #27
File: lm.py Project: philip30/chainn
def forward_one_step_lstm(model, state, cur_word, next_word, train=True):
    x      = Variable(cur_word, volatile=not train)
    t      = Variable(next_word, volatile=not train)
    h0     = model.embed(x)
    h1_in  = model.l1_x(F.tanh(h0)) + model.l1_h(state["h1"])
    c1, h1 = F.lstm(state["c1"], h1_in)
    h2_in  = model.l2_x(F.tanh(h1)) + model.l2_h(state["h2"])
    c2, h2 = F.lstm(state["c2"], h2_in)
    y      = model.l3(F.tanh(h2))
    state  = {"c1": c1, "h1": h1, "c2": c2, "h2":h2}
    loss   = F.softmax_cross_entropy(y, t)
    return state, loss
Example #28
    def predict(self, x_data, state):
        x = Variable(x_data, volatile=True)

        h0 = self.embed(x)
        h1_in = self.l1_x(h0) + self.l1_h(state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        h2_in = self.l2_x(h1) + self.l2_h(state['h2'])
        c2, h2 = F.lstm(state['c2'], h2_in)
        y = self.l3(h2)
        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}

        return state, F.softmax(y)
Example #30
File: lstm.py Project: mtjuney/lstm_bot
 def _forward_one_step(self, x_data, state, train=True):
     # Neural net architecture
     x = chainer.Variable(x_data, volatile=not train)
     h0 = self.model.embed(x)
     h1_in = self.model.l1_x(F.dropout(h0, train=train)) + self.model.l1_h(
         state['h1'])
     c1, h1 = F.lstm(state['c1'], h1_in)
     h2_in = self.model.l2_x(F.dropout(h1, train=train)) + self.model.l2_h(
         state['h2'])
     c2, h2 = F.lstm(state['c2'], h2_in)
     y = self.model.l3(F.dropout(h2, train=train))
     state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
     return state, y
Example #31
 def __call__(self, word):
   x_list = [XP.iarray([min(ord(x), 0x7f)]) for x in word]
   ac = self.__EMBED_ZEROS
   a = self.__EMBED_ZEROS
   for x in x_list:
     ac, a = functions.lstm(ac, self.w_xa(x) + self.w_aa(a))
     a = XP.dropout(a)
   bc = self.__EMBED_ZEROS
   b = self.__EMBED_ZEROS
   for x in reversed(x_list):
     bc, b = functions.lstm(bc, self.w_xb(x) + self.w_bb(b))
     b = XP.dropout(b)
   return a, b
Example #32
def forward_one_step(x_data, state, train=True):
    if args.gpu >= 0:
        x_data = cuda.to_gpu(x_data)
    x = chainer.Variable(x_data, volatile=not train)
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, train=train)) + model.l1_h(state['h1'])
    c1, h1 = F.lstm(state['c1'], h1_in)
    h2_in = model.l2_x(F.dropout(h1, train=train)) + model.l2_h(state['h2'])
    c2, h2 = F.lstm(state['c2'], h2_in)
    y = model.l3(F.dropout(h2, train=train))
    state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}

    return state, F.softmax(y)
Example #33
def forward_one_step(x, state, train=True):
    drop_ratio = 0.5
    h0 = model.embed(x)
    h1_in = model.l1_x(F.dropout(h0, ratio=drop_ratio, train=train)) + model.l1_h(state["h1"])
    c1, h1 = F.lstm(state["c1"], h1_in)

    h2_in = model.l2_x(F.dropout(h1, ratio=drop_ratio, train=train)) + model.l2_h(state["h2"])
    c2, h2 = F.lstm(state["c2"], h2_in)

    #    ya = F.relu(model.l3a(F.dropout(h2,ratio=drop_ratio, train=train)))
    y = model.l3(F.dropout(h2, ratio=drop_ratio, train=train))
    state = {"c1": c1, "h1": h1, "c2": c2, "h2": h2}
    return state, y
Example #36
File: test_lstm.py Project: RE-ID/chainer
    def check_forward(self, x_data):
        xp = self.link.xp
        x = chainer.Variable(x_data)
        h1 = self.link(x)
        c0 = chainer.Variable(xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(self.link.h.data, h1_expect.data)
        testing.assert_allclose(self.link.c.data, c1_expect.data)

        h2 = self.link(x)
        c2_expect, h2_expect = functions.lstm(c1_expect, self.link.upward(x) + self.link.lateral(h1))
        testing.assert_allclose(h2.data, h2_expect.data)
Example #37
File: CharRNN.py Project: k-utsubo/stock
    def forward_one_step(self, x_data, y_data, state, train=True, dropout_ratio=0.5):
        x = Variable(x_data.astype(np.int32), volatile=not train)
        t = Variable(y_data.astype(np.int32), volatile=not train)

        h0      = self.embed(x)
        h1_in   = self.l1_x(F.dropout(h0, ratio=dropout_ratio, train=train)) + self.l1_h(state['h1'])
        c1, h1  = F.lstm(state['c1'], h1_in)
        h2_in   = self.l2_x(F.dropout(h1, ratio=dropout_ratio, train=train)) + self.l2_h(state['h2'])
        c2, h2  = F.lstm(state['c2'], h2_in)
        y       = self.l3(F.dropout(h2, ratio=dropout_ratio, train=train))
        state   = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}

        return state, F.softmax_cross_entropy(y, t)
Example #40
    def forward(self, x_data, y_data, state, train=True):
        x = Variable(x_data, volatile=not train)
        t = Variable(y_data)
        h1_in = self.l1_x(F.dropout(x, train=train)) + self.l1_h(state['h1'])
        c1, h1 = F.lstm(state['c1'], h1_in)
        h2_in = self.l2_x(F.dropout(h1, train=train)) + self.l2_h(state['h2'])
        c2, h2 = F.lstm(state['c2'], h2_in)
        y = self.l3(F.dropout(h2, train=train))
        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2}
        Loss = F.softmax_cross_entropy(y, t)
        accuracy = F.accuracy(y, t)

        return state, Loss, accuracy, y.data, t.data
Example #41
    def forward_one_step(self, state, x_last, train=True):

        x = Variable(x_last, volatile=False)
        a = F.elu(self.conv1(x))

        l1 = F.dropout(F.elu(self.l1_x(a) + self.l1_h(state['h1'])),
                       train=train)
        c1, h1 = F.lstm(state['c1'], l1)
        l2 = F.dropout(F.elu(self.l2_h1(h1) + self.l2_h(state['h2'])),
                       train=train)
        c2, h2 = F.lstm(state['c2'], l2)

        state = {'c1': c1, 'h1': h1, 'c2': c2, 'h2': h2, 'x_last': x}
        return state
Example #42
    def forward(self, x_data, state):
        """
        Does encode/decode on x_data.
        :param x_data: input data (a single timestep) as a numpy.ndarray
        :param state: previous state of RNN
        :param nonlinear_q: nonlinearity used in q(z|x) (encoder)
        :param nonlinear_p: nonlinearity used in p(x|z) (decoder)
        :param output_f: #TODO#
        :return: output, recognition loss, KL Divergence, state
        """
        # =====[ Step 1: Compute q(z|x) - encoding step, get z ]=====
        # Forward encoding
        for i in range(x_data.shape[0]):
            # grab the i-th element of x
            x = Variable(x_data[i].reshape((1, x_data.shape[1])))
            h_in = self.recog_x_h(x) + self.recog_h_h(state["h_rec"])
            c_t, h_t = F.lstm(state["c_rec"], h_in)
            state.update({"c_rec": c_t, "h_rec": h_t})
        # Compute q_mean and q_log_sigma
        q_mean = self.recog_mean(state["h_rec"])
        q_log_sigma = 0.5 * self.recog_log_sigma(state["h_rec"])
        # Compute KL divergence based on q_mean and q_log_sigma
        KLD = -0.0005 * F.sum(1 + q_log_sigma - q_mean ** 2 - F.exp(q_log_sigma))
        # Compute as q_mean + noise*exp(q_log_sigma)
        eps = Variable(np.random.normal(0, 1, q_log_sigma.data.shape).astype(np.float32))
        z = q_mean + F.exp(q_log_sigma) * eps

        # =====[ Step 2: Compute p(x|z) - decoding step ]=====
        # Initial step
        output = []
        h_in = self.gen_z_h(z)
        c_t, h_t = F.lstm(state["c_gen"], h_in)
        state.update({"c_gen": c_t, "h_gen": h_t})
        rec_loss = Variable(np.zeros((), dtype=np.float32))
        for i in range(x_data.shape[0]):
            # Get output and loss
            x_t = self.output(h_t)
            output.append(x_t.data)
            # print("size of x_t output data sequence: " + str(x_t.data.shape))

            rec_loss += self.loss_func(x_t, Variable(x_data[i].reshape((1, x_data.shape[1]))))
            # Get next hidden state
            h_in = self.gen_x_h(x_t) + self.gen_h_h(state["h_gen"])
            c_t, h_t = F.lstm(state["c_gen"], h_in)
            state.update({"c_gen": c_t, "h_gen": h_t})

        # =====[ Step 3: Return output, reconstruction loss, and KLD ]=====
        return np.array(output), rec_loss, KLD, state
Example #43
 def __call__(self, feature, state, test=False, train=True, image=False):
     if image:
         h1_in = self.l1_x(feature) + self.l1_h(state['h1'])
         c1, h1 = F.lstm(state['c1'], h1_in)
         y = self.out(h1)
         state = {'c1': c1, 'h1': h1}
     else:
         h0 = self.embed(feature)
         h1_in = self.l1_x(h0) + self.l1_h(
             F.dropout(state['h1'], train=train))
         c1, h1 = F.lstm(state['c1'], h1_in)
         y = self.out(h1)
         state = {'c1': c1, 'h1': h1}
     return state, y
Example #44
    def check_forward(self, x_data):
        xp = self.link.xp
        x = chainer.Variable(x_data) if self.input_variable else x_data
        c1, h1 = self.link(None, None, x)
        c0 = chainer.Variable(
            xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
        testing.assert_allclose(h1.data, h1_expect.data)
        testing.assert_allclose(c1.data, c1_expect.data)

        c2, h2 = self.link(c1, h1, x)
        c2_expect, h2_expect = \
            functions.lstm(c1_expect,
                           self.link.upward(x) + self.link.lateral(h1))
        testing.assert_allclose(h2.data, h2_expect.data)
        testing.assert_allclose(c2.data, c2_expect.data)
Example #45
 def __call__(self, c, a, b, s1, r1, s2, r2, z):
   c, h = functions.lstm(
     c,
     self.w_az(a) + self.w_bz(b) + self.w_s1z(s1) + self.w_r1z(r1) + \
     self.w_s2z(s2) + self.w_r2z(r2) + self.w_zz(z),
   )
   return c, XP.dropout(h)
Example #46
def lstm_without_dropout(n_layer, dropout, hx, cx, ws, bs, xs):
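    # The eight weight/bias slots per layer follow Chainer's n_step_lstm
    # ordering; _stack_weight (defined elsewhere in the same test module) is
    # assumed to permute them into the gate layout that F.lstm expects.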
    xws = [_stack_weight([w[2], w[0], w[1], w[3]]) for w in ws]
    hws = [_stack_weight([w[6], w[4], w[5], w[7]]) for w in ws]
    xbs = [_stack_weight([b[2], b[0], b[1], b[3]]) for b in bs]
    hbs = [_stack_weight([b[6], b[4], b[5], b[7]]) for b in bs]
    xs = [xs[i] for i in range(3)]
    ys = []
    for x in xs:
        cx_next = []
        hx_next = []
        for layer in range(n_layer):
            c = cx[layer]
            h = hx[layer]

            if layer != 0:
                # Apply only dropout's 1/(1 - ratio) scaling, without
                # actually dropping any units.
                x = x * (1 / (1.0 - dropout))
            lstm_in = functions.linear(x, xws[layer], xbs[layer]) + \
                functions.linear(h, hws[layer], hbs[layer])
            c_new, h_new = functions.lstm(c, lstm_in)
            cx_next.append(c_new)
            hx_next.append(h_new)
            x = h_new
        cx = cx_next
        hx = hx_next
        ys.append(x)
    cy = functions.stack(cx)
    hy = functions.stack(hx)
    return hy, cy, ys
Example #47
def forward_one(x, target, hidden, prev_c, train_flag):
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    for i in range(distance):
        x.append('</s>')
        x.insert(0, '<s>')
    for i in range(-distance + 1, distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    # concatenate the dropped-out character window with the hidden state
    concat = F.concat((dropout_concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    #correct = get_onehot(label)
    #print(output.data, correct.data)
    return dist
Example #49
 def predict(self, x_data, y_data, state):
     x, t = Variable(x_data, volatile=False), Variable(y_data, volatile=False)
     h1_in = self.l1_x(x) + self.l1_h(state['h1'])
     c1, h1 = F.lstm(state['c1'], h1_in)
     y = self.l6(h1)
     state = {'c1': c1, 'h1': h1}
     return state, F.mean_squared_error(y, t)
Example #50
    def check_forward(self, x_data):
        xp = self.link.xp
        x = chainer.Variable(x_data)
        h1 = self.link(x)
        c0 = chainer.Variable(
            xp.zeros((len(self.x), self.out_size), dtype=self.x.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x))
        gradient_check.assert_allclose(h1.data, h1_expect.data)
        gradient_check.assert_allclose(self.link.h.data, h1_expect.data)
        gradient_check.assert_allclose(self.link.c.data, c1_expect.data)

        h2 = self.link(x)
        c2_expect, h2_expect = \
            functions.lstm(c1_expect,
                           self.link.upward(x) + self.link.lateral(h1))
        gradient_check.assert_allclose(h2.data, h2_expect.data)
Example #51
    def forward_one_step(self,
                         x_vis,
                         x_dep,
                         train_label,
                         c,
                         h,
                         volatile=False):
        x1 = Variable(x_vis.reshape(1, 1, x_vis.shape[0], x_vis.shape[1]),
                      volatile=volatile)
        h1 = F.max_pooling_2d(F.relu(self.bn11(self.conv11(x1))), 2, stride=2)
        h1 = F.max_pooling_2d(F.relu(self.bn12(self.conv12(h1))), 2, stride=2)
        h1 = F.max_pooling_2d(F.relu(self.conv13(h1)), 2, stride=2)
        h1 = self.fc14(h1)

        x2 = Variable(x_dep.reshape(1, 1, x_dep.shape[0], x_dep.shape[1]),
                      volatile=volatile)
        h2 = F.max_pooling_2d(F.relu(self.bn21(self.conv21(x2))), 2, stride=2)
        h2 = F.max_pooling_2d(F.relu(self.bn22(self.conv22(h2))), 2, stride=2)
        h2 = F.max_pooling_2d(F.relu(self.conv23(h2)), 2, stride=2)
        h2 = self.fc24(h2)

        # concatenate the outputs of the visible CNN and the depth CNN
        lstm_input = F.concat((h1, h2), axis=1)
        t = Variable(train_label, volatile=volatile)

        h_in = self.i2h(F.dropout(lstm_input,
                                  train=not volatile)) + self.h2h(h)
        c, h = F.lstm(c, h_in)

        y = self.h2y(F.dropout(h, train=not volatile))
        return F.softmax_cross_entropy(y, t), y, c, h
Example #52
 def forward_one_step(self, x_data, y_data, state, train=True,
                      dropout_ratio=0.0):
     x = Variable(x_data, volatile=not train)
     t = Variable(y_data, volatile=not train)
     h1_in = self.l1_x(F.dropout(x, ratio=dropout_ratio, train=train)) + \
         self.l1_h(state['h1'])
     c1, h1 = F.lstm(state['c1'], h1_in)
     y = self.l6(F.dropout(h1, ratio=dropout_ratio, train=train))
     state = {'c1': c1, 'h1': h1}
     return state, F.mean_squared_error(y, t)
Example #54
 def __call__(self, x, c_pre, h_pre, train=True):
     e = F.tanh(self.xe(x))
     c_tmp, h_tmp = F.lstm(c_pre, self.eh(e) + self.hh(h_pre))
     # Flag marking the batch rows where x is not the padding value -1.
     enable = chainer.Variable(
         chainer.Variable(x.data != -1).data.reshape(len(x), 1))
     c_next = F.where(enable, c_tmp, c_pre)  # keep c_pre where x == -1
     h_next = F.where(enable, h_tmp, h_pre)  # keep h_pre where x == -1
     return c_next, h_next
Example #56
 def _encode(self, x_list):
     batch_size = len(x_list[0])
     pc = p = _zeros((batch_size, self.hidden_size))
     for x in reversed(x_list):
         i = self.x_i(_mkivar(x))
         pc, p = F.lstm(pc, self.i_p(i) + self.p_p(p))
     return pc, p
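Examples #2 and #56 call the helpers _zeros and _mkivar, which are not shown on this page. Plausible definitions, offered only as an assumption about what they do, would be:

import numpy as np
import chainer

def _zeros(shape):
    # Hypothetical helper: a zero-initialized float32 Variable.
    return chainer.Variable(np.zeros(shape, dtype=np.float32))

def _mkivar(array):
    # Hypothetical helper: wrap an integer sequence as an int32 Variable
    # suitable for EmbedID lookup.
    return chainer.Variable(np.asarray(array, dtype=np.int32))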
Example #57
    def __call__(self, y, y_label, c_pre, h_pre, train=True):
        # input word embedding
        e = F.tanh(self.ye(y))
        e_l = F.tanh(self.le(y_label))

        # LSTM
        c_tmp, h_tmp = F.lstm(
            c_pre,
            F.dropout(self.eh(F.concat((e, e_l))), ratio=0.2, train=train)
            + self.hh(h_pre))
        enable = chainer.Variable(
            chainer.Variable(y.data != -1).data.reshape(len(y), 1))
        c_next = F.where(enable, c_tmp, c_pre)
        h_next = F.where(enable, h_tmp, h_pre)

        # output using at
        at = F.sigmoid(self.vt(h_next))
        #print(at.data)
        pg_pre = self.wg(h_next)
        pg = pg_pre * F.broadcast_to(
            (1 - at), shape=(pg_pre.data.shape[0], pg_pre.data.shape[1]))
        pe_pre = self.we(h_next)
        pe = pe_pre * F.broadcast_to(
            at, shape=(pe_pre.data.shape[0], pe_pre.data.shape[1]))

        # version that does not use broadcast:
        # pg = chainer.Variable(self.wg(h_next).data * (1 - at).data)
        # pe = chainer.Variable(self.we(h_next).data * at.data)
        return F.concat((pg, pe)), at, c_next, h_next
Example #58
    def move(self, action, visual_image=None):
        action_units = [0, 0, 0, 0]
        action_units[action] = 1

        if visual_image is None:
            data = np.array(
                [action_units + self.predicted_visual_image.tolist()],
                dtype='float32')
        else:
            data = np.array([action_units + visual_image.tolist()],
                            dtype='float32')
        x = chainer.Variable(data, volatile=True)
        h_in = self.lstm.x_to_h(x) + self.lstm.h_to_h(self.state['h'])
        c, h = F.lstm(self.state['c'], h_in)
        self.state = {'c': c, 'h': h}

        y = self.lstm.h_to_y(h)
        sigmoid_y = 1 / (1 + np.exp(-y.data))
        self.predicted_visual_image = \
            np.round((np.sign(sigmoid_y - 0.5) + 1) / 2)[0]

        coordinate_id = self.svm.predict(h.data[0])[0]
        self.set_coordinate_id(coordinate_id)

        return self.virtual_coordinate
Example #59
def forward_one_step(c, h, cur_word, next_word):
    i = Variable(np.array([cur_word], dtype=np.int32))
    t = Variable(np.array([next_word], dtype=np.int32))
    x = F.tanh(model.embed(i))
    c, h = F.lstm(c, model.x_to_h(x) + model.h_to_h(h))
    y = F.tanh(model.h_to_y(h))
    return c, h, F.softmax_cross_entropy(y, t)