Beispiel #1
0
    def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        src_len = len(src_batch[0])
        #src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos
        s_c = wrapper.zeros((batch_size, self.__n_hidden))
        
        # encoding
        s_x = wrapper.make_var(src_batch)
        #s_x = wrapper.make_var([src_stoi('</s>') for _ in range(batch_size)], dtype=np.int32)
        #s_i = tanh(m.w_xi(s_x))
        #s_c, s_p = lstm(s_c, m.w_ip(s_i))

        #for l in reversed(range(src_len)):
        #    s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
        #    s_i = tanh(m.w_xi(s_x))
        #    s_c, s_p = lstm(s_c, m.w_ip(s_i) + m.w_pp(s_p))

        s_c, s_q = lstm(s_c, m.w_x(s_x))
        hyp_batch = [[] for _ in range(batch_size)]
        
        # decoding
        if is_training:
            accum_loss = wrapper.zeros(())
            trg_len = len(trg_batch[0])
            
            for l in range(trg_len):
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                s_t = wrapper.make_var([trg_stoi(trg_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                s_c, s_q = lstm(s_c, m.w_yq(s_t) + m.w_qq(s_q))

            return hyp_batch, accum_loss
        else:
            while len(hyp_batch[0]) < generation_limit:
                s_j = tanh(m.w_qj(s_q))
                r_y = m.w_jy(s_j)
                output = wrapper.get_data(r_y).argmax(1)

                for k in range(batch_size):
                    hyp_batch[k].append(trg_itos(output[k]))

                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break

                s_y = wrapper.make_var(output, dtype=np.int32)
                s_c, s_q = lstm(s_c, m.w_yq(s_y) + m.w_qq(s_q))
            
            return hyp_batch
Beispiel #2
0
    def __forward(self, is_training, text):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        letters, labels = self.__make_input(is_training, text)
        n_letters = len(letters)

        accum_loss = wrapper.zeros(()) if is_training else None
        hidden_zeros = wrapper.zeros((1, self.__n_hidden))

        # embedding
        list_e = []
        for i in range(n_letters):
            s_x = wrapper.make_var([letters[i]], dtype=np.int32)
            list_e.append(tanh(m.w_xe(s_x)))

        # forward encoding
        s_a = hidden_zeros
        c = hidden_zeros
        list_a = []
        for i in range(n_letters):
            c, s_a = lstm(c, m.w_ea(list_e[i]) + m.w_aa(s_a))
            list_a.append(s_a)
        
        # backward encoding
        s_b = hidden_zeros
        c = hidden_zeros
        list_b = []
        for i in reversed(range(n_letters)):
            c, s_b = lstm(c, m.w_eb(list_e[i]) + m.w_bb(s_b))
            list_b.append(s_b)
        
        # segmentation
        scores = []
        for i in range(n_letters - 1):
            s_y = tanh(m.w_ay1(list_a[i]) + m.w_by1(list_b[i]) + m.w_ay2(list_a[i + 1]) + m.w_by2(list_b[i + 1]))
            scores.append(float(wrapper.get_data(s_y)))
            
            if is_training:
                s_t = wrapper.make_var([[labels[i]]])
                accum_loss += functions.mean_squared_error(s_y, s_t)

        return scores, accum_loss
Beispiel #3
0
    def __forward(self, is_training, text):
        m = self.__model
        tanh = functions.tanh
        letters, labels = self.__make_input(is_training, text)
        scores = []
        accum_loss = wrapper.zeros(()) if is_training else None
            
        for n in range(len(letters) - 2 * self.__n_context + 1):
            s_hu = wrapper.zeros((1, self.__n_hidden))
            
            for k in range(2 * self.__n_context):
                wid = k * len(self.__vocab) + letters[n + k]
                s_x = wrapper.make_var([wid], dtype=np.int32)
                s_hu += m.w_xh(s_x)
            
            s_hv = tanh(s_hu)
            s_y = tanh(m.w_hy(s_hv))
            scores.append(float(wrapper.get_data(s_y)))
            
            if is_training:
                s_t = wrapper.make_var([[labels[n]]])
                accum_loss += functions.mean_squared_error(s_y, s_t)

        return scores, accum_loss
    def __forward(self, is_training, src_batch, trg_batch = None, generation_limit = None):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        batch_size = len(src_batch)
        hidden_size = self.__n_hidden
        src_len = len(src_batch[0])
        trg_len = len(trg_batch[0]) - 1 if is_training else generation_limit
        src_stoi = self.__src_vocab.stoi
        trg_stoi = self.__trg_vocab.stoi
        trg_itos = self.__trg_vocab.itos

        hidden_zeros = wrapper.zeros((batch_size, hidden_size))
        sum_e_zeros = wrapper.zeros((batch_size, 1))

        # make embedding
        list_x = []
        for l in range(src_len):
            s_x = wrapper.make_var([src_stoi(src_batch[k][l]) for k in range(batch_size)], dtype=np.int32)
            list_x.append(s_x)

        # forward encoding
        c = hidden_zeros
        s_a = hidden_zeros
        list_a = []
        for l in range(src_len):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_a = lstm(c, m.w_ia(s_i) + m.w_aa(s_a))
            list_a.append(s_a)
        
        # backward encoding
        c = hidden_zeros
        s_b = hidden_zeros
        list_b = []
        for l in reversed(range(src_len)):
            s_x = list_x[l]
            s_i = tanh(m.w_xi(s_x))
            c, s_b = lstm(c, m.w_ib(s_i) + m.w_bb(s_b))
            list_b.insert(0, s_b)

        # decoding
        c = hidden_zeros
        s_p = tanh(m.w_ap(list_a[-1]) + m.w_bp(list_b[0]))
        s_y = wrapper.make_var([trg_stoi('<s>') for k in range(batch_size)], dtype=np.int32)

        hyp_batch = [[] for _ in range(batch_size)]
        accum_loss = wrapper.zeros(()) if is_training else None
        
        #for n in range(src_len):
        #    six.print_(src_batch[0][n], end=' ')
        #six.print_()

        for l in range(trg_len):
            # calculate attention weights
            list_e = []
            sum_e = sum_e_zeros
            for n in range(src_len):
                s_w = tanh(m.w_aw(list_a[n]) + m.w_bw(list_b[n]) + m.w_pw(s_p))
                r_e = functions.exp(m.w_we(s_w))
                #list_e.append(functions.concat(r_e for _ in range(self.__n_hidden)))
                list_e.append(r_e)
                sum_e += r_e
            #sum_e = functions.concat(sum_e for _ in range(self.__n_hidden))

            # make attention vector
            s_c = hidden_zeros
            s_d = hidden_zeros
            for n in range(src_len):
                s_e = list_e[n] / sum_e
                #s_c += s_e * list_a[n]
                #s_d += s_e * list_b[n]
                s_c += functions.reshape(functions.batch_matmul(list_a[n], s_e), (batch_size, hidden_size))
                s_d += functions.reshape(functions.batch_matmul(list_b[n], s_e), (batch_size, hidden_size))

                #zxcv = wrapper.get_data(s_e)[0][0]
                #if zxcv > 0.9: asdf='#'
                #elif zxcv > 0.7: asdf='*'
                #elif zxcv > 0.3: asdf='+'
                #elif zxcv > 0.1: asdf='.'
                #else: asdf=' '
                #six.print_(asdf * len(src_batch[0][n]), end=' ')

            # generate next word
            c, s_p = lstm(c, m.w_yp(s_y) + m.w_pp(s_p) + m.w_cp(s_c) + m.w_dp(s_d))
            r_y = m.w_py(s_p)
            output = wrapper.get_data(r_y).argmax(1)
            for k in range(batch_size):
                hyp_batch[k].append(trg_itos(output[k]))

            #six.print_(hyp_batch[0][-1])
            
            if is_training:
                s_t = wrapper.make_var([trg_stoi(trg_batch[k][l + 1]) for k in range(batch_size)], dtype=np.int32)
                accum_loss += functions.softmax_cross_entropy(r_y, s_t)
                s_y = s_t
            else:
                if all(hyp_batch[k][-1] == '</s>' for k in range(batch_size)): break
                s_y = wrapper.make_var(output, dtype=np.int32)

        return hyp_batch, accum_loss