Example #1
    def calc_loss(self, sys_ys, ref_ys, dists):
        sys_Tscore, sys_vecs = self.calc_trans_score(
            sys_ys)  # chainer.Variable, concatenated one-hot vectors
        sys_matrix = wrapper.make_var([sys_vecs])

        ref_Tscore, ref_vecs = self.calc_trans_score(
            ref_ys)  # chainer.Variable, concatenated one-hot vectors
        ref_matrix = wrapper.make_var([ref_vecs])

        dists_matrix = functions.concat(tuple(dists))

        # count positions where the system and reference labels differ
        diff_cnt = wrapper.make_var([[0.0]])
        for sys_y, ref_y in zip(sys_ys, ref_ys):
            if sys_y != ref_y:
                diff_cnt += wrapper.make_var([[1.0]])

        # margin loss; the caller applies max(0, loss)
        loss = functions.matmul(sys_matrix, dists_matrix, transb=True) + sys_Tscore\
               - functions.matmul(ref_matrix, dists_matrix, transb=True) - ref_Tscore\
               + self.__eta * diff_cnt
        """
        debug
        print("sys_score trans : ", wrapper.get_data(functions.matmul(sys_matrix, dists_matrix, transb=True)), wrapper.get_data(sys_Tscore))
        print("ref_score trans : ", wrapper.get_data(functions.matmul(ref_matrix, dists_matrix, transb=True)), wrapper.get_data(ref_Tscore))
        print("diff_cnt penal : ",wrapper.get_data(diff_cnt), wrapper.get_data(self.__eta * diff_cnt))
        """

        return loss
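
The loss above is a structured hinge: the system path's score (emissions selected by the one-hot matmul, plus the transition score) minus the reference path's score, plus an eta-weighted count of differing labels; the caller only accumulates it when it is positive (see Example #5). A minimal NumPy sketch of the same arithmetic with hypothetical inputs, where the one-hot matrices stand in for sys_matrix/ref_matrix and the score matrix for dists_matrix:

    import numpy as np

    def hinge_loss_sketch(sys_onehot, ref_onehot, dists, sys_T, ref_T, eta):
        # Emission score of a path = sum of the scores its one-hot rows select.
        sys_score = float(np.sum(sys_onehot * dists)) + sys_T
        ref_score = float(np.sum(ref_onehot * dists)) + ref_T
        # Hamming penalty: positions where the two label sequences disagree.
        hamming = float(np.sum(np.argmax(sys_onehot, 1) != np.argmax(ref_onehot, 1)))
        return max(0.0, sys_score - ref_score + eta * hamming)

    dists = np.array([[0.2, 0.8], [0.6, 0.4]])
    sys_oh = np.array([[0, 1], [1, 0]])  # system labels: 1, 0
    ref_oh = np.array([[1, 0], [1, 0]])  # reference labels: 0, 0
    print(hinge_loss_sketch(sys_oh, ref_oh, dists, 0.0, 0.0, eta=0.2))  # 0.8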
Example #2
    def __forward(self, is_training, text):
        m = self.__model
        tanh = functions.tanh
        lstm = functions.lstm
        letters, labels = self.__make_input(is_training, text)
        n_letters = len(letters)

        accum_loss = wrapper.zeros(()) if is_training else None
        hidden_zeros = wrapper.zeros((1, self.__n_hidden))

        # embedding
        list_e = []
        for i in range(n_letters):
            s_x = wrapper.make_var([letters[i]], dtype=np.int32)
            list_e.append(tanh(m.w_xe(s_x)))

        # forward encoding
        s_a = hidden_zeros
        c = hidden_zeros
        list_a = []
        for i in range(n_letters):
            c, s_a = lstm(c, m.w_ea(list_e[i]) + m.w_aa(s_a))
            list_a.append(s_a)
        
        # backward encoding
        s_b = hidden_zeros
        c = hidden_zeros
        list_b = []
        for i in reversed(range(n_letters)):
            c, s_b = lstm(c, m.w_eb(list_e[i]) + m.w_bb(s_b))
            list_b.append(s_b)
        list_b.reverse()  # appended back-to-front; realign with letter positions
        
        # segmentation
        scores = []
        for i in range(n_letters - 1):
            s_y = tanh(m.w_ay1(list_a[i]) + m.w_by1(list_b[i]) + m.w_ay2(list_a[i + 1]) + m.w_by2(list_b[i + 1]))
            scores.append(float(wrapper.get_data(s_y)))
            
            if is_training:
                s_t = wrapper.make_var([[labels[i]]])
                accum_loss += functions.mean_squared_error(s_y, s_t)

        return scores, accum_loss
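
Each entry of scores rates the boundary between letters i and i+1; the training targets are -1/+1 word-boundary labels, so at prediction time a positive score can be read as "insert a boundary here". A hedged decoding sketch (segment is a hypothetical helper, not part of the original class):

    def segment(text, scores, threshold=0.0):
        # Insert a space after letter i whenever its boundary score is positive.
        out = []
        for i, ch in enumerate(text):
            out.append(ch)
            if i < len(scores) and scores[i] > threshold:
                out.append(' ')
        return ''.join(out)

    print(segment('abcd', [0.9, -0.3, 0.7]))  # 'a bc d'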
Example #3
    def __forward(self, is_training, text):
        m = self.__model
        tanh = functions.tanh
        letters, labels = self.__make_input(is_training, text)
        scores = []
        accum_loss = wrapper.zeros(()) if is_training else None
            
        for n in range(len(letters) - 2 * self.__n_context + 1):
            s_hu = wrapper.zeros((1, self.__n_hidden))
            
            for k in range(2 * self.__n_context):
                # position-specific embedding row for this (slot, character) pair
                wid = k * len(self.__vocab) + letters[n + k]
                s_x = wrapper.make_var([wid], dtype=np.int32)
                s_hu += m.w_xh(s_x)
            
            s_hv = tanh(s_hu)
            s_y = tanh(m.w_hy(s_hv))
            scores.append(float(wrapper.get_data(s_y)))
            
            if is_training:
                s_t = wrapper.make_var([[labels[n]]])
                accum_loss += functions.mean_squared_error(s_y, s_t)

        return scores, accum_loss
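
The index arithmetic wid = k * len(self.__vocab) + letters[n + k] gives every (window slot, character) pair its own row in the w_xh embedding table, so the same character gets position-specific weights. A tiny illustration with a hypothetical 5-character vocabulary and one letter of context per side:

    vocab_size = 5    # hypothetical |vocab|
    n_context = 1     # the window spans 2 * n_context letters
    letters = [2, 4]  # character ids inside the current window

    for k in range(2 * n_context):
        wid = k * vocab_size + letters[k]
        print(k, wid)  # slot 0 -> row 2, slot 1 -> row 9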
Example #4
    def calc_trans_score(self, labels):
        trans_score = wrapper.make_var([[0.0]])
        labels_vec = list()
        pre_label = None

        for label in labels:
            if pre_label is not None:
                """
                debug
                print("pre_label : ",pre_label)
                print("label : ",label)
                """
                trans_score += wrapper.get_data(self.get_trans_prob(pre_label))[0][label]  # softmax score for the transition pre_label -> label
            # append a one-hot vector for this label (concatenated across positions)
            for i in range(self.__n_labels):
                if i == label:
                    labels_vec.append(1)
                else:
                    labels_vec.append(0)
            pre_label = label

        return trans_score, labels_vec
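
The inner loop flattens the label sequence into one long concatenated one-hot vector, which calc_loss (Example #1) then wraps as a 1 x (n * n_labels) matrix. A standalone sketch of the same construction, assuming two labels:

    def onehot_concat(labels, n_labels=2):
        vec = []
        for label in labels:
            vec.extend(1 if i == label else 0 for i in range(n_labels))
        return vec

    print(onehot_concat([0, 1, 1]))  # [1, 0, 0, 1, 0, 1]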
Example #5
    def __forward(self, is_training, text):
        m = self.__model
        tanh = functions.tanh
        softmax = functions.softmax
        letters, labels = self.__make_input(is_training, text)  # character string (no spaces), word-boundary labels (-1, +1)
        dists = []
        accum_loss = wrapper.make_var([[0.0]]) if is_training else None
        """
        debug
        print("length, letters : ",len(letters), letters)
        print("len(self.__vocab) : ",len(self.__vocab))
        """
            
        for n in range(len(letters) - 2 * self.__n_context + 1):
            """
            debug
            print("n : ",n)
            """
            s_hu = wrapper.zeros((1, self.__n_hidden))
            
            for k in range(2 * self.__n_context):
                wid = k * len(self.__vocab) + letters[n + k]
                s_x = wrapper.make_var([wid], dtype=np.int32)
                s_hu += m.w_xh(s_x)
                """
                debug
                print("k : ",k)
                print("wid : ",wid)
                """  

            
            s_hv = tanh(s_hu)
            s_y = softmax(m.w_hy(s_hv))

            """
            debug
            print("s_y : ",s_y.data) 
            """

            dists.append(s_y)

        """
        debug
        for i,dist in enumerate(dists):
            print("dist_", i,": ", wrapper.get_data(dist))
        self.check_trans()
        """

        sys_ys, end_score = self.viterbi(dists)

        """
        debug
        print("end_score : ", end_score)
        """

        if is_training:
            """
            debug
            print("labels : ", labels)
            print("sys_ys : ", sys_ys)
            """

            loss = self.calc_loss(sys_ys, labels, dists)

            """
            debug
            print("loss : ", wrapper.get_data(loss))
            """

            if float(wrapper.get_data(loss)) > 0:
                accum_loss += loss

        return sys_ys, accum_loss
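
A training step then amounts to running __forward, backpropagating the accumulated loss, and updating the weights. A hedged sketch in the chainer-v1 idiom these examples use (train_one and opt are hypothetical; the method would live inside the same class):

    def train_one(self, opt, text):
        # opt: a chainer-v1 optimizer already set up on self.__model
        opt.zero_grads()
        sys_ys, accum_loss = self.__forward(True, text)
        if float(wrapper.get_data(accum_loss)) > 0.0:
            accum_loss.backward()  # gradients flow back through dists via calc_loss
            opt.update()
        return sys_ys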