# Example no. 1
class SRLNetwork(GradientOptimizable):


    def __init__(self, problem_character = None,
                 nn_architecture = None, trans_mat_prior = None):
        """Assemble the SRL network and compile its prediction functions.

        Pipeline: embedding lookups (word / POS / distance) -> 1-D
        convolution with max-pooling -> sigmoid hidden layers -> softmax
        output over SRL classes.

        problem_character: dict of problem sizes; must provide 'word_num',
            'POS_type_num', 'dist_to_verb_num', 'dist_to_word_num' and
            'SRL_type_num'.
        nn_architecture: object exposing word_feature_dim, pos_feature_dim,
            dist_feature_dim, conv_output_dim, conv_window_height and
            hidden_layer_output_dims.
        trans_mat_prior: currently unused; kept for interface compatibility
            (it belonged to a since-removed PathTransitionLayer output).

        Raises Exception when problem_character or nn_architecture is None.
        """
        if problem_character is None or nn_architecture is None:
            raise Exception("both problem and architecture must be provided")

        word_num = problem_character['word_num']
        POS_type_num = problem_character['POS_type_num']
        dist_to_verb_num = problem_character['dist_to_verb_num']
        dist_to_word_num = problem_character['dist_to_word_num']

        # 1. word embeddings
        #    output shape: (batch size, sentence_len, word_feature_num)
        self.word_embedding_layer = LookupTableLayer(
            table_size = word_num,
            feature_num = nn_architecture.word_feature_dim
        )

        # 2. POS-tag embeddings
        #    output shape: (batch size, sentence_len, POS_feature_num)
        self.pos_embedding_layer = LookupTableLayer(
            table_size = POS_type_num,
            feature_num = nn_architecture.pos_feature_dim,
        )

        # 3. distance embeddings: word->word and word->verb distances use
        #    separate tables but share the same feature dimension.
        #    output shape: (batch size, sentence_len, dist_feature_dim)
        self.locdiff_word_embedding_layer = LookupTableLayer(
            table_size = dist_to_word_num,
            feature_num = nn_architecture.dist_feature_dim,
        )

        self.locdiff_verb_embedding_layer = LookupTableLayer(
            table_size = dist_to_verb_num,
            feature_num = nn_architecture.dist_feature_dim,
        )

        # Per-position conv input: 3 word vectors (cur word, cur verb,
        # sentence word) + 3 POS vectors + 4 distance vectors.  This must
        # stay in sync with the concatenation performed in __output().
        conv_input_dim = nn_architecture.word_feature_dim * 3 + \
            nn_architecture.pos_feature_dim * 3 + \
            nn_architecture.dist_feature_dim * 4

        conv_shape = (nn_architecture.conv_output_dim,
                      1,
                      nn_architecture.conv_window_height,
                      conv_input_dim)
        self.conv_layer = Conv1DMaxPoolLayer(
            activator_type = "sigmoid",
            tensor_shape = conv_shape)

        # Layer order here defines the flat parameter-vector layout used by
        # get_parameter()/set_parameter()/params().
        self.embedding_conv_layers = [self.word_embedding_layer,
            self.pos_embedding_layer,
            self.locdiff_word_embedding_layer,
            self.locdiff_verb_embedding_layer,
            self.conv_layer]

        # Sigmoid hidden layers, then a softmax output layer sized to the
        # number of SRL classes.
        input_dim = nn_architecture.conv_output_dim
        self.perception_layers = []
        for output_dim in nn_architecture.hidden_layer_output_dims:
            self.perception_layers.append(PerceptionLayer(
                input_dim = input_dim,
                output_dim = output_dim,
                activator_type = "sigmoid"))
            input_dim = output_dim

        self.perception_layers.append(PerceptionLayer(
                input_dim = input_dim,
                output_dim = problem_character["SRL_type_num"],
                activator_type = "softmax"))

        self.cost = create_cost({"type": "cross_entropy"})

        # Build the symbolic output graph ONCE and share it between the
        # probability and argmax functions (previously __output(X) was
        # called twice, constructing the whole graph a second time).
        X = theano.tensor.matrix("X")
        output_expr = self.__output(X)
        self.__output_func = theano.function([X], outputs = output_expr)
        self.__predict_expr = theano.tensor.argmax(output_expr, axis = 1)
        self.__predict_func = theano.function([X],
                                              outputs = self.__predict_expr)

    def __output(self, X):
        """Build the symbolic class-probability output for input matrix X.

        X is a 2-D tensor with one row per <word, verb> pair of a single
        sentence; sentence-level fields are read from row 0 only.  The
        column layout is decoded sequentially below:

          col 0                      : sentence length (row 0)
          next sentence_len cols     : global word ids of the sentence (row 0)
          next sentence_len cols     : global POS ids of the sentence (row 0)
          8 single cols (per row)    : cur word id, cur verb id, their POS ids,
                                       their location ids (unused), and the
                                       word->verb / verb->word distance ids
          2 * sentence_len cols      : distances from every other word to the
                                       cur verb, then to the cur word

        Returns the symbolic output of the final (softmax) perception layer.
        """
        # Original field sketch (translated from the Chinese comments):
        # X.sentence_word_id = []  # global word-id list of the current sentence
        # X.sentence_pos_id = []   # global POS-id list of the current sentence
        #
        # # one record per <word, verb> pair
        # X.cur_word_id = []   # word id of the current word
        # X.cur_verb_id = []   # word id of the current verb
        # X.cur_word_pos_id = []  # POS id of the current word
        # X.cur_verb_pos_id = []  # POS id of the current verb
        # X.cur_word_loc_id = []  # location id of the current word   # NOT IN USE
        # X.cur_verb_loc_id = []  # location id of the current verb   # NOT IN USE
        # X.cur_word2verb_dist_id = []  # distance id: current word -> current verb
        # X.cur_verb2word_dist_id = []  # distance id: current verb -> current word
        # X.other_word2verb_dist_id = []  # distance ids: other words -> current verb
        # X.other_word2word_dist_id = []  # distance ids: other words -> current word

        # Walk the packed columns left to right, advancing start_idx past
        # each field as it is consumed.
        start_idx = 0
        sentence_len = X[0, start_idx].astype('int32')
        start_idx += 1
        sentence_word_id = X[0, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        sentence_pos_id = X[0, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        cur_word_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_pos_id = X[:, start_idx].astype('int32')
        start_idx += 1
        # The two loc_id columns are decoded to keep the layout aligned but
        # are never embedded below (the loc_embedding_layer is commented out).
        cur_word_loc_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb_loc_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_word2verb_dist_id = X[:, start_idx].astype('int32')
        start_idx += 1
        cur_verb2word_dist_id = X[:,start_idx].astype('int32')
        start_idx += 1
        other_word2verb_dist_id = X[:, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len
        other_word2word_dist_id = X[:, start_idx:start_idx+sentence_len].astype('int32')
        start_idx += sentence_len

        # Embed the per-pair fields; word and verb ids share one word table,
        # word and verb POS ids share one POS table.
        wordvec = self.word_embedding_layer.output(
            inputs = cur_word_id #word_id_input
        )

        verbvec = self.word_embedding_layer.output(
            inputs = cur_verb_id #verb_id_input
        )

        wordPOSvec = self.pos_embedding_layer.output(
            inputs = cur_word_pos_id #word_pos_input
        )

        verbPOSvec = self.pos_embedding_layer.output(
            inputs = cur_verb_pos_id #verb_pos_input
        )

#        wordlocvec = self.loc_embedding_layer.output(
#            inputs = word_loc_input,
#        )

#        verblocvec = self.loc_embedding_layer.output(
#            inputs = verb_loc_input,
#        )

        # NOTE(review): word2verb distances go through the *verb* distance
        # table and verb2word through the *word* table — presumably "table
        # keyed by the distance target"; confirm this pairing is intended.
        locdiff_word2verb_vec = self.locdiff_verb_embedding_layer.output(
            inputs = cur_word2verb_dist_id
        )

        locdiff_verb2word_vec = self.locdiff_word_embedding_layer.output(
            inputs = cur_verb2word_dist_id
        )

        sentence_word_vec = self.word_embedding_layer.output(
            inputs = sentence_word_id,
        )

        sentence_pos_vec = self.pos_embedding_layer.output(
            inputs = sentence_pos_id,
        )

        other_loc2word_vec = self.locdiff_word_embedding_layer.output(
           inputs = other_word2word_dist_id
        )

        other_loc2verb_vec = self.locdiff_verb_embedding_layer.output(
           inputs = other_word2verb_dist_id
        )

        # NOTE(review): assumes one <word, verb> row per sentence position,
        # i.e. X has exactly sentence_len rows — confirm against the caller.
        batch_size = sentence_len

        # Align everything to a 4-D (batch, 1, sentence_len, features) stack:
        # per-pair vectors are repeated across sentence positions (axis 2),
        # sentence-level vectors across pairs (axis 0), then all feature
        # blocks are concatenated along axis 3 to form the conv input.
        conv_input_feature = T.concatenate(

            (
                wordvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                verbvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                wordPOSvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                verbPOSvec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                locdiff_word2verb_vec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                locdiff_verb2word_vec.dimshuffle(0,"x", "x",1).repeat(sentence_len, axis=2),
                sentence_word_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
                sentence_pos_vec.dimshuffle("x", "x", 0, 1).repeat(batch_size, axis=0),
                other_loc2word_vec.dimshuffle(0, "x", 1, 2),
                other_loc2verb_vec.dimshuffle(0, "x", 1, 2),
            ),
            axis=3
        )

        # Convolve + max-pool, then flatten to (batch_size, conv_output_dim).
        conv_out = self.conv_layer.output(conv_input_feature).reshape((batch_size, -1))

        # Feed forward through the hidden layers and the softmax output layer.
        layer_input = conv_out
        for layer in self.perception_layers:
            layer_input = layer.output(layer_input)

        return layer_input


    def predict(self, X):
        """Return the most likely SRL class index for each <word, verb> pair in X."""
        return self.__predict_func(X)

    def predict_prob(self,X):
        """Return the per-class probability matrix for each <word, verb> pair in X."""
        return self.__output_func(X)

    def object_gradient(self, X, y):
        """Build symbolic cost and flattened-gradient expressions.

        X: symbolic input matrix (layout as parsed by __output).
        y: symbolic target labels for the cross-entropy cost.

        Returns [cost_expr, gradient_expr], where gradient_expr is the
        concatenation of the flattened gradients of the cost w.r.t. every
        model parameter, in the order produced by self.params() — i.e. the
        same layout used by get_parameter()/set_parameter().
        """
        object_expr = self.cost.cost(self.__output(X), y)

        params = self.params()
        grads = T.grad(object_expr, params)

        # Flatten each per-parameter gradient and join into one flat vector.
        gradient_expr = theano.tensor.concatenate(
            [g.flatten() for g in grads])

        return [object_expr, gradient_expr]

    def get_parameter(self):
        """Return all layer parameters concatenated into one flat numpy vector.

        Layers are visited in the fixed order embedding/conv layers first,
        then perception layers, so the layout matches set_parameter().
        """
        all_layers = self.embedding_conv_layers + self.perception_layers
        return numpy.concatenate([layer.get_parameter() for layer in all_layers])

    def set_parameter(self, param_vec):
        """Distribute the flat vector param_vec back onto every layer.

        Slices are taken in the same layer order (and with the per-layer
        sizes) that get_parameter() used to build the vector.
        """
        all_layers = self.embedding_conv_layers + self.perception_layers
        offset = 0
        for layer in all_layers:
            size = layer.get_parameter_size()
            layer.set_parameter(param_vec[offset:offset + size])
            offset += size


    def params(self):
        """Return a flat list of every layer's symbolic parameters, in layer order."""
        all_layers = self.embedding_conv_layers + self.perception_layers
        return [p for layer in all_layers for p in layer.params()]



    def __getstate__(self):
        """Collect per-layer state dicts for pickling.

        NOTE(review): several attributes read here (word_conv_layer,
        pos_conv_layer, loc_embedding_layer, locdiff_word_conv_layer,
        locdiff_verb_conv_layer, output_layer) are never created by the
        visible __init__, which builds a single conv_layer and a
        perception-layer stack instead.  Unless they are assigned elsewhere,
        pickling will raise AttributeError — this method appears to predate
        the current architecture; confirm before relying on serialization.
        """
        state = dict()
        # 'name' doubles as a format tag; __setstate__ asserts on it.
        state['name'] = "srl-machine"
        state['word_embedding_layer'] = self.word_embedding_layer.__getstate__()
        state['word_conv_layer'] = self.word_conv_layer.__getstate__()
        state['pos_embedding_layer'] = self.pos_embedding_layer.__getstate__()
        state['pos_conv_layer'] = self.pos_conv_layer.__getstate__()
        state['loc_embedding_layer'] = self.loc_embedding_layer.__getstate__()
        state['locdiff_word_embedding_layer'] = self.locdiff_word_embedding_layer.__getstate__()
        state['locdiff_word_conv_layer'] = self.locdiff_word_conv_layer.__getstate__()
        state['locdiff_verb_embedding_layer'] = self.locdiff_verb_embedding_layer.__getstate__()
        state['locdiff_verb_conv_layer'] = self.locdiff_verb_conv_layer.__getstate__()

        # Hidden/output perception layers are stored under indexed keys.
        for idx, hidden_layer in enumerate(self.perception_layers):
            state['hidden_layer_' + str(idx)] = hidden_layer.__getstate__()

        state['output_layer'] = self.output_layer.__getstate__()

        return state

    def __setstate__(self, state):
        """Rebuild layers from a pickled state dict.

        NOTE(review): this restores an older layer layout (separate per-input
        conv layers, a loc_embedding_layer) that does not match the visible
        __init__, and it never rebuilds embedding_conv_layers,
        perception_layers, the cost, or the compiled Theano functions — an
        unpickled instance would be missing those.  It also reads none of the
        'hidden_layer_*' / 'output_layer' keys __getstate__ writes.  Looks
        out of sync with the current architecture; confirm before use.
        """
        # Format-tag check only; note that assert is stripped under -O.
        assert state['name'] == "srl-machine"

        self.word_embedding_layer = LookupTableLayer()
        self.word_embedding_layer.__setstate__(state["word_embedding_layer"])

        self.pos_embedding_layer = LookupTableLayer()
        self.pos_embedding_layer.__setstate__(state["pos_embedding_layer"])

        self.loc_embedding_layer = LookupTableLayer()
        self.loc_embedding_layer.__setstate__(state["loc_embedding_layer"])

        self.locdiff_word_embedding_layer = LookupTableLayer()
        self.locdiff_word_embedding_layer.__setstate__(state["locdiff_word_embedding_layer"])

        self.locdiff_verb_embedding_layer = LookupTableLayer()
        self.locdiff_verb_embedding_layer.__setstate__(state["locdiff_verb_embedding_layer"])

        self.word_conv_layer = Conv1DMaxPoolLayer()
        self.word_conv_layer.__setstate__(state["word_conv_layer"])

        self.pos_conv_layer = Conv1DMaxPoolLayer()
        self.pos_conv_layer.__setstate__(state["pos_conv_layer"])

        self.locdiff_word_conv_layer = Conv1DMaxPoolLayer()
        self.locdiff_word_conv_layer.__setstate__(state["locdiff_word_conv_layer"])

        self.locdiff_verb_conv_layer = Conv1DMaxPoolLayer()
        self.locdiff_verb_conv_layer.__setstate__(state["locdiff_verb_conv_layer"])