Example 1
    def __init__(self,
                 word_vocab,
                 char_vocab,
                 Edgelabel_vocab,
                 options=None,
                 mode='ce_train'):
        # 'mode' can be one of:
        #   'ce_train',
        #   'rl_train',
        #   'evaluate',
        #   'evaluate_bleu',
        #   'decode'.
        # It is different from 'mode_gen' in generator_utils.py,
        # whose value can be 'ce_loss', 'rl_loss', 'greedy' or 'sample'.
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab = word_vocab

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        self.encoder = graph_encoder_utils.GraphEncoder(
            word_vocab=word_vocab,
            edge_label_vocab=Edgelabel_vocab,
            char_vocab=char_vocab,
            is_training=is_training,
            options=options)

        # ============== Choices of attention memory ================
        if options.attention_type == 'hidden':
            self.encoder_dim = options.neighbor_vector_dim
            self.encoder_states = self.encoder.graph_hiddens
        elif options.attention_type == 'hidden_cell':
            self.encoder_dim = options.neighbor_vector_dim * 2
            self.encoder_states = tf.concat(
                [self.encoder.graph_hiddens, self.encoder.graph_cells], 2)
        elif options.attention_type == 'hidden_embed':
            self.encoder_dim = options.neighbor_vector_dim + self.encoder.input_dim
            self.encoder_states = tf.concat([
                self.encoder.graph_hiddens, self.encoder.node_representations
            ], 2)
        else:
            assert False, '%s not supported yet' % options.attention_type

        # ============== Choices of initializing decoder state =============
        if options.way_init_decoder == 'zero':
            new_c = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
            new_h = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
        elif options.way_init_decoder == 'all':
            new_c = tf.reduce_sum(self.encoder.graph_cells, axis=1)
            new_h = tf.reduce_sum(self.encoder.graph_hiddens, axis=1)
        elif options.way_init_decoder == 'root':
            new_c = self.encoder.graph_cells[:, 0, :]
            new_h = self.encoder.graph_hiddens[:, 0, :]
        else:
            assert False, 'way to initialize decoder (%s) not supported' % options.way_init_decoder
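        # pack (c, h) into an LSTMStateTuple; TF LSTM cells expect the state
        # ordered as (cell state, hidden state)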
        self.init_decoder_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        # prepare AMR-side input for decoder
        self.nodes = self.encoder.passage_nodes
        self.nodes_num = self.encoder.passage_nodes_size
        if options.with_char:
            self.nodes_chars = self.encoder.passage_nodes_chars
            self.nodes_chars_num = self.encoder.passage_nodes_chars_size
        self.nodes_mask = self.encoder.passage_nodes_mask

        self.in_neigh_indices = self.encoder.passage_in_neighbor_indices
        self.in_neigh_edges = self.encoder.passage_in_neighbor_edges
        self.in_neigh_mask = self.encoder.passage_in_neighbor_mask

        self.out_neigh_indices = self.encoder.passage_out_neighbor_indices
        self.out_neigh_edges = self.encoder.passage_out_neighbor_edges
        self.out_neigh_mask = self.encoder.passage_out_neighbor_mask

        self.create_placeholders(options)

        loss_weights = tf.sequence_mask(
            self.answer_len, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]
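        # illustration (values assumed): answer_len = [2, 4], max_answer_len = 4
        #   -> [[1., 1., 0., 0.],
        #       [1., 1., 1., 1.]]
        # padded generation steps therefore get zero weight in the loss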

        with variable_scope.variable_scope("generator"):
            # create generator
            self.generator = generator_utils.CovCopyAttenGen(
                self, options, word_vocab)
            # calculate encoder_features
            self.encoder_features = self.generator.calculate_encoder_features(
                self.encoder_states, self.encoder_dim)

            if mode == 'decode':
                self.context_t_1 = tf.placeholder(
                    tf.float32, [None, self.encoder_dim],
                    name='context_t_1')  # [batch_size, encoder_dim]
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')  # [batch_size, node_num]
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                (self.state_t, self.context_t, self.coverage_t,
                 self.attn_dist_t, self.p_gen_t, self.ouput_t,
                 self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                 self.multinomial_prediction) = self.generator.decode_mode(
                     word_vocab, options.beam_size, self.init_decoder_state,
                     self.context_t_1, self.coverage_t_1, self.word_t,
                     self.encoder_states, self.encoder_features, self.nodes,
                     self.nodes_mask)
                # not building training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='greedy')
                # not building training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                self.accu, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='ce_loss')
                if mode == 'evaluate':
                    return  # not building training op for evaluation
            elif mode == 'rl_train':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='rl_loss')

                # reuse the generator's variables so that the sampled and
                # greedy rollouts below share parameters with the RL loss above
                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='greedy')

        if options.optimize_type == 'adadelta':
            clipper = 50
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
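            # clip gradients by global norm (threshold `clipper`) before applying the update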
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        elif options.optimize_type == 'adam':
            clipper = 50
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        extra_train_ops = []
        train_ops = [self.train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
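
For orientation, a minimal sketch of how a constructor like this is usually driven; the class name `ModelGraph` and the pre-loaded vocabularies/options are assumptions, only `mode`, `train_op` and `loss` come from the code above:

    # hypothetical driver -- class name, vocab and option loading are assumed
    with tf.Graph().as_default(), tf.variable_scope("Model"):
        model = ModelGraph(word_vocab, char_vocab, Edgelabel_vocab,
                           options=options, mode='ce_train')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # each step feeds the placeholders made in create_placeholders()
            # _, loss = sess.run([model.train_op, model.loss], feed_dict=feed_dict)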
Example 2
    def __init__(self,
                 template_vocab=None,
                 word_vocab=None,
                 char_vocab=None,
                 POS_vocab=None,
                 NER_vocab=None,
                 options=None,
                 mode='ce_train'):  ###

        # 'mode' can be one of:
        #   'ce_train',
        #   'rl_train',
        #   'rl_train_for_phrase',
        #   'evaluate',
        #   'evaluate_bleu',
        #   'decode'.
        # It is different from 'mode_gen' in generator_utils.py,
        # whose value can be 'ce_train', 'loss', 'greedy' or 'sample'.
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab = word_vocab

        self.template_vocab = template_vocab  ###

        # create placeholders
        self.create_placeholders(options)

        # create encoder
        if options.two_sent_inputs:  # take two sentences as inputs
            self.encoder = matching_encoder_utils.MatchingEncoder(
                self,
                options,
                word_vocab=word_vocab,
                char_vocab=char_vocab,
                POS_vocab=POS_vocab,
                NER_vocab=NER_vocab)
        else:  # take one sentence as input
            self.encoder = encoder_utils.SeqEncoder(self,
                                                    options,
                                                    word_vocab=word_vocab,
                                                    char_vocab=char_vocab,
                                                    POS_vocab=POS_vocab,
                                                    NER_vocab=NER_vocab)

        # encode the input instance
        self.encode_dim, self.encode_hiddens, self.init_decoder_state = self.encoder.encode(
            is_training=is_training)

        # project to phrase representation
        if options.with_phrase_projection:
            phrase_projection_layer = phrase_projection_layer_utils.PhraseProjectionLayer(
                self)
            self.phrase_representations = phrase_projection_layer.project_to_phrase_representation(
                self.encode_hiddens)
            self.encode_dim = 2 * self.encode_dim
        else:
            self.phrase_representations = self.encode_hiddens
            self.phrase_idx = self.in_passage_words
            self.phrase_lengths = self.passage_lengths

        phrase_length_max = tf.shape(self.phrase_idx)[1]
        self.phrase_mask = tf.sequence_mask(self.phrase_lengths,
                                            phrase_length_max,
                                            dtype=tf.float32)
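        # 1.0 for real phrase positions, 0.0 for padding; passed to the
        # generator below together with phrase_idx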

        loss_weights = tf.sequence_mask(
            self.answer_lengths, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            ### self.generator = generator_utils.CovCopyAttenGen(self, options, word_vocab)
            self.generator = generator_utils.CovCopyAttenGen(
                self, options, word_vocab, template_vocab)  ###
            # calculate encoder_features
            self.encoder_features = self.generator.calculate_encoder_features(
                self.phrase_representations, self.encode_dim)

            if mode == 'decode':
                self.context_t_1 = tf.placeholder(
                    tf.float32, [None, self.encode_dim],
                    name='context_t_1')  # [batch_size, encode_dim]
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')  # [batch_size, phrase_length]
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                (
                    self.state_t, self.context_t, self.coverage_t,
                    self.attn_dist_t, self.p_gen_t, self.ouput_t,
                    self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                    self.multinomial_prediction
                ) = self.generator.decode_mode(
                    ### word_vocab, options.beam_size, self.init_decoder_state, self.context_t_1, self.coverage_t_1, self.word_t,
                    word_vocab,
                    self.template_words,
                    self.template_lengths,
                    options.beam_size,
                    self.init_decoder_state,
                    self.context_t_1,
                    self.coverage_t_1,
                    self.word_t,  ###
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask)
                # not building training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='greedy')
                # not building training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                ### self.accu, self.loss, _ = self.generator.train_mode(self.question_template, template_vocab, word_vocab, self.encode_dim, self.phrase_representations, self.encoder_features,
                self.accu, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.template_words,
                    self.template_lengths,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,  ###   
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='ce_train')  ###
                if mode == 'evaluate':
                    return  # not building training op for evaluation
            elif mode == 'rl_train':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='loss')

                # reuse the generator's variables so that the sampled and
                # greedy rollouts below share parameters with the RL loss above
                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    None,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    None,
                    mode_gen='greedy')
            elif mode == 'rl_train_for_phrase':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='loss')

        if options.optimize_type == 'adadelta':
            clipper = 50
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
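                # L2-regularize weight matrices only (ndims > 1 excludes bias vectors);
                # tf.nn.l2_loss(v) is sum(v ** 2) / 2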
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        elif options.optimize_type == 'adam':
            clipper = 50
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        extra_train_ops = []
        train_ops = [self.train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
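
The 'decode' branch above builds single-step tensors for beam search and returns before any training op is created; a constructor call for that use would look roughly like this (the class name `ModelGraph` and the loaded vocabularies are assumptions):

    # hypothetical decode-time construction -- no train_op exists in this mode
    model = ModelGraph(template_vocab=template_vocab,
                       word_vocab=word_vocab,
                       char_vocab=char_vocab,
                       POS_vocab=POS_vocab,
                       NER_vocab=NER_vocab,
                       options=options,
                       mode='decode')
    # model.topk_ids and model.topk_log_probs are then evaluated once per decoding step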