Example #1
    def __init__(self,
                 template_vocab=None,
                 word_vocab=None,
                 char_vocab=None,
                 POS_vocab=None,
                 NER_vocab=None,
                 options=None,
                 mode='ce_train'):  ###

        # the value of 'mode' can be:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode'.
        # it is different from 'mode_gen' in generator_utils.py
        # value of 'mode_gen' can be 'ce_train', 'loss', 'greedy' or 'sample'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab = word_vocab

        self.template_vocab = template_vocab  ###

        # create placeholders
        self.create_placeholders(options)

        # create encoder
        if options.two_sent_inputs:  # take two sentences as inputs
            self.encoder = matching_encoder_utils.MatchingEncoder(
                self,
                options,
                word_vocab=word_vocab,
                char_vocab=char_vocab,
                POS_vocab=POS_vocab,
                NER_vocab=NER_vocab)
        else:  # take one sentence as input
            self.encoder = encoder_utils.SeqEncoder(self,
                                                    options,
                                                    word_vocab=word_vocab,
                                                    char_vocab=char_vocab,
                                                    POS_vocab=POS_vocab,
                                                    NER_vocab=NER_vocab)

        # encode the input instance
        self.encode_dim, self.encode_hiddens, self.init_decoder_state = self.encoder.encode(
            is_training=is_training)

        # project to phrase representation
        if options.with_phrase_projection:
            phrase_projection_layer = phrase_projection_layer_utils.PhraseProjectionLayer(
                self)
            self.phrase_representations = phrase_projection_layer.project_to_phrase_representation(
                self.encode_hiddens)
            self.encode_dim = 2 * self.encode_dim
        else:
            self.phrase_representations = self.encode_hiddens
            self.phrase_idx = self.in_passage_words
            self.phrase_lengths = self.passage_lengths

        phrase_length_max = tf.shape(self.phrase_idx)[1]
        self.phrase_mask = tf.sequence_mask(self.phrase_lengths,
                                            phrase_length_max,
                                            dtype=tf.float32)

        loss_weights = tf.sequence_mask(
            self.answer_lengths, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            ### self.generator = generator_utils.CovCopyAttenGen(self, options, word_vocab)
            self.generator = generator_utils.CovCopyAttenGen(
                self, options, word_vocab, template_vocab)  ###
            # calculate encoder_features
            self.encoder_features = self.generator.calculate_encoder_features(
                self.phrase_representations, self.encode_dim)

            if mode == 'decode':
                self.context_t_1 = tf.placeholder(
                    tf.float32, [None, self.encode_dim],
                    name='context_t_1')  # [batch_size, encode_dim]
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')  # [batch_size, phrase_len]
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                (
                    self.state_t, self.context_t, self.coverage_t,
                    self.attn_dist_t, self.p_gen_t, self.ouput_t,
                    self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                    self.multinomial_prediction
                ) = self.generator.decode_mode(
                    ### word_vocab, options.beam_size, self.init_decoder_state, self.context_t_1, self.coverage_t_1, self.word_t,
                    word_vocab,
                    self.template_words,
                    self.template_lengths,
                    options.beam_size,
                    self.init_decoder_state,
                    self.context_t_1,
                    self.coverage_t_1,
                    self.word_t,  ###
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask)
                # not building a training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='greedy')
                # not building a training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                ### self.accu, self.loss, _ = self.generator.train_mode(self.question_template, template_vocab, word_vocab, self.encode_dim, self.phrase_representations, self.encoder_features,
                self.accu, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.template_words,
                    self.template_lengths,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,  ###   
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='ce_train')  ###
                if mode == 'evaluate':
                    return  # not building a training op for evaluation
            elif mode == 'rl_train':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='loss')

                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    None,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    None,
                    mode_gen='greedy')
            elif mode == 'rl_train_for_phrase':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encode_dim,
                    self.phrase_representations,
                    self.encoder_features,
                    self.phrase_idx,
                    self.phrase_mask,
                    self.init_decoder_state,
                    self.gen_input_words,
                    self.in_answer_words,
                    loss_weights,
                    mode_gen='loss')

        if options.optimize_type == 'adadelta':
            clipper = 50
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        elif options.optimize_type == 'adam':
            clipper = 50
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        extra_train_ops = []
        train_ops = [self.train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
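
The optimizer block above (and its near-identical copies in the other examples of this collection) follows a single pattern: an optional L2 penalty over weight matrices, global-norm gradient clipping, then apply_gradients. A minimal sketch that factors this pattern into one helper, assuming a TF 1.x runtime and an `options` object exposing `optimize_type`, `learning_rate` and `lambda_l2`:

import tensorflow as tf

def build_train_op_sketch(loss, options, clipper=50):
    """Sketch of the shared training-op pattern: L2 penalty + clipped gradients."""
    if options.optimize_type == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=options.learning_rate)
    elif options.optimize_type == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=options.learning_rate)
    else:
        raise ValueError('unsupported optimize_type: %s' % options.optimize_type)
    tvars = tf.trainable_variables()
    if options.lambda_l2 > 0.0:
        # L2 over weight matrices only; 1-D variables (biases) are skipped
        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
        loss = loss + options.lambda_l2 * l2_loss
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), clipper)
    return optimizer.apply_gradients(zip(grads, tvars))
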
Example #2
    def __init__(self, word_vocab, char_vocab=None, POS_vocab=None, NER_vocab=None, options=None, \
            has_ref=True, is_training=True):
        # is_training controls whether to use dropout and update parameters
        self.is_training = is_training
        # has_ref distinguishes 'dev' evaluation from 'final test' evaluation
        self.has_ref = has_ref

        self.options = options
        self.word_vocab = word_vocab

        # separately encode passage and question
        self.passage_encoder = encoder_utils.SeqEncoder(options,
                                                        word_vocab,
                                                        POS_vocab=POS_vocab,
                                                        NER_vocab=NER_vocab)

        self.question_encoder = encoder_utils.SeqEncoder(options,
                                                         word_vocab,
                                                         POS_vocab=POS_vocab,
                                                         NER_vocab=NER_vocab,
                                                         embed_reuse=True)

        with tf.variable_scope('passage'):
            passage_dim, passage_repre, passage_mask = self.passage_encoder.encode(
                is_training=is_training)
        with tf.variable_scope('question'):
            question_dim, question_repre, question_mask = self.question_encoder.encode(
                is_training=is_training)

        # modeling entities
        self.entity_starts = tf.placeholder(tf.int32, [None, None],
                                            'entity_starts')
        self.entity_ends = tf.placeholder(tf.int32, [None, None],
                                          'entity_ends')
        self.entity_lengths = tf.placeholder(tf.int32, [None],
                                             'entity_lengths')

        batch_size = tf.shape(self.entity_starts)[0]
        entity_len_max = tf.shape(self.entity_starts)[1]
        entity_mask = tf.sequence_mask(self.entity_lengths,
                                       entity_len_max,
                                       dtype=tf.float32)  # [batch, entity]

        entity_st_rep = operation_utils.collect_node(
            passage_repre, self.entity_starts)  # [batch, entity, rep_dim]
        entity_ed_rep = operation_utils.collect_node(
            passage_repre, self.entity_ends)  # [batch, entity, rep_dim]
        entity_rep = tf.concat([entity_st_rep, entity_ed_rep],
                               axis=2)  # [batch, entity, rep_dim * 2]
        entity_dim = passage_dim * 2

        qfull_st_rep = question_repre[:, 0, :]  # [batch, rep_dim]
        qfull_ed_rep = operation_utils.collect_final_step(
            question_repre,
            self.question_encoder.sequence_lengths - 1)  # [batch, rep_dim]
        qfull_rep = tf.concat([qfull_st_rep, qfull_ed_rep],
                              axis=1)  # [batch, rep_dim * 2]
        qfull_dim = question_dim * 2

        matching_results = []
        rst_seq = self.perform_matching(entity_rep,
                                        entity_dim,
                                        entity_mask,
                                        question_repre,
                                        qfull_rep,
                                        question_dim,
                                        question_mask,
                                        scope_name='seq_match',
                                        options=options,
                                        is_training=is_training)
        matching_results.append(rst_seq)

        # encode entity representation with GRN
        if options.with_grn or options.with_gcn:
            # merge question representation into passage
            q4p_rep = tf.tile(
                tf.expand_dims(qfull_rep, 1),  # [batch, 1, rep_dim * 2]
                [1, entity_len_max, 1])  # [batch, entity, rep_dim * 2]
            entity_rep = tf.concat([entity_rep, q4p_rep], axis=2)
            entity_dim = entity_dim + qfull_dim

            # compress before going to GRN
            merge_w = tf.get_variable('merge_w',
                                      [entity_dim, options.merge_dim])
            merge_b = tf.get_variable('merge_b', [options.merge_dim])

            entity_rep = tf.reshape(entity_rep, [-1, entity_dim])
            entity_rep = tf.matmul(entity_rep, merge_w) + merge_b
            entity_rep = tf.reshape(
                entity_rep, [batch_size, entity_len_max, options.merge_dim])
            entity_rep = entity_rep * tf.expand_dims(entity_mask, axis=-1)
            entity_dim = options.merge_dim

            # main part: encoding
            scope_name = 'GRN' if options.with_grn else 'GCN'

            with tf.variable_scope(scope_name):
                self.edges = tf.placeholder(tf.int32, [None, None, None],
                                            'edges')
                self.edges_mask = tf.placeholder(tf.float32,
                                                 [None, None, None],
                                                 'edges_mask')
                if options.with_grn:
                    print("With Graph recurrent network as the graph encoder")
                    self.graph_encoder = graph_encoder_utils.GraphEncoder(
                        entity_rep,
                        entity_mask,
                        entity_dim,
                        self.edges,
                        self.edges_mask,
                        is_training=is_training,
                        options=options)
                else:
                    print("With GCN as the graph encoder")
                    self.graph_encoder = gcn_encoder_utils.GCNEncoder(
                        entity_rep,
                        entity_mask,
                        entity_dim,
                        self.edges,
                        self.edges_mask,
                        is_training=is_training,
                        options=options)

                for i in range(options.num_grn_step):
                    if options.grn_rep_type == 'hidden':
                        entity_grn_rep = self.graph_encoder.grn_historys[
                            i]  # [batch, entity, grn_dim]
                        entity_grn_dim = options.grn_dim
                    elif options.grn_rep_type == 'hidden_embed':
                        entity_grn_rep = tf.concat(
                            [self.graph_encoder.grn_historys[i], entity_rep],
                            2)  # [batch, entity, grn_dim + merge_dim]
                        entity_grn_dim = options.grn_dim + entity_dim
                    else:
                        assert False, '%s not supported yet' % options.grn_rep_type

                    if options.with_multi_perspective:
                        assert entity_grn_dim == question_dim

                    rst_grn = self.perform_matching(entity_grn_rep,
                                                    entity_grn_dim,
                                                    entity_mask,
                                                    question_repre,
                                                    qfull_rep,
                                                    question_dim,
                                                    question_mask,
                                                    scope_name='grn%d_match' %
                                                    i,
                                                    options=options,
                                                    is_training=is_training)
                    matching_results.append(rst_grn)

        self.candidates = tf.placeholder(
            tf.int32, [None, None, None],
            'candidates')  # [batch, c_num, c_occur]
        self.candidates_len = tf.placeholder(tf.float32, [None],
                                             'candidates_len')  # [batch]
        self.candidates_occur_mask = tf.placeholder(
            tf.float32, [None, None, None],
            'candidates_occur_mask')  # [batch, c_num, c_occur]

        # matching_results: list of [batch, cands]
        self.attn_dist = self.perform_integration(matching_results,
                                                  scope_name='integration',
                                                  options=options,
                                                  is_training=is_training)

        cand_num = tf.shape(self.candidates)[1]
        self.topk_probs, self.topk_ids = tf.nn.top_k(self.attn_dist,
                                                     k=cand_num,
                                                     name='topK')
        self.out = tf.argmax(self.attn_dist, axis=-1, output_type=tf.int32)

        if not has_ref: return

        self.ref = tf.placeholder(tf.int32, [None], 'ref')
        self.accu = tf.reduce_sum(
            tf.cast(tf.equal(self.out, self.ref), dtype=tf.float32))

        xent = -tf.reduce_sum(
            tf.one_hot(self.ref, cand_num) * tf.log(self.attn_dist), axis=-1)

        self.loss = tf.reduce_mean(xent)

        if not is_training: return

        with tf.variable_scope("training_op"), tf.device("/gpu:1"):
            if options.optimize_type == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(
                    learning_rate=options.learning_rate)
            elif options.optimize_type == 'adam':
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=options.learning_rate)
            # getattr keeps the old default of 50 and works under Python 3 (dict.has_key does not)
            clipper = getattr(options, "max_grad_norm", 50)
            print("Max gradient norm {}".format(clipper))
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

            extra_train_ops = []
            train_ops = [self.train_op] + extra_train_ops
            self.train_op = tf.group(*train_ops)
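
operation_utils.collect_node and collect_final_step are defined outside this snippet. A hypothetical sketch of the gathering they appear to perform (for each batch element, picking the encoder states at the given positions) could look like the following, using tf.gather_nd for the per-batch indexing:

import tensorflow as tf

def collect_node_sketch(representation, positions):
    """representation: [batch, seq_len, dim]; positions: [batch, num_nodes] int32.
    Returns [batch, num_nodes, dim] (a guess at what collect_node does)."""
    batch_size = tf.shape(positions)[0]
    num_nodes = tf.shape(positions)[1]
    # pair each position with its batch index so gather_nd can address [b, p]
    batch_idx = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, num_nodes])
    indices = tf.stack([batch_idx, positions], axis=2)  # [batch, num_nodes, 2]
    return tf.gather_nd(representation, indices)
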
Example #3
    def __init__(self,
                 word_vocab,
                 char_vocab,
                 pos_vocab,
                 edgelabel_vocab,
                 options,
                 mode='train'):
        # the value of 'mode' can be:
        #  'train',
        #  'evaluate'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('train',)

        self.options = options
        self.word_vocab = word_vocab
        self.char_vocab = char_vocab
        self.pos_vocab = pos_vocab

        # sequential encoder; it can be configured with zero LSTM layers
        self.encoder = encoder_utils.SeqEncoder(options, word_vocab,
                                                char_vocab, pos_vocab)
        word_repres, word_dim, sentence_repres, sentence_dim, seq_mask = \
                self.encoder.encode(is_training=is_training)

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        self.graph_encoder = graph_encoder_utils.GraphEncoder(
            options,
            word_repres,
            word_dim,
            sentence_repres,
            sentence_dim,
            seq_mask,
            edgelabel_vocab,
            is_training=is_training)

        # collect placeholders
        self.sentence_words = self.encoder.sentence_words
        self.sentence_lengths = self.encoder.sentence_lengths
        if options.with_char:
            self.sentence_chars = self.encoder.sentence_chars
            self.sentence_chars_lengths = self.encoder.sentence_chars_lengths
        if options.with_POS:
            self.sentence_POSs = self.encoder.sentence_POSs

        self.in_neigh_indices = self.graph_encoder.in_neighbor_indices
        self.in_neigh_edges = self.graph_encoder.in_neighbor_edges
        self.in_neigh_mask = self.graph_encoder.in_neighbor_mask

        self.out_neigh_indices = self.graph_encoder.out_neighbor_indices
        self.out_neigh_edges = self.graph_encoder.out_neighbor_edges
        self.out_neigh_mask = self.graph_encoder.out_neighbor_mask

        if options.forest_prob_aware and options.forest_type != '1best':
            self.in_neigh_prob = self.graph_encoder.in_neighbor_prob
            self.out_neigh_prob = self.graph_encoder.out_neighbor_prob

        self.entity_indices = tf.placeholder(tf.int32, [None, None, None],
                                             name="entity_indices")
        self.entity_indices_mask = tf.placeholder(tf.float32,
                                                  [None, None, None],
                                                  name="entity_indices_mask")

        # collect inputs for final classifier
        final_repres = self.graph_encoder.graph_hiddens
        final_shape = tf.shape(final_repres)
        batch_size = final_shape[0]
        sentence_size_max = final_shape[1]

        # [batch, 2, indices, sentence_dim]
        entity_repres = collect_by_indices(final_repres, self.entity_indices)
        entity_repres = entity_repres * tf.expand_dims(
            self.entity_indices_mask, axis=-1)
        # [batch, 2, sentence_dim]
        entity_repres = tf.reduce_mean(entity_repres, axis=2)
        # [batch, 2*sentence_dim]
        h_final = tf.reshape(entity_repres, [batch_size, 2 * sentence_dim])

        ### regarding Zhang et al., EMNLP 2018
        #h_sent = tf.reduce_max(final_repres, axis=1)
        #hsent_loss = None
        #if options.lambda_l2_hsent > 0.0:
        #    hsent_loss = tf.reduce_mean(
        #            tf.reduce_sum(h_sent * h_sent, axis=-1), axis=-1)
        #h_s = tf.reduce_max(
        #        range_repres(final_repres, sentence_size_max, self.sbj_starts, self.sbj_ends),
        #        axis=1)
        #h_o = tf.reduce_max(
        #        range_repres(final_repres, sentence_size_max, self.obj_starts, self.obj_ends),
        #        axis=1)
        #h_final = tf.concat([h_sent, h_s, h_o], axis=1) # [batch, sentence_dim*3]
        #h_final = tf.layers.dense(h_final, options.ffnn_size, name="ffnn_1", activation=tf.nn.relu) # [batch, ffnn_size]
        #h_final = tf.layers.dense(h_final, options.ffnn_size, name="ffnn_2", activation=tf.nn.relu) # [batch, ffnn_size]

        ## [batch, class_num]
        self.distribution = _clip_and_normalize(
            tf.layers.dense(h_final,
                            options.num_relations,
                            name="ffnn_out",
                            activation=tf.nn.softmax), 1.0e-6)
        self.rsts = tf.argmax(self.distribution, axis=-1, output_type=tf.int32)

        ## calculating accuracy
        self.refs = tf.placeholder(tf.int32, [None])
        self.accu = tf.reduce_sum(
            tf.cast(tf.equal(self.rsts, self.refs), dtype=tf.float32))

        ## calculating loss
        # xent: [batch]
        xent = -tf.reduce_sum(tf.one_hot(self.refs, options.num_relations) *
                              tf.log(self.distribution),
                              axis=-1)

        self.loss = tf.reduce_mean(xent)

        if mode != 'train':
            print('Return from here, just evaluate')
            return

        #if options.lambda_l2_hsent > 0.0:
        #    self.loss += hsent_loss * options.lambda_l2_hsent

        clipper = 5
        tvars = tf.trainable_variables()
        if options.lambda_l2 > 0.0:
            l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss += options.lambda_l2 * l2_loss

        if hasattr(options, "decay") and options.decay != "none":
            global_step = tf.Variable(0, trainable=False)
            if options.decay == 'piece':
                values, bounds = [
                    options.learning_rate,
                ], []
                for i in range(10):
                    values.append(values[-1] * 0.9)
                    bounds.append(options.trn_bch_num * 10 * i)
                learning_rate = tf.train.piecewise_constant(
                    global_step, bounds, values)
            elif options.decay == 'poly':
                decay_steps = options.trn_bch_num * options.max_epochs
                learning_rate = tf.train.polynomial_decay(
                    options.learning_rate,
                    global_step,
                    decay_steps,
                    end_learning_rate=0.1 * options.learning_rate,
                    power=0.5)
            elif options.decay == 'cos':
                decay_steps = options.trn_bch_num * options.max_epochs
                learning_rate = tf.train.cosine_decay(options.learning_rate,
                                                      global_step,
                                                      decay_steps,
                                                      alpha=0.1)
            else:
                assert False, 'not supported'
        else:
            global_step = None
            learning_rate = options.learning_rate

        if options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
        elif options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        else:
            assert False, 'not supported optimize type'
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          clipper)
        train_op = optimizer.apply_gradients(zip(grads, tvars),
                                             global_step=global_step)

        extra_train_ops = []
        train_ops = [train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
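
_clip_and_normalize is defined outside this snippet; a plausible sketch of what it does (keeping every probability at least epsilon so the tf.log in the cross-entropy above cannot hit -inf, then renormalizing the rows) is:

import tensorflow as tf

def clip_and_normalize_sketch(probs, epsilon=1.0e-6):
    """probs: [batch, num_classes], rows summing to ~1 (e.g. a softmax output)."""
    probs = tf.clip_by_value(probs, epsilon, 1.0 - epsilon)
    return probs / tf.reduce_sum(probs, axis=-1, keepdims=True)
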
Example #4
    def __init__(self,
                 word_vocab_enc,
                 word_vocab_dec,
                 char_vocab,
                 Edgelabel_vocab,
                 options=None,
                 mode='ce_train'):
        # the value of 'mode' can be:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode'.
        # it is different from 'mode_gen' in generator_utils.py
        # the value of 'mode_gen' can be 'ce_loss', 'rl_loss', 'greedy' or 'sample'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab_enc = word_vocab_enc
        self.word_vocab_dec = word_vocab_dec

        self.create_placeholders(options)

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        with tf.variable_scope('graph_encoder'):
            self.encoder = graph_encoder_utils.GraphEncoder(
                word_vocab=word_vocab_enc,
                edge_label_vocab=Edgelabel_vocab,
                char_vocab=char_vocab,
                is_training=is_training,
                options=options,
                device_str='/gpu:1')

        with tf.variable_scope('src_encoder'), tf.device('/gpu:1'):
            self.src_encoder = encoder_utils.SeqEncoder(
                self,
                options,
                word_vocab=word_vocab_enc,
                char_vocab=char_vocab)
            self.src_hidden_dim, self.src_hiddens, self.src_decinit = \
                    self.src_encoder.encode(is_training=is_training)
            self.src_mask = self.src_encoder.passage_mask

        # ============== Choices of attention memory ================
        if options.attention_type == 'hidden':
            self.encoder_dim = options.neighbor_vector_dim
            self.encoder_states = self.encoder.graph_hiddens
        elif options.attention_type == 'hidden_cell':
            self.encoder_dim = options.neighbor_vector_dim * 2
            self.encoder_states = tf.concat(
                [self.encoder.graph_hiddens, self.encoder.graph_cells], 2)
        elif options.attention_type == 'hidden_embed':
            self.encoder_dim = options.neighbor_vector_dim + options.node_dim
            self.encoder_states = tf.concat([
                self.encoder.graph_hiddens, self.encoder.node_representations
            ], 2)
        else:
            assert False, '%s not supported yet' % options.attention_type

        # ============== Choices of initializing decoder state =============
        if options.way_init_decoder == 'src':
            new_c, new_h = self.src_decinit.c, self.src_decinit.h
        elif options.way_init_decoder == 'zero':
            new_c = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
            new_h = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
        elif options.way_init_decoder == 'avg':
            new_c = tf.reduce_mean(self.encoder.graph_cells, axis=1)
            new_h = tf.reduce_mean(self.encoder.graph_hiddens, axis=1)
        elif options.way_init_decoder == 'root':
            new_c = self.encoder.graph_cells[:, 0, :]
            new_h = self.encoder.graph_hiddens[:, 0, :]
        else:
            assert False, 'way to initialize decoder (%s) not supported' % options.way_init_decoder
        self.init_decoder_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        # prepare src-side input for decoder

        # prepare AMR-side input for decoder
        self.nodes = self.encoder.passage_nodes
        self.nodes_num = self.encoder.passage_nodes_size
        if options.with_char:
            self.nodes_chars = self.encoder.passage_nodes_chars
            self.nodes_chars_num = self.encoder.passage_nodes_chars_size
        self.nodes_mask = self.encoder.passage_nodes_mask

        self.in_neigh_indices = self.encoder.passage_in_neighbor_indices
        self.in_neigh_edges = self.encoder.passage_in_neighbor_edges
        self.in_neigh_mask = self.encoder.passage_in_neighbor_mask

        self.out_neigh_indices = self.encoder.passage_out_neighbor_indices
        self.out_neigh_edges = self.encoder.passage_out_neighbor_edges
        self.out_neigh_mask = self.encoder.passage_out_neighbor_mask

        loss_weights = tf.sequence_mask(
            self.answer_len, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            self.generator = generator_utils.CovCopyAttenGen(
                self, options, word_vocab_dec)
            # calculate encoder_features
            with variable_scope.variable_scope("encoder_feats"):
                self.encoder_features = self.generator.calculate_encoder_features(
                    self.encoder_states, self.encoder_dim)

            with variable_scope.variable_scope("src_feats"):
                self.src_features = self.generator.calculate_encoder_features(
                    self.src_hiddens, self.src_hidden_dim)

            if mode == 'decode':
                self.context_encoder_t_1 = tf.placeholder(
                    tf.float32, [None, self.encoder_dim],
                    name='context_encoder_t_1')  # [batch_size, encoder_dim]
                self.context_src_t_1 = tf.placeholder(
                    tf.float32, [None, self.src_hidden_dim],
                    name='context_src_t_1')  # [batch_size, src_dim]
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')  # [batch_size, node_num]
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                (self.state_t, self.context_encoder_t, self.context_src_t,
                 self.coverage_t, self.attn_dist_t, self.p_gen_t, self.ouput_t,
                 self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                 self.multinomial_prediction) = self.generator.decode_mode(
                     word_vocab_dec, options.beam_size,
                     self.init_decoder_state, self.context_encoder_t_1,
                     self.context_src_t_1, self.coverage_t_1, self.word_t,
                     self.encoder_states, self.encoder_features, self.nodes,
                     self.nodes_mask, self.src_hiddens, self.src_features,
                     self.src_mask)
                # not building a training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab_dec,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='greedy')
                # not building a training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                self.accu, self.loss, _ = self.generator.train_mode(
                    word_vocab_dec,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.src_hidden_dim,
                    self.src_hiddens,
                    self.src_features,
                    self.src_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='ce_loss')
                if mode == 'evaluate':
                    return  # not building a training op for evaluation
            elif mode == 'rl_train':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab_dec,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='rl_loss')

                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    word_vocab_dec,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab_dec,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='greedy')

        if options.optimize_type == 'adadelta':
            clipper = 50
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        elif options.optimize_type == 'adam':
            clipper = 50
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        extra_train_ops = []
        train_ops = [self.train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
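
One detail worth noting in the decoder-initialization block above: the 'avg' branch averages graph_cells/graph_hiddens over all node positions, padded ones included. A mask-aware mean, shown only as an illustration (it is not what the code above does), would weight by the nodes mask:

import tensorflow as tf

def masked_mean_sketch(states, mask):
    """states: [batch, node_num, dim]; mask: [batch, node_num] with 1 for real nodes."""
    summed = tf.reduce_sum(states * tf.expand_dims(mask, -1), axis=1)  # [batch, dim]
    count = tf.maximum(tf.reduce_sum(mask, axis=1, keepdims=True), 1.0)  # avoid divide-by-zero
    return summed / count
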
Example #5
    def __init__(self,
                 word_vocab=None,
                 char_vocab=None,
                 POS_vocab=None,
                 feat_vocab=None,
                 action_vocab=None,
                 options=None,
                 mode='ce_train'):

        # the value of 'mode' can be:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode',
        #  'topk'.
        # it is different from 'mode_gen' in soft_generator_utils.py
        # value of 'mode_gen' can be 'ce_loss', 'rl_loss', 'greedy', 'sample' or 'topk'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab = word_vocab

        with tf.variable_scope('input_encoder'):
            self.input_encoder = encoder_utils.SeqEncoder(
                options,
                word_vocab=word_vocab,
                char_vocab=char_vocab,
                POS_vocab=POS_vocab)
            self.input_hidden_dim, self.input_hiddens, self.input_decinit = \
                    self.input_encoder.encode(is_training=is_training)
            self.input_mask = self.input_encoder.passage_mask

        with tf.variable_scope('concept_encoder'):
            options_copy = copy.copy(options)
            options_copy.with_char = False
            options_copy.with_POS = False
            options_copy.with_lemma = False
            self.concept_encoder = encoder_utils.SeqEncoder(
                options_copy,
                word_vocab=word_vocab,
                char_vocab=None,
                POS_vocab=None)
            self.concept_hidden_dim, self.concept_hiddens, self.concept_decinit = \
                    self.concept_encoder.encode(is_training=is_training)
            self.concept_mask = self.concept_encoder.passage_mask

        cat_c = tf.concat([self.input_decinit.c, self.concept_decinit.c],
                          axis=1)
        cat_h = tf.concat([self.input_decinit.h, self.concept_decinit.h],
                          axis=1)
        compress_w = tf.get_variable('compress_w', [
            self.input_hidden_dim + self.concept_hidden_dim,
            options.gen_hidden_size
        ],
                                     dtype=tf.float32)
        compress_b = tf.get_variable('compress_b', [options.gen_hidden_size],
                                     dtype=tf.float32)
        cat_c = tf.matmul(cat_c, compress_w) + compress_b
        cat_h = tf.matmul(cat_h, compress_w) + compress_b
        self.init_decoder_state = tf.contrib.rnn.LSTMStateTuple(cat_c, cat_h)

        self.create_placeholders(options)

        gen_loss_mask = tf.sequence_mask(
            self.action_len, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            self.generator = soft_generator_utils.AttnGen(
                self, options, action_vocab, feat_vocab)
            # calculate encoder_features
            with variable_scope.variable_scope("input_feats"):
                self.input_features = self.generator.calculate_encoder_features(
                    self.input_hiddens, self.input_hidden_dim)
            with variable_scope.variable_scope("concept_feats"):
                self.concept_features = self.generator.calculate_encoder_features(
                    self.concept_hiddens, self.concept_hidden_dim)

            if mode == 'decode':
                # [batch_size, encode_dim]
                self.context_input_t_1 = tf.placeholder(
                    tf.float32, [None, self.input_hidden_dim],
                    name='context_input_t_1')
                # [batch_size, encode_dim]
                self.context_concept_t_1 = tf.placeholder(
                    tf.float32, [None, self.concept_hidden_dim],
                    name='context_concept_t_1')
                # [batch_size, feat_num]
                self.featidx_t = tf.placeholder(tf.int32, [None, None],
                                                name='featidx_t')
                # [batch_size]
                self.actionidx_t = tf.placeholder(tf.int32, [None],
                                                  name='actionidx_t')

                (self.state_t, self.context_input_t, self.context_concept_t,
                 self.ouput_t, self.topk_log_probs, self.topk_ids,
                 self.greedy_prediction,
                 self.sample_prediction) = self.generator.decode_mode(
                     self.init_decoder_state, self.context_input_t_1,
                     self.context_concept_t_1, self.actionidx_t,
                     self.featidx_t, self.input_hiddens, self.input_features,
                     self.input_mask, self.concept_hiddens,
                     self.concept_features, self.concept_mask)
                # not building a training op for this mode
                return
            elif mode == 'evaluate_bleu':
                assert False, 'not in use'
                _, _, self.greedy_words = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='greedy')
                # not building a training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                self.accu, self.loss, self.sampled_words = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='ce_loss')
                if mode == 'evaluate':
                    return  # not building a training op for evaluation
            elif mode == 'topk':
                self.accu, self.sampled_words = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='topk')
                return
            elif mode == 'rl_train':
                assert False, 'not in use'
                _, self.loss, _ = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='rl_loss')

                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    self.input_hidden_dim,
                    self.input_hiddens,
                    self.input_features,
                    self.input_mask,
                    self.concept_hidden_dim,
                    self.concept_hiddens,
                    self.concept_features,
                    self.concept_mask,
                    self.init_decoder_state,
                    self.action_inp,
                    self.action_ref,
                    self.feats,
                    gen_loss_mask,
                    mode_gen='greedy')
            else:
                assert False, 'unknown mode'

        if options.optimize_type == 'adadelta':
            clipper = 50
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))
        elif options.optimize_type == 'adam':
            clipper = 50
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        extra_train_ops = []
        train_ops = [self.train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)
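
The compress_w / compress_b projection above maps the concatenated final states of the two encoders down to gen_hidden_size, sharing one weight matrix between the cell and hidden halves. An equivalent sketch with tf.layers.dense (illustration only; the original builds the variables by hand):

import tensorflow as tf

def compress_state_sketch(input_state, concept_state, gen_hidden_size):
    """input_state / concept_state: tf.contrib.rnn.LSTMStateTuple of the two encoders."""
    cat_c = tf.concat([input_state.c, concept_state.c], axis=1)
    cat_h = tf.concat([input_state.h, concept_state.h], axis=1)
    with tf.variable_scope('compress'):
        new_c = tf.layers.dense(cat_c, gen_hidden_size, name='proj')
        # reuse the same kernel and bias for the hidden half, as compress_w/compress_b does
        new_h = tf.layers.dense(cat_h, gen_hidden_size, name='proj', reuse=True)
    return tf.contrib.rnn.LSTMStateTuple(new_c, new_h)
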
Example #6
    def __init__(self, word_vocab_enc, word_vocab_dec, options=None, mode='ce_train'):
        # the value of 'mode' can be:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode'.
        # it is different from 'mode_gen' in generator_utils.py
        # the value of 'mode_gen' can be 'ce_loss', 'rl_loss', 'greedy' or 'sample'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode in ('ce_train',)

        self.options = options
        self.word_vocab_enc = word_vocab_enc
        self.word_vocab_dec = word_vocab_dec

        self.create_placeholders(options)

        # encode the input instance with two sequential encoders:
        # one over the linearized AMR and one over the source sentence
        with tf.variable_scope('linamr_encoder'):
            self.linamr_encoder = encoder_utils.SeqEncoder(options,
                    word_vocab = word_vocab_enc)
            self.linamr_hidden_dim, self.linamr_hiddens, self.linamr_decinit = \
                    self.linamr_encoder.encode(is_training=is_training)
            self.linamr_words = self.linamr_encoder.in_passage_words
            self.linamr_lengths = self.linamr_encoder.passage_lengths
            self.linamr_mask = self.linamr_encoder.passage_mask

        with tf.variable_scope('src_encoder'):
            self.src_encoder = encoder_utils.SeqEncoder(options,
                    word_vocab=word_vocab_enc)
            self.src_hidden_dim, self.src_hiddens, self.src_decinit = \
                    self.src_encoder.encode(is_training=is_training)
            self.src_words = self.src_encoder.in_passage_words
            self.src_lengths = self.src_encoder.passage_lengths
            self.src_mask = self.src_encoder.passage_mask

        # ============== Choices of initializing decoder state =============
        if options.way_init_decoder == 'src':
            new_c, new_h = self.src_decinit.c, self.src_decinit.h
        elif options.way_init_decoder == 'linamr':
            new_c, new_h = self.linamr_decinit.c, self.linamr_decinit.h
        elif options.way_init_decoder == 'zero':
            # this model defines no self.encoder; derive the batch size from the source hiddens
            batch_size = tf.shape(self.src_hiddens)[0]
            new_c = tf.zeros([batch_size, options.gen_hidden_size])
            new_h = tf.zeros([batch_size, options.gen_hidden_size])
        else:
            assert False, 'way to initialize decoder (%s) not supported' % options.way_init_decoder
        self.init_decoder_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        # prepare src-side input for decoder

        loss_weights = tf.sequence_mask(self.answer_len, options.max_answer_len, dtype=tf.float32) # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            self.generator = generator_utils.CovAttenGen(self, options, word_vocab_dec, is_training=is_training)
            # calculate encoder_features
            with variable_scope.variable_scope("encoder_feats"):
                self.linamr_features = self.generator.calculate_encoder_features(
                        self.linamr_hiddens, self.linamr_hidden_dim)

            with variable_scope.variable_scope("src_feats"):
                self.src_features = self.generator.calculate_encoder_features(
                        self.src_hiddens, self.src_hidden_dim)

            if mode == 'decode':
                self.context_encoder_t_1 = tf.placeholder(tf.float32,
                        [None, self.linamr_hidden_dim], name='context_encoder_t_1') # [batch_size, encoder_dim]
                self.context_src_t_1 = tf.placeholder(tf.float32,
                        [None, self.src_hidden_dim], name='context_src_t_1') # [batch_size, src_dim]
                if options.use_coverage:
                    self.coverage_t_1 = tf.placeholder(tf.float32, [None, None], name='coverage_t_1') # [batch_size, enc_len]
                else:
                    self.coverage_t_1 = None
                self.word_t = tf.placeholder(tf.int32, [None], name='word_t') # [batch_size]

                (self.state_t, self.context_encoder_t, self.context_src_t,
                        self.coverage_t, self.attn_dist_t, self.ouput_t,
                        self.topk_log_probs, self.topk_ids, self.greedy_prediction, self.multinomial_prediction) = \
                            self.generator.decode_mode(
                        word_vocab_dec, options.beam_size, self.init_decoder_state,
                        self.context_encoder_t_1, self.context_src_t_1, self.coverage_t_1, self.word_t,
                        self.linamr_hiddens, self.linamr_features, self.linamr_mask,
                        self.src_hiddens, self.src_features, self.src_mask)
                # not building a training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(word_vocab_dec,
                    self.linamr_hidden_dim, self.linamr_hiddens, self.linamr_features, self.linamr_mask,
                    self.src_hidden_dim, self.src_hiddens, self.src_features, self.src_mask,
                    self.init_decoder_state, self.answer_inp, self.answer_ref, loss_weights, mode_gen='greedy')
                # not building a training op for this mode
                return
            elif mode in ('ce_train', 'evaluate', ):
                self.accu, self.loss, _ = self.generator.train_mode(word_vocab_dec,
                    self.linamr_hidden_dim, self.linamr_hiddens, self.linamr_features, self.linamr_mask,
                    self.src_hidden_dim, self.src_hiddens, self.src_features, self.src_mask,
                    self.init_decoder_state, self.answer_inp, self.answer_ref, loss_weights, mode_gen='ce_loss')
                if mode == 'evaluate': return  # not building a training op for evaluation

        with tf.device('/gpu:1'):
            if options.optimize_type == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(learning_rate=options.learning_rate)
            elif options.optimize_type == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate=options.learning_rate)
            clipper = 50 if not options.__dict__.has_key("max_gradient_norm") else options.max_gradient_norm
            print("MAX gradient norm {}".format(clipper))
            tvars = tf.trainable_variables()
            if options.lambda_l2>0.0:
                l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clipper)
            self.train_op = optimizer.apply_gradients(zip(grads, tvars))

            extra_train_ops = []
            train_ops = [self.train_op] + extra_train_ops
            self.train_op = tf.group(*train_ops)
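
Every example above builds its loss mask with tf.sequence_mask. A small self-contained illustration of what that produces (TF 1.x, hypothetical lengths), matching the loss_weights tensors used to zero out padded decoder steps:

import tensorflow as tf

# two examples with true target lengths 2 and 4, padded to 5 decoder steps
answer_len = tf.constant([2, 4])
loss_weights = tf.sequence_mask(answer_len, 5, dtype=tf.float32)

with tf.Session() as sess:
    print(sess.run(loss_weights))
    # [[1. 1. 0. 0. 0.]
    #  [1. 1. 1. 1. 0.]]
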