Exemple #1
0
    def forward(self):
        """Assemble the graph-LSTM loss, evaluation metrics and variable list.

        The tree LSTM produces per-node hidden states, which are fed through a
        ``GraphEncoder`` created under the ``graph_lstm`` variable scope. The
        encoder output is flattened to 2-D and projected with the tree LSTM's
        output layer to obtain per-node logits.

        Returns:
            A 4-tuple ``(loss, metrics, metrics_num, train_vars)``:
            ``loss`` is the masked sparse cross-entropy summed over all nodes
            plus an L2 penalty; ``metrics`` and ``metrics_num`` are whatever
            ``self.eval_function`` returns; ``train_vars`` is ``None`` when
            ``config['all_vars_trained']`` is truthy, otherwise the global
            variables under ``graph_lstm`` plus the output layer's weights
            and bias.
        """
        cfg = self.config
        tree = self.tree_lstm

        node_hiddens = tree.generate_hiddens()
        sizes = tree.nodes_size
        self.nodes_size = sizes

        out_layer = tree.output_layer
        longest = tree.max_l

        # 1.0 for the first `sizes[i]` positions of each row, 0.0 afterwards.
        node_mask = tf.sequence_mask(sizes, longest, dtype=tf.float32)

        with tf.variable_scope('graph_lstm'):
            graph_enc = graph_encoder_utils.GraphEncoder(
                (node_hiddens, node_mask), sizes, self.is_train, self.config)
            self.encoder = graph_enc
            enc_states = graph_enc.graph_hiddens

            # Merge the first two axes so the output layer sees a 2-D matrix.
            dyn_shape = tf.shape(enc_states)
            flat_rows = dyn_shape[0] * dyn_shape[1]
            flat_cols = dyn_shape[-1]
            flat_states = tf.reshape(enc_states, [flat_rows, flat_cols])
            logits_flat = tf.nn.xw_plus_b(flat_states,
                                          out_layer._weights,
                                          out_layer._bias)

            labels_flat = tf.reshape(self.labels, [-1])
            mask_flat = tf.reshape(node_mask, [-1])
            per_node_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits_flat, labels=labels_flat)
            # Padded positions contribute nothing to the summed loss.
            total_loss = tf.reduce_sum(per_node_xent * mask_flat)

            # L2 penalty on every trainable variable whose name does not
            # contain 'bias' or 'b_'.
            l2_terms = []
            for var in tf.trainable_variables():
                if 'bias' in var.name or 'b_' in var.name:
                    continue
                l2_terms.append(tf.nn.l2_loss(var))

            total_loss = total_loss + tf.add_n(l2_terms) * 0.0005

            # None signals "train everything"; otherwise restrict training to
            # the graph-LSTM variables and the shared output layer.
            train_vars = None
            if not cfg['all_vars_trained']:
                train_vars = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope="graph_lstm")
                train_vars += [out_layer._weights, out_layer._bias]

            metrics, metrics_num = self.eval_function(
                logits_flat, labels_flat, mask_flat, sizes, self.labels)

            return total_loss, metrics, metrics_num, train_vars
Exemple #2
0
    def __init__(self,
                 word_vocab,
                 char_vocab,
                 Edgelabel_vocab,
                 options=None,
                 mode='ce_train'):
        """Build the graph encoder, the attention generator and (optionally) the train op.

        Args:
            word_vocab: word vocabulary; forwarded to the encoder and generator.
            char_vocab: character vocabulary; forwarded to the encoder.
            Edgelabel_vocab: edge-label vocabulary; forwarded to the encoder.
            options: configuration object. Despite the ``None`` default it is
                dereferenced unconditionally, so a real object is required.
                Attributes read here: ``attention_type``,
                ``neighbor_vector_dim``, ``way_init_decoder``,
                ``gen_hidden_size``, ``with_char``, ``max_answer_len``,
                ``beam_size``, ``optimize_type``, ``learning_rate`` and
                ``lambda_l2``.
            mode: one of 'ce_train', 'rl_train', 'evaluate', 'evaluate_bleu'
                or 'decode'. Only 'ce_train' and 'rl_train' build
                ``self.train_op``; the other modes return early.

        Raises:
            AssertionError: if ``options.attention_type`` or
                ``options.way_init_decoder`` is not supported.
            ValueError: if ``mode`` or ``options.optimize_type`` is not
                supported.
        """
        # here 'mode', whose value can be:
        #  'ce_train',
        #  'rl_train',
        #  'evaluate',
        #  'evaluate_bleu',
        #  'decode'.
        # it is different from 'mode_gen' in generator_utils.py
        # value of 'mode_gen' can be ['ce_loss', 'rl_loss', 'greedy' or 'sample']
        self.mode = mode

        # is_training controls whether to use dropout; note that it is only
        # switched on for 'ce_train' (not for 'rl_train').
        is_training = mode == 'ce_train'

        self.options = options
        self.word_vocab = word_vocab

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        self.encoder = graph_encoder_utils.GraphEncoder(
            word_vocab=word_vocab,
            edge_label_vocab=Edgelabel_vocab,
            char_vocab=char_vocab,
            is_training=is_training,
            options=options)

        # ============== Choices of attention memory ================
        if options.attention_type == 'hidden':
            self.encoder_dim = options.neighbor_vector_dim
            self.encoder_states = self.encoder.graph_hiddens
        elif options.attention_type == 'hidden_cell':
            self.encoder_dim = options.neighbor_vector_dim * 2
            self.encoder_states = tf.concat(
                [self.encoder.graph_hiddens, self.encoder.graph_cells], 2)
        elif options.attention_type == 'hidden_embed':
            self.encoder_dim = options.neighbor_vector_dim + self.encoder.input_dim
            self.encoder_states = tf.concat([
                self.encoder.graph_hiddens, self.encoder.node_representations
            ], 2)
        else:
            assert False, '%s not supported yet' % options.attention_type

        # ============== Choices of initializing decoder state =============
        if options.way_init_decoder == 'zero':
            new_c = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
            new_h = tf.zeros(
                [self.encoder.batch_size, options.gen_hidden_size])
        elif options.way_init_decoder == 'all':
            # sum the states of all nodes
            new_c = tf.reduce_sum(self.encoder.graph_cells, axis=1)
            new_h = tf.reduce_sum(self.encoder.graph_hiddens, axis=1)
        elif options.way_init_decoder == 'root':
            # take the state at node index 0
            new_c = self.encoder.graph_cells[:, 0, :]
            new_h = self.encoder.graph_hiddens[:, 0, :]
        else:
            assert False, 'way to initial decoder (%s) not supported' % options.way_init_decoder
        self.init_decoder_state = tf.contrib.rnn.LSTMStateTuple(new_c, new_h)

        # prepare AMR-side input for decoder
        self.nodes = self.encoder.passage_nodes
        self.nodes_num = self.encoder.passage_nodes_size
        if options.with_char:
            self.nodes_chars = self.encoder.passage_nodes_chars
            self.nodes_chars_num = self.encoder.passage_nodes_chars_size
        self.nodes_mask = self.encoder.passage_nodes_mask

        self.in_neigh_indices = self.encoder.passage_in_neighbor_indices
        self.in_neigh_edges = self.encoder.passage_in_neighbor_edges
        self.in_neigh_mask = self.encoder.passage_in_neighbor_mask

        self.out_neigh_indices = self.encoder.passage_out_neighbor_indices
        self.out_neigh_edges = self.encoder.passage_out_neighbor_edges
        self.out_neigh_mask = self.encoder.passage_out_neighbor_mask

        # presumably defines self.answer_len / self.answer_inp /
        # self.answer_ref used below -- defined outside this method.
        self.create_placeholders(options)

        loss_weights = tf.sequence_mask(
            self.answer_len, options.max_answer_len,
            dtype=tf.float32)  # [batch_size, gen_steps]

        with variable_scope.variable_scope("generator"):
            # create generator
            self.generator = generator_utils.CovCopyAttenGen(
                self, options, word_vocab)
            # calculate encoder_features
            self.encoder_features = self.generator.calculate_encoder_features(
                self.encoder_states, self.encoder_dim)

            if mode == 'decode':
                self.context_t_1 = tf.placeholder(
                    tf.float32, [None, self.encoder_dim],
                    name='context_t_1')  # [batch_size, encoder_dim]
                # NOTE(review): second axis is left dynamic; it is probably
                # the node count rather than encoder_dim -- confirm.
                self.coverage_t_1 = tf.placeholder(
                    tf.float32, [None, None],
                    name='coverage_t_1')
                self.word_t = tf.placeholder(tf.int32, [None],
                                             name='word_t')  # [batch_size]

                # 'ouput_t' (sic) is kept as-is: it is a public attribute name.
                (self.state_t, self.context_t, self.coverage_t,
                 self.attn_dist_t, self.p_gen_t, self.ouput_t,
                 self.topk_log_probs, self.topk_ids, self.greedy_prediction,
                 self.multinomial_prediction) = self.generator.decode_mode(
                     word_vocab, options.beam_size, self.init_decoder_state,
                     self.context_t_1, self.coverage_t_1, self.word_t,
                     self.encoder_states, self.encoder_features, self.nodes,
                     self.nodes_mask)
                # not building training op for this mode
                return
            elif mode == 'evaluate_bleu':
                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='greedy')
                # not building training op for this mode
                return
            elif mode in (
                    'ce_train',
                    'evaluate',
            ):
                self.accu, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='ce_loss')
                if mode == 'evaluate':
                    return  # not building training op for evaluation
            elif mode == 'rl_train':
                _, self.loss, _ = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    loss_weights,
                    mode_gen='rl_loss')

                # the two extra decoding passes below must share the
                # generator variables created by the pass above
                tf.get_variable_scope().reuse_variables()

                _, _, self.sampled_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='sample')

                _, _, self.greedy_words = self.generator.train_mode(
                    word_vocab,
                    self.encoder_dim,
                    self.encoder_states,
                    self.encoder_features,
                    self.nodes,
                    self.nodes_mask,
                    self.init_decoder_state,
                    self.answer_inp,
                    self.answer_ref,
                    None,
                    mode_gen='greedy')
            else:
                # Without this, an unknown mode would fall through to the
                # optimizer section and fail on the missing self.loss.
                raise ValueError('mode (%s) not supported' % mode)

        # ============== Building the training op ================
        # Fail loudly on an unknown optimizer instead of crashing later with
        # an AttributeError on the never-assigned self.train_op.
        if options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=options.learning_rate)
        elif options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=options.learning_rate)
        else:
            raise ValueError(
                'optimize_type (%s) not supported' % options.optimize_type)

        clipper = 50  # global-norm threshold for gradient clipping
        tvars = tf.trainable_variables()
        if options.lambda_l2 > 0.0:
            # only variables of rank > 1 are regularized
            l2_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
            ])
            self.loss = self.loss + options.lambda_l2 * l2_loss
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          clipper)
        apply_op = optimizer.apply_gradients(zip(grads, tvars))
        self.train_op = tf.group(apply_op)
Exemple #3
0
    def __init__(self, word_vocab, char_vocab, Edgelabel_vocab, options=None, mode='train'):
        """Build a graph encoder followed by a linear entity classifier.

        Args:
            word_vocab: word vocabulary; forwarded to the encoder.
            char_vocab: character vocabulary; forwarded to the encoder.
            Edgelabel_vocab: edge-label vocabulary; forwarded to the encoder.
            options: configuration object. Despite the ``None`` default it is
                dereferenced unconditionally, so a real object is required.
                Attributes read here: ``attention_type``,
                ``neighbor_vector_dim``, ``with_char``, ``entity_num``,
                ``class_num``, ``optimize_type``, ``learning_rate`` and
                ``lambda_l2``.
            mode: 'train' builds ``self.train_op`` and enables ``is_training``
                for the encoder; any other value stops after the loss is
                defined.

        Raises:
            AssertionError: if ``options.attention_type`` is not supported.
            ValueError: if ``options.optimize_type`` is not supported
                (train mode only).
        """
        # the value of 'mode' can be:
        #  'train',
        #  'evaluate'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode == 'train'

        self.options = options
        self.word_vocab = word_vocab

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        self.encoder = graph_encoder_utils.GraphEncoder(
                word_vocab = word_vocab,
                edge_label_vocab = Edgelabel_vocab,
                char_vocab = char_vocab,
                is_training = is_training, options = options)

        # ============== Choices of attention memory ================
        if options.attention_type == 'hidden':
            self.encoder_dim = options.neighbor_vector_dim
            self.encoder_states = self.encoder.graph_hiddens
        elif options.attention_type == 'hidden_cell':
            self.encoder_dim = options.neighbor_vector_dim * 2
            self.encoder_states = tf.concat([self.encoder.graph_hiddens, self.encoder.graph_cells], 2)
        elif options.attention_type == 'hidden_embed':
            self.encoder_dim = options.neighbor_vector_dim + self.encoder.input_dim
            self.encoder_states = tf.concat([self.encoder.graph_hiddens, self.encoder.node_representations], 2)
        else:
            assert False, '%s not supported yet' % options.attention_type

        # expose the encoder's input tensors on the model
        self.nodes = self.encoder.passage_nodes
        self.nodes_num = self.encoder.passage_nodes_size
        if options.with_char:
            self.nodes_chars = self.encoder.passage_nodes_chars
            self.nodes_chars_num = self.encoder.passage_nodes_chars_size
        self.nodes_mask = self.encoder.passage_nodes_mask

        self.in_neigh_indices = self.encoder.passage_in_neighbor_indices
        self.in_neigh_edges = self.encoder.passage_in_neighbor_edges
        self.in_neigh_mask = self.encoder.passage_in_neighbor_mask

        self.out_neigh_indices = self.encoder.passage_out_neighbor_indices
        self.out_neigh_edges = self.encoder.passage_out_neighbor_edges
        self.out_neigh_mask = self.encoder.passage_out_neighbor_mask

        ## generating prediction results
        self.entity_indices = tf.placeholder(tf.int32, [None, None, None],
                name="entity_indices")
        self.entity_indices_mask = tf.placeholder(tf.float32, [None, None, None],
                name="entity_indices_mask")
        batch_size = tf.shape(self.encoder_states)[0]
        dim = tf.shape(self.encoder_states)[2]
        entity_num = tf.shape(self.entity_indices)[1]

        # self.encoder_states [batch, node_num, encoder_dim]
        # entity_states [batch, entity_num, entity_size, dim]
        entity_states = collect_by_indices(self.encoder_states, self.entity_indices)
        # applying mask
        entity_states = entity_states * tf.expand_dims(self.entity_indices_mask, axis=-1)
        # average within each entity: [batch, entity_num, encoder_dim]
        # NOTE(review): reduce_mean divides by the padded entity size, so the
        # zeroed (masked) positions still count toward the denominator --
        # confirm this is intended rather than a masked mean.
        entity_states = tf.reduce_mean(entity_states, axis=2)
        # flatten: [batch, entity_num*encoder_dim]
        entity_states = tf.reshape(entity_states, [batch_size, entity_num*dim])

        w_linear = tf.get_variable("w_linear",
                [options.entity_num*self.encoder_dim, options.class_num], dtype=tf.float32)
        b_linear = tf.get_variable("b_linear",
                [options.class_num], dtype=tf.float32)
        # [batch, class_num]
        prediction = tf.nn.softmax(tf.matmul(entity_states, w_linear) + b_linear)
        prediction = _clip_and_normalize(prediction, 1.0e-6)
        self.output = tf.argmax(prediction,axis=-1,output_type=tf.int32)

        ## calculating accuracy
        # number (not ratio) of correct predictions in the batch
        self.refs = tf.placeholder(tf.int32, [None,])
        self.accu = tf.reduce_sum(tf.cast(tf.equal(self.output,self.refs),dtype=tf.float32))

        ## calculating loss
        # xent: [batch]
        xent = -tf.reduce_sum(
                tf.one_hot(self.refs,options.class_num)*tf.log(prediction),
                axis=-1)
        self.loss = tf.reduce_mean(xent)

        if mode != 'train':
            print('Return from here, just evaluate')
            return

        # Fail loudly on an unknown optimizer instead of crashing later with
        # an AttributeError on the never-assigned self.train_op.
        if options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=options.learning_rate)
        elif options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=options.learning_rate)
        else:
            raise ValueError(
                    'optimize_type (%s) not supported' % options.optimize_type)

        clipper = 50  # global-norm threshold for gradient clipping
        tvars = tf.trainable_variables()
        if options.lambda_l2>0.0:
            # only variables of rank > 1 are regularized
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss = self.loss + options.lambda_l2 * l2_loss
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clipper)
        apply_op = optimizer.apply_gradients(zip(grads, tvars))
        self.train_op = tf.group(apply_op)
Exemple #4
0
    def __init__(self, word_vocab, char_vocab, Edgelabel_vocab, options=None, mode='train'):
        """Build two graph encoders (forward and reverse) and a linear entity classifier.

        Args:
            word_vocab: word vocabulary; forwarded to both encoders.
            char_vocab: character vocabulary; forwarded to both encoders.
            Edgelabel_vocab: edge-label vocabulary; forwarded to both encoders.
            options: configuration object. Despite the ``None`` default it is
                dereferenced unconditionally, so a real object is required.
                Attributes read here: ``neighbor_vector_dim``, ``with_char``,
                ``entity_num``, ``class_num``, ``optimize_type``,
                ``learning_rate`` and ``lambda_l2``.
            mode: 'train' builds ``self.train_op`` and enables ``is_training``
                for the encoders; any other value stops after the loss is
                defined.

        Raises:
            ValueError: if ``options.optimize_type`` is not supported
                (train mode only).
        """
        # the value of 'mode' can be:
        #  'train',
        #  'evaluate'
        self.mode = mode

        # is_training controls whether to use dropout
        is_training = mode == 'train'

        self.options = options
        self.word_vocab = word_vocab

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        with tf.variable_scope('encoder'):
            self.encoder = graph_encoder_utils.GraphEncoder(
                    word_vocab = word_vocab,
                    edge_label_vocab = Edgelabel_vocab,
                    char_vocab = char_vocab,
                    is_training = is_training, options = options)

        # second encoder with its own variable scope
        with tf.variable_scope('rev_encoder'):
            self.encoder_rev = graph_encoder_utils.GraphEncoder(
                    word_vocab = word_vocab,
                    edge_label_vocab = Edgelabel_vocab,
                    char_vocab = char_vocab,
                    is_training = is_training, options = options)

        with tf.variable_scope('entity_repre'):
            self.entity = entity_utils.Entity(self.encoder.graph_hiddens)
            self.entity_rev = entity_utils.Entity(self.encoder_rev.graph_hiddens)

            batch_size = tf.shape(self.encoder.graph_hiddens)[0]
            dim = tf.shape(self.encoder.graph_hiddens)[2]
            entity_num = tf.shape(self.entity.entity_indices)[1]

            # both directions are concatenated, hence the factor of 2
            self.encoder_dim = options.neighbor_vector_dim * 2
            # [batch, entity_num, encoder_dim]
            entity_states = tf.concat(
                    [self.entity.entity_states, self.entity_rev.entity_states], 2)
            # [batch, entity_num*encoder_dim]
            entity_states = tf.reshape(entity_states, [batch_size, entity_num*dim*2])

        # placeholders
        self.nodes = self.encoder.passage_nodes
        self.nodes_num = self.encoder.passage_nodes_size
        if options.with_char:
            self.nodes_chars = self.encoder.passage_nodes_chars
            self.nodes_chars_num = self.encoder.passage_nodes_chars_size
        self.nodes_mask = self.encoder.passage_nodes_mask

        self.in_neigh_indices = self.encoder.passage_in_neighbor_indices
        self.in_neigh_hidden_indices = self.encoder.passage_in_neighbor_hidden_indices
        self.in_neigh_edges = self.encoder.passage_in_neighbor_edges
        self.in_neigh_mask = self.encoder.passage_in_neighbor_mask

        # rev placeholders
        self.rev_nodes = self.encoder_rev.passage_nodes
        self.rev_nodes_num = self.encoder_rev.passage_nodes_size
        if options.with_char:
            self.rev_nodes_chars = self.encoder_rev.passage_nodes_chars
            self.rev_nodes_chars_num = self.encoder_rev.passage_nodes_chars_size
        self.rev_nodes_mask = self.encoder_rev.passage_nodes_mask

        self.rev_in_neigh_indices = self.encoder_rev.passage_in_neighbor_indices
        self.rev_in_neigh_hidden_indices = self.encoder_rev.passage_in_neighbor_hidden_indices
        self.rev_in_neigh_edges = self.encoder_rev.passage_in_neighbor_edges
        self.rev_in_neigh_mask = self.encoder_rev.passage_in_neighbor_mask


        w_linear = tf.get_variable("w_linear",
                [options.entity_num*self.encoder_dim, options.class_num], dtype=tf.float32)
        b_linear = tf.get_variable("b_linear",
                [options.class_num], dtype=tf.float32)
        # [batch, class_num]
        logits = tf.matmul(entity_states, w_linear) + b_linear
        self.output = tf.argmax(logits, axis=-1, output_type=tf.int32)

        ## calculating accuracy
        # number (not ratio) of correct predictions in the batch; reuses
        # self.output instead of building a second identical argmax op
        self.answers = tf.placeholder(tf.int32, [None,])
        self.accu = tf.reduce_sum(
                tf.cast(tf.equal(self.output, self.answers), dtype=tf.float32))

        ## calculating loss
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.one_hot(self.answers,options.class_num)))

        if mode != 'train':
            print('Return from here, just evaluate')
            return

        # Fail loudly on an unknown optimizer instead of crashing later with
        # an AttributeError on the never-assigned self.train_op.
        if options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=options.learning_rate)
        elif options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=options.learning_rate)
        else:
            raise ValueError(
                    'optimize_type (%s) not supported' % options.optimize_type)

        clipper = 50  # global-norm threshold for gradient clipping
        tvars = tf.trainable_variables()
        if options.lambda_l2>0.0:
            # only variables of rank > 1 are regularized
            l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss = self.loss + options.lambda_l2 * l2_loss
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), clipper)
        apply_op = optimizer.apply_gradients(zip(grads, tvars))
        self.train_op = tf.group(apply_op)
Exemple #5
0
    def __init__(self, word_vocab, char_vocab=None, POS_vocab=None, NER_vocab=None, options=None, \
            has_ref=True, is_training=True):
        """Build the passage/question matching model and (optionally) its train op.

        Passage and question are encoded separately, entity mentions are
        represented by their start/end passage states, matched against the
        question, optionally re-encoded by a graph encoder (GRN or GCN), and
        the matching results are integrated into ``self.attn_dist``.

        Args:
            word_vocab: word vocabulary; forwarded to both sequence encoders.
            char_vocab: accepted but not used in this method.
            POS_vocab: POS-tag vocabulary; forwarded to both sequence encoders.
            NER_vocab: NER-tag vocabulary; forwarded to both sequence encoders.
            options: configuration object. Despite the ``None`` default it is
                dereferenced unconditionally, so a real object is required.
            has_ref: when false, construction stops after the prediction
                tensors (no ``ref`` placeholder, accuracy or loss).
            is_training: controls whether to use dropout and update
                parameters; when false, no train op is built.

        Raises:
            AssertionError: if ``options.grn_rep_type`` is not supported, or
                if multi-perspective matching is enabled and the graph
                representation size differs from the question size.
            ValueError: if ``options.optimize_type`` is not supported
                (training only).
        """
        # is_training controls whether to use dropout and update parameters
        self.is_training = is_training
        # has_ref distinguish 'dev' evaluation from 'final test' evaluation
        self.has_ref = has_ref

        self.options = options
        self.word_vocab = word_vocab

        # separately encode passage and question
        self.passage_encoder = encoder_utils.SeqEncoder(options,
                                                        word_vocab,
                                                        POS_vocab=POS_vocab,
                                                        NER_vocab=NER_vocab)

        self.question_encoder = encoder_utils.SeqEncoder(options,
                                                         word_vocab,
                                                         POS_vocab=POS_vocab,
                                                         NER_vocab=NER_vocab,
                                                         embed_reuse=True)

        with tf.variable_scope('passage'):
            passage_dim, passage_repre, passage_mask = self.passage_encoder.encode(
                is_training=is_training)
        with tf.variable_scope('question'):
            question_dim, question_repre, question_mask = self.question_encoder.encode(
                is_training=is_training)

        # modeling entities
        self.entity_starts = tf.placeholder(tf.int32, [None, None],
                                            'entity_starts')
        self.entity_ends = tf.placeholder(tf.int32, [None, None],
                                          'entity_ends')
        self.entity_lengths = tf.placeholder(tf.int32, [None],
                                             'entity_lengths')

        batch_size = tf.shape(self.entity_starts)[0]
        entity_len_max = tf.shape(self.entity_starts)[1]
        entity_mask = tf.sequence_mask(self.entity_lengths,
                                       entity_len_max,
                                       dtype=tf.float32)  # [batch, entity]

        # an entity is represented by its start and end passage states
        entity_st_rep = operation_utils.collect_node(
            passage_repre, self.entity_starts)  # [batch, entity, rep_dim]
        entity_ed_rep = operation_utils.collect_node(
            passage_repre, self.entity_ends)  # [batch, entity, rep_dim]
        entity_rep = tf.concat([entity_st_rep, entity_ed_rep],
                               axis=2)  # [batch, entity, rep_dim * 2]
        entity_dim = passage_dim * 2

        # the whole question is represented by its first and last states
        qfull_st_rep = question_repre[:, 0, :]  # [batch, rep_dim]
        qfull_ed_rep = operation_utils.collect_final_step(
            question_repre,
            self.question_encoder.sequence_lengths - 1)  # [batch, rep_dim]
        qfull_rep = tf.concat([qfull_st_rep, qfull_ed_rep],
                              axis=1)  # [batch, rep_dim * 2]
        qfull_dim = question_dim * 2

        matching_results = []
        rst_seq = self.perform_matching(entity_rep,
                                        entity_dim,
                                        entity_mask,
                                        question_repre,
                                        qfull_rep,
                                        question_dim,
                                        question_mask,
                                        scope_name='seq_match',
                                        options=options,
                                        is_training=is_training)
        matching_results.append(rst_seq)

        # encode entity representation with GRN
        if options.with_grn or options.with_gcn:
            # merge question representation into passage
            q4p_rep = tf.tile(
                tf.expand_dims(qfull_rep, 1),  # [batch, 1, rep_dim * 2]
                [1, entity_len_max, 1])  # [batch, entity, rep_dim * 2]
            entity_rep = tf.concat([entity_rep, q4p_rep], axis=2)
            entity_dim = entity_dim + qfull_dim

            # compress before going to GRN
            merge_w = tf.get_variable('merge_w',
                                      [entity_dim, options.merge_dim])
            merge_b = tf.get_variable('merge_b', [options.merge_dim])

            entity_rep = tf.reshape(entity_rep, [-1, entity_dim])
            entity_rep = tf.matmul(entity_rep, merge_w) + merge_b
            entity_rep = tf.reshape(
                entity_rep, [batch_size, entity_len_max, options.merge_dim])
            # zero out padded entity slots after the projection
            entity_rep = entity_rep * tf.expand_dims(entity_mask, axis=-1)
            entity_dim = options.merge_dim

            # main part: encoding
            scope_name = 'GRN' if options.with_grn else 'GCN'

            with tf.variable_scope(scope_name):
                self.edges = tf.placeholder(tf.int32, [None, None, None],
                                            'edges')
                self.edges_mask = tf.placeholder(tf.float32,
                                                 [None, None, None],
                                                 'edges_mask')
                if options.with_grn:
                    print("With Graph recurrent network as the graph encoder")
                    self.graph_encoder = graph_encoder_utils.GraphEncoder(
                        entity_rep,
                        entity_mask,
                        entity_dim,
                        self.edges,
                        self.edges_mask,
                        is_training=is_training,
                        options=options)
                else:
                    print("With GCN as the graph encoder")
                    self.graph_encoder = gcn_encoder_utils.GCNEncoder(
                        entity_rep,
                        entity_mask,
                        entity_dim,
                        self.edges,
                        self.edges_mask,
                        is_training=is_training,
                        options=options)

                # one matching pass per graph-encoder step
                for i in range(options.num_grn_step):
                    if options.grn_rep_type == 'hidden':
                        entity_grn_rep = self.graph_encoder.grn_historys[
                            i]  # [batch, entity, grn_dim]
                        entity_grn_dim = options.grn_dim
                    elif options.grn_rep_type == 'hidden_embed':
                        entity_grn_rep = tf.concat(
                            [self.graph_encoder.grn_historys[i], entity_rep],
                            2)  # [batch, entity, grn_dim + merge_dim]
                        entity_grn_dim = options.grn_dim + entity_dim
                    else:
                        assert False, '%s not supported yet' % options.grn_rep_type

                    if options.with_multi_perspective:
                        assert entity_grn_dim == question_dim

                    rst_grn = self.perform_matching(entity_grn_rep,
                                                    entity_grn_dim,
                                                    entity_mask,
                                                    question_repre,
                                                    qfull_rep,
                                                    question_dim,
                                                    question_mask,
                                                    scope_name='grn%d_match' %
                                                    i,
                                                    options=options,
                                                    is_training=is_training)
                    matching_results.append(rst_grn)

        self.candidates = tf.placeholder(
            tf.int32, [None, None, None],
            'candidates')  # [batch, c_num, c_occur]
        self.candidates_len = tf.placeholder(tf.float32, [None],
                                             'candidates_len')  # [batch]
        self.candidates_occur_mask = tf.placeholder(
            tf.float32, [None, None, None],
            'candidates_occur_mask')  # [batch, c_num, c_occur]

        # matching_results: list of [batch, cands]
        self.attn_dist = self.perform_integration(matching_results,
                                                  scope_name='integration',
                                                  options=options,
                                                  is_training=is_training)

        # k equals the number of candidates, so top_k returns a full ranking
        cand_num = tf.shape(self.candidates)[1]
        self.topk_probs, self.topk_ids = tf.nn.top_k(self.attn_dist,
                                                     k=cand_num,
                                                     name='topK')
        self.out = tf.argmax(self.attn_dist, axis=-1, output_type=tf.int32)

        if not has_ref:
            return

        self.ref = tf.placeholder(tf.int32, [None], 'ref')
        # number (not ratio) of correct predictions in the batch
        self.accu = tf.reduce_sum(
            tf.cast(tf.equal(self.out, self.ref), dtype=tf.float32))

        # NOTE(review): tf.log is applied to attn_dist without clipping here;
        # confirm perform_integration never emits exact zeros.
        xent = -tf.reduce_sum(
            tf.one_hot(self.ref, cand_num) * tf.log(self.attn_dist), axis=-1)

        self.loss = tf.reduce_mean(xent)

        if not is_training:
            return

        # NOTE(review): the training ops are pinned to "/gpu:1"; this needs a
        # second GPU (or soft placement enabled in the session config).
        with tf.variable_scope("training_op"), tf.device("/gpu:1"):
            # Fail loudly on an unknown optimizer instead of hitting an
            # unbound local variable further down.
            if options.optimize_type == 'adadelta':
                optimizer = tf.train.AdadeltaOptimizer(
                    learning_rate=options.learning_rate)
            elif options.optimize_type == 'adam':
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=options.learning_rate)
            else:
                raise ValueError(
                    'optimize_type (%s) not supported' % options.optimize_type)

            # dict.has_key() does not exist on Python 3; `in` behaves the
            # same on both Python 2 and 3.
            if "max_grad_norm" in options.__dict__:
                clipper = options.max_grad_norm
            else:
                clipper = 50
            print("Max gradient norm {}".format(clipper))
            tvars = tf.trainable_variables()
            if options.lambda_l2 > 0.0:
                # only variables of rank > 1 are regularized
                l2_loss = tf.add_n([
                    tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1
                ])
                self.loss = self.loss + options.lambda_l2 * l2_loss
            grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                              clipper)
            apply_op = optimizer.apply_gradients(zip(grads, tvars))
            self.train_op = tf.group(apply_op)
Exemple #6
0
    def __init__(self,
                 word_vocab,
                 char_vocab,
                 pos_vocab,
                 edgelabel_vocab,
                 options,
                 mode='train'):
        """Build the TensorFlow graph for a classifier over entity pairs.

        The graph chains a sequential encoder, a graph encoder, a pooling
        step over the hidden states selected by ``entity_indices`` and a
        softmax layer over ``options.num_relations`` classes.  In 'train'
        mode an L2-regularised, gradient-clipped training op is added.

        Args:
            word_vocab: word vocabulary, forwarded to the sequential encoder.
            char_vocab: character vocabulary, forwarded to the sequential
                encoder.
            pos_vocab: POS vocabulary, forwarded to the sequential encoder.
            edgelabel_vocab: edge-label vocabulary, forwarded to the graph
                encoder.
            options: configuration object.  This method reads ``with_char``,
                ``with_POS``, ``forest_prob_aware``, ``forest_type``,
                ``num_relations``, ``lambda_l2``, ``learning_rate``,
                ``optimize_type`` and, when present, ``decay`` (together
                with ``trn_bch_num`` / ``max_epochs`` for the schedules).
            mode: 'train' or 'evaluate'.  Any value other than 'train' stops
                after the loss has been defined.

        Raises:
            ValueError: in 'train' mode, if ``options.decay`` or
                ``options.optimize_type`` is not one of the supported values.
        """
        self.mode = mode

        # is_training is forwarded to both encoders (it controls whether
        # dropout is used, per the original author's note).
        is_training = mode == 'train'

        self.options = options
        self.word_vocab = word_vocab
        self.char_vocab = char_vocab
        self.pos_vocab = pos_vocab

        # sequential encoder that can take 0 LSTM layers
        self.encoder = encoder_utils.SeqEncoder(options, word_vocab,
                                                char_vocab, pos_vocab)
        word_repres, word_dim, sentence_repres, sentence_dim, seq_mask = \
                self.encoder.encode(is_training=is_training)

        # encode the input instance
        # encoder.graph_hidden [batch, node_num, vsize]
        # encoder.graph_cell [batch, node_num, vsize]
        self.graph_encoder = graph_encoder_utils.GraphEncoder(
            options,
            word_repres,
            word_dim,
            sentence_repres,
            sentence_dim,
            seq_mask,
            edgelabel_vocab,
            is_training=is_training)

        # Re-export the encoders' placeholders on the model so that callers
        # can reach every input through this single object.
        self.sentence_words = self.encoder.sentence_words
        self.sentence_lengths = self.encoder.sentence_lengths
        if options.with_char:
            self.sentence_chars = self.encoder.sentence_chars
            self.sentence_chars_lengths = self.encoder.sentence_chars_lengths
        if options.with_POS:
            self.sentence_POSs = self.encoder.sentence_POSs

        self.in_neigh_indices = self.graph_encoder.in_neighbor_indices
        self.in_neigh_edges = self.graph_encoder.in_neighbor_edges
        self.in_neigh_mask = self.graph_encoder.in_neighbor_mask

        self.out_neigh_indices = self.graph_encoder.out_neighbor_indices
        self.out_neigh_edges = self.graph_encoder.out_neighbor_edges
        self.out_neigh_mask = self.graph_encoder.out_neighbor_mask

        # Edge probabilities are only exposed for probability-aware forests.
        if options.forest_prob_aware and options.forest_type != '1best':
            self.in_neigh_prob = self.graph_encoder.in_neighbor_prob
            self.out_neigh_prob = self.graph_encoder.out_neighbor_prob

        self.entity_indices = tf.placeholder(tf.int32, [None, None, None],
                                             name="entity_indices")
        self.entity_indices_mask = tf.placeholder(tf.float32,
                                                  [None, None, None],
                                                  name="entity_indices_mask")

        # collect inputs for final classifier
        final_repres = self.graph_encoder.graph_hiddens
        final_shape = tf.shape(final_repres)
        batch_size = final_shape[0]
        # Only referenced by the disabled alternative head kept below.
        sentence_size_max = final_shape[1]

        # [batch, 2, indices, sentence_dim]
        entity_repres = collect_by_indices(final_repres, self.entity_indices)
        entity_repres = entity_repres * tf.expand_dims(
            self.entity_indices_mask, axis=-1)
        # [batch, 2, sentence_dim]
        # NOTE(review): reduce_mean divides by the full size of axis 2, so
        # slots zeroed by entity_indices_mask still count in the denominator;
        # confirm this is intended rather than a mask-normalised mean.
        entity_repres = tf.reduce_mean(entity_repres, axis=2)
        # [batch, 2*sentence_dim]
        h_final = tf.reshape(entity_repres, [batch_size, 2 * sentence_dim])

        ### Disabled alternative head, regarding Zhang et al., EMNLP 2018
        #h_sent = tf.reduce_max(final_repres, axis=1)
        #hsent_loss = None
        #if options.lambda_l2_hsent > 0.0:
        #    hsent_loss = tf.reduce_mean(
        #            tf.reduce_sum(h_sent * h_sent, axis=-1), axis=-1)
        #h_s = tf.reduce_max(
        #        range_repres(final_repres, sentence_size_max, self.sbj_starts, self.sbj_ends),
        #        axis=1)
        #h_o = tf.reduce_max(
        #        range_repres(final_repres, sentence_size_max, self.obj_starts, self.obj_ends),
        #        axis=1)
        #h_final = tf.concat([h_sent, h_s, h_o], axis=1) # [batch, sentence_dim*3]
        #h_final = tf.layers.dense(h_final, options.ffnn_size, name="ffnn_1", activation=tf.nn.relu) # [batch, ffnn_size]
        #h_final = tf.layers.dense(h_final, options.ffnn_size, name="ffnn_2", activation=tf.nn.relu) # [batch, ffnn_size]

        ## [batch, class_num]
        # The softmax output goes through _clip_and_normalize with 1.0e-6
        # before tf.log is applied to it in the loss below.
        self.distribution = _clip_and_normalize(
            tf.layers.dense(h_final,
                            options.num_relations,
                            name="ffnn_out",
                            activation=tf.nn.softmax), 1.0e-6)
        self.rsts = tf.argmax(self.distribution, axis=-1, output_type=tf.int32)

        ## calculating accuracy
        # self.accu is the COUNT of correct predictions in the batch (a sum,
        # not a ratio).
        self.refs = tf.placeholder(tf.int32, [
            None,
        ])
        self.accu = tf.reduce_sum(
            tf.cast(tf.equal(self.rsts, self.refs), dtype=tf.float32))

        ## calculating loss
        # xent: [batch]
        xent = -tf.reduce_sum(tf.one_hot(self.refs, options.num_relations) *
                              tf.log(self.distribution),
                              axis=-1)

        self.loss = tf.reduce_mean(xent)

        if mode != 'train':
            print('Return from here, just evaluate')
            return

        #if options.lambda_l2_hsent > 0.0:
        #    self.loss += hsent_loss * options.lambda_l2_hsent

        # Maximum global gradient norm used for clipping below.
        clipper = 5
        tvars = tf.trainable_variables()
        if options.lambda_l2 > 0.0:
            # Only variables with rank > 1 are regularised; rank-1 variables
            # are left out of the penalty.
            l2_loss = tf.add_n(
                [tf.nn.l2_loss(v) for v in tvars if v.get_shape().ndims > 1])
            self.loss += options.lambda_l2 * l2_loss

        # A missing 'decay' option is treated exactly like decay == "none".
        decay = getattr(options, "decay", "none")
        if decay != "none":
            global_step = tf.Variable(0, trainable=False)
            if decay == 'piece':
                # Eleven rates, each 0.9x the previous one, separated by ten
                # boundaries spaced 10 * trn_bch_num steps apart.
                # NOTE(review): the first boundary (i == 0) is 0, so the
                # initial rate presumably applies only while
                # global_step <= 0; confirm this is intended.
                values, bounds = [
                    options.learning_rate,
                ], []
                for i in range(10):
                    values.append(values[-1] * 0.9)
                    bounds.append(options.trn_bch_num * 10 * i)
                learning_rate = tf.train.piecewise_constant(
                    global_step, bounds, values)
            elif decay == 'poly':
                decay_steps = options.trn_bch_num * options.max_epochs
                learning_rate = tf.train.polynomial_decay(
                    options.learning_rate,
                    global_step,
                    decay_steps,
                    end_learning_rate=0.1 * options.learning_rate,
                    power=0.5)
            elif decay == 'cos':
                decay_steps = options.trn_bch_num * options.max_epochs
                learning_rate = tf.train.cosine_decay(options.learning_rate,
                                                      global_step,
                                                      decay_steps,
                                                      alpha=0.1)
            else:
                # Raise explicitly: an assert would be stripped under
                # `python -O` and surface later as an obscure NameError.
                raise ValueError(
                    "not supported decay: {!r}".format(decay))
        else:
            global_step = None
            learning_rate = options.learning_rate

        if options.optimize_type == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
        elif options.optimize_type == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        else:
            # Same reasoning as above: fail loudly even with assertions off.
            raise ValueError("not supported optimize type: {!r}".format(
                options.optimize_type))
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                          clipper)
        train_op = optimizer.apply_gradients(zip(grads, tvars),
                                             global_step=global_step)

        # extra_train_ops is currently empty, so the group holds only train_op.
        extra_train_ops = []
        train_ops = [train_op] + extra_train_ops
        self.train_op = tf.group(*train_ops)